# In [1]:
import numpy as np
import sys
import scipy
import matplotlib.pyplot as plt


class SSVM:
    """Linear two-class SVM trained on a smoothed squared-hinge objective.

    Every training sample is stored as one row of ``self.A``: ``[x, -1]`` for
    a positive sample and ``[-x, 1]`` for a negative one, so that
    ``A @ [w; b]`` is the signed margin ``y * (x @ w - b)``.  ``train`` runs
    Newton iterations on the stacked vector ``[w; b]`` and falls back to an
    Armijo step-size search when the full Newton step does not decrease the
    objective enough.
    """

    def __init__(self, A_pos, A_neg, C, w0, b0, delta, beta, tol=1e-5, convergSpeed=1e-8, maxIter=100, alpha=1000):
        """Store the hyper-parameters and build the signed data matrix.

        Args:
            A_pos: 2-D array, one positive-class sample per row.
            A_neg: 2-D array, one negative-class sample per row.
            C: weight of the regulariser; the objective uses ``w.T w / C``.
            w0: initial weights; stacked on top of ``b0`` with ``np.vstack``,
                so a column vector is expected.
            b0: initial offset.
            delta: decay rate inside ``beta * exp(-delta * |A w|)``
                (original author's note: "-1 -> 1").
            beta: scale of the same margin-adjustment term
                (original author's note: "-0.5 -> 0.5").
            tol: tolerance on the Newton step norm and on the scaled squared
                gradient norm.
            convergSpeed: minimum objective decrease for which the full
                Newton step is accepted without a line search.
            maxIter: maximum number of Newton iterations.
            alpha: sharpness of the smooth approximation of ``max(x, 0)``.

        Exits the process with status 1 when the inputs cannot be stacked.
        """
        self.tol = tol
        self.convergSpeed = convergSpeed
        self.C = C
        self.maxIter = maxIter
        self.alpha = alpha  # smoothing parameter of the plus function

        # loss function parameters
        self.delta = delta
        self.beta = beta

        try:
            self.A = np.vstack([
                np.hstack([A_pos, -np.ones([A_pos.shape[0], 1])]),
                np.hstack([-A_neg, np.ones([A_neg.shape[0], 1])]),
            ])
            self.w = np.vstack((w0, b0))
        except (AttributeError, IndexError, TypeError, ValueError):
            print("\n===Error in SSVM-init : the dimension of w, b, A_pos, A_neg not agree===")
            sys.exit(1)

    def _margin_residual(self, w):
        """Return ``1 - A w + beta * exp(-delta * |A w|)``, one row per sample."""
        scores = self.A.dot(w)
        adjustment = self.beta * np.exp(-self.delta * np.abs(scores))
        return np.ones((self.A.shape[0], 1)) - scores + adjustment

    def objf(self, w):
        """Evaluate the smoothed objective at ``w``.

        Args:
            w: stacked column vector ``[w; b]``.

        Returns:
            ``0.5 * (v.T v + w.T w / C)`` as a 1x1 array, where ``v`` is the
            smooth plus function ``x + log(1 + exp(-alpha * x)) / alpha``
            applied to the margin residual ``x``.
        """
        try:
            x = self._margin_residual(w)
        except (TypeError, ValueError):
            print("\n===Error in SSVM-objf : loss function error===")
            sys.exit(1)
        try:
            # logaddexp(0, a*x) / a == x + log(1 + exp(-a*x)) / a, but it does
            # not overflow to inf when a*x is a large negative number (which
            # is the case for every comfortably classified sample).
            v = np.logaddexp(0.0, self.alpha * x) / self.alpha
            # smoothed squared hinge loss + regularisation
            return 0.5 * (v.T.dot(v) + w.T.dot(w) / self.C)
        except TypeError:
            print("\n===Error in SSVM-objf : type of parameter are not the same===")
            sys.exit(1)

    def armijo(self, w, p_k, gap, obj1):
        """Backtracking line search along the Newton direction.

        Tries the step sizes 0.5, 0.25, ... and stops at the first one that
        satisfies ``f(w) - f(w + step * p_k) >= -0.05 * step * gap``.

        Args:
            w: current point.
            p_k: Newton direction.
            gap: directional derivative ``p_k.T @ gradient`` (negative for a
                descent direction).
            obj1: objective value at ``w``.

        Returns:
            The accepted step size.  If none of the 20 trial steps passes,
            ``0.5 ** 21`` is returned.
        """
        stepsize = 0.5  # initial size
        try:
            for _ in range(20):
                obj2 = self.objf(w + stepsize * p_k)
                # Written as a negated "<" so that a NaN objective ends the
                # search instead of forcing all 20 halvings.
                if not (obj1 - obj2 < -0.05 * stepsize * gap):
                    break
                stepsize = 0.5 * stepsize
        except TypeError:
            print("\n===Error in SSVM-armijo : type of variables are not the same===")
            sys.exit(1)
        except ValueError:
            print("\n===Error in SSVM-armijo : value of variables are not correct===")

        return stepsize

    def train(self):
        """Run the Newton-Armijo iteration starting from the stored ``self.w``.

        The loop ends when the Newton step norm drops to ``tol``, when the
        squared gradient norm divided by the number of parameters is no
        longer above ``tol``, or after ``maxIter`` iterations.

        Returns:
            dict with ``"w"`` (all rows of the stacked vector except the
            last) and ``"b"`` (its last row).
        """
        n_params = self.A.shape[1]
        stopCond = 1
        iteration = 0

        while stopCond > self.tol and iteration < self.maxIter:
            iteration += 1
            d = self._margin_residual(self.w)
            active = d[:, 0] > 0  # samples with a positive residual
            A_act = self.A[active, :]

            # Regularisation gradient - loss gradient on the active samples.
            # NOTE: gradient and Hessian treat the beta/delta adjustment as a
            # constant, i.e. its derivative with respect to w is not included.
            # An empty active set needs no special case: both products below
            # then reduce to zeros.
            gradz = self.w / self.C - A_act.T.dot(d[active])
            if not (gradz.T.dot(gradz) / n_params > self.tol):
                break

            hessian = np.eye(n_params) / self.C + A_act.T.dot(A_act)
            try:
                # Solve H p = -g directly instead of forming inv(H).
                p_k = np.linalg.solve(hessian, -gradz)
            except np.linalg.LinAlgError:
                print("\n===Error in SSVM-train : inverse of hessian error===")
                p_k = np.zeros(self.w.shape)

            obj1 = self.objf(self.w)
            w1 = self.w + p_k
            obj2 = self.objf(w1)

            if (obj1 - obj2) <= self.convergSpeed:
                # Full step did not decrease the objective enough:
                # pick the step size with Armijo's rule.
                gap = p_k.T.dot(gradz)
                stepsize = self.armijo(self.w, p_k, gap, obj1)
                self.w = self.w + stepsize * p_k
            else:
                # Plain Newton step.
                self.w = w1

            stopCond = np.linalg.norm(p_k)  # 2-norm of the Newton direction

        return {"w": self.w[:-1], "b": self.w[-1]}

    def convergence_history(self, info, xMin, x0, p, H=None):
        """Compute convergence measures for a recorded sequence of 2-D iterates.

        ``train`` does not record iterates, so ``info`` has to be supplied by
        the caller.  The function-value and gradient histories are only
        produced when callables ``f`` / ``df`` have been attached to the
        instance; otherwise they are returned empty.

        Args:
            info: dict whose ``'xs'`` entry is the sequence of iterates; only
                the first two coordinates of each iterate are used.
            xMin: reference minimiser; ``None`` selects the last iterate.
            x0: unused, kept for interface compatibility.
            p: order of the p-norm, or ``'M'`` for the norm induced by ``H``
                (the 2-norm is then used for the gradient history).
            H: matrix for the ``'M'`` norm; when omitted ``self.d2f(xMin)``
                is used, which requires a ``d2f`` attribute.

        Returns:
            dict with keys ``'x'`` (distance of every iterate to ``xMin``),
            ``'f'`` (``f(x_k) - f(xMin)``) and ``'df'`` (p-norm of
            ``df(x_k)``).
        """
        # Row 0 holds the first coordinate of every iterate, row 1 the second.
        arr = np.array(
            [np.ravel(x)[:2] for x in info['xs']], dtype=float
        ).reshape(-1, 2).T

        if xMin is None:
            xMin = (arr[0][-1], arr[1][-1])

        n_iter = arr.shape[1]
        # Broadcasting a (2, 1) column subtracts xMin[0] from the whole first
        # row and xMin[1] from the whole second row.
        err = arr - np.ravel(np.asarray(xMin, dtype=float))[:2].reshape(2, 1)

        if p == 'M':
            p = 2
            # M has to be symmetric positive definite.
            M = np.asarray(H if H is not None else self.d2f(xMin))
            # Convergence of iterates: || x_k - xMin ||_M
            con_x = [np.sqrt(err[:, k].dot(M.dot(err[:, k]))) for k in range(n_iter)]
        else:
            # Convergence of iterates: || x_k - xMin ||_p
            con_x = np.sum(np.abs(err) ** p, axis=0) ** (1 / p)

        f = getattr(self, 'f', None)
        df = getattr(self, 'df', None)
        con_f = []
        con_df = []
        if f is not None:
            # Convergence of function values: f(x_k) - f(xMin)
            con_f = [f([arr[0, k], arr[1, k]]) - f(xMin) for k in range(n_iter)]
            if df is not None:
                # Convergence of gradient: || df(x_k) ||_p
                con_df = [
                    np.sum(np.abs(df([arr[0, k], arr[1, k]])) ** p) ** (1 / p)
                    for k in range(n_iter)
                ]

        return {'x': con_x, 'f': con_f, 'df': con_df}  # convergence information