# In [None]:
import numpy as np
import sys


class SSVM_Conv:
    """Newton-type trainer for a smooth SVM with a margin-adjusted loss.

    The positive and negative samples are stacked into one signed, augmented
    matrix ``A`` (see ``__init__``) and the weight vector and bias are kept in
    a single column vector ``w``.  ``train`` runs damped Newton iterations and
    can additionally return convergence diagnostics.
    """

    def __init__(self, A_pos, A_neg, C, w0, b0, delta, beta, con_plot=True, tol=1e-5, convergSpeed=1e-8, maxIter=100, alpha=1000):
        """Store the hyper-parameters and build the augmented data matrix.

        Parameters:
            A_pos, A_neg: 2-D arrays holding the positive / negative samples
                row-wise; both need the same number of columns.
            C: regularisation weight (the objective uses ``w'w / C``).
            w0, b0: initial weights (column vector) and initial bias.
            delta, beta: parameters of the margin-adjustment term
                ``beta * exp(-delta * |A w|)``.
            con_plot: when true, ``train`` also returns convergence data.
            tol: tolerance used by the gradient-based stopping tests.
            convergSpeed: minimum objective decrease for which the full
                Newton step is accepted without a line search.
            maxIter: maximum number of Newton iterations.
            alpha: smoothing parameter of the smooth plus function.

        On inconsistent input shapes/types an error message is printed and
        the interpreter exits with status 1.
        """
        self.tol = tol
        self.convergSpeed = convergSpeed
        self.C = C
        self.maxIter = maxIter
        self.alpha = alpha  # p parameter

        # loss function parameters
        self.delta = delta  # -0.8
        self.beta = beta  # 0.4

        # convergence plot
        self.con_plot = con_plot

        try:
            # Rows are [x, -1] for positive samples and [-x, +1] for negative
            # ones, so the last entry of ``w`` multiplies the appended column.
            self.A = np.vstack([
                np.hstack([A_pos, -np.ones([A_pos.shape[0], 1])]),
                np.hstack([-A_neg, np.ones([A_neg.shape[0], 1])]),
            ])
            self.w = np.vstack((w0, b0))
        except Exception:
            # NOTE(review): exiting the process is kept for compatibility with
            # existing callers; raising an exception would be friendlier.
            print("\n===Error in SSVM-init : the dimension of w, b, A_pos, A_neg not agree===")
            sys.exit(1)

    def _slack(self, w):
        """Return ``1 - A w + beta * exp(-delta * |A w|)`` for every sample."""
        scores = self.A.dot(w)
        return 1.0 - scores + self.beta * np.exp(-self.delta * np.abs(scores))

    def _objective_value(self, w):
        """Return ``objf(w)`` as a plain Python float."""
        return float(np.asarray(self.objf(w)).reshape(-1)[0])

    def _active_hessian(self, active):
        """Return ``I / C + A_act' A_act`` for the rows selected by ``active``."""
        A_act = self.A[active, :]
        return np.eye(self.A.shape[1]) / self.C + A_act.T.dot(A_act)

    def objf(self, w):
        '''Evaluate the function value:
            w = vector in SVM
            Output:
            return = value

        The value is ``0.5 * (v'v + w'w / C)`` where ``v`` is the smooth plus
        function ``x + log(1 + exp(-alpha * x)) / alpha`` of the slack ``x``.
        On failure a message is printed and the interpreter exits.
        '''
        try:
            x = self._slack(w)
        except Exception:
            print("\n===Error in SSVM-objf : loss function error===")
            sys.exit(1)
        try:
            # logaddexp(0, t) == log(1 + exp(t)) but does not overflow for
            # large t (strongly negative slack combined with a large alpha).
            v = x + np.logaddexp(0.0, -self.alpha * x) / self.alpha

            # smoothed hinge loss + regularization
            return 0.5 * (v.T.dot(v) + w.T.dot(w) / self.C)
        except TypeError:
            print("\n===Error in SSVM-objf : type of parameter are not the same===")
            sys.exit(1)

    def zoomInt(self, phi, dphi, alpha_l, alpha_h, c1, c2, trail_step='bisection'):
        ''' The purpose of this algorithm is to find a suitable step length (alpha)
        that satisfies the Wolfe conditions during a line search in an optimization problem.
        Successively decrease the interval size of the alphas until acceptable step length found.

        Parameters:
            phi, dphi: callables returning the 1-D objective and its derivative
                at a step length.
            alpha_l, alpha_h: end points of the bracketing interval.
            c1, c2: sufficient-decrease and curvature constants.
            trail_step: 'bisection' (implemented) or 'interp2' (not implemented).

        Returns:
            (alpha, info): the accepted step length (0 if none was accepted
            within the iteration budget) and a dict with per-iteration history.

        Raises:
            NotImplementedError: for ``trail_step='interp2'``.
            ValueError: for any other unknown ``trail_step``.
        '''
        method = trail_step.lower()
        if method == 'interp2':
            # You need to implement the 'interp2' method
            raise NotImplementedError("Interp2 method not implemented")
        if method != 'bisection':
            raise ValueError(f"Unknown trial step method: {trail_step!r}")

        tol = np.finfo(float).eps
        # Structure containing information about the iteration
        info = {'alpha_ls': [], 'alpha_hs': [], 'alpha_js': [], 'phi_js': [], 'dphi_js': []}

        # These do not change between iterations, so evaluate them only once.
        phi_0 = phi(0)
        dphi_0 = dphi(0)
        phi_l = phi(alpha_l)

        max_iter = 100
        alpha = 0

        for _ in range(1, max_iter):
            # Trial step length alpha_j in [alpha_l, alpha_h]
            alpha_j = 0.5 * (alpha_h + alpha_l)
            phi_j = phi(alpha_j)

            # Update info
            info['alpha_ls'].append(alpha_l)
            info['alpha_hs'].append(alpha_h)
            info['alpha_js'].append(alpha_j)
            info['phi_js'].append(phi_j)

            if abs(alpha_h - alpha_l) < tol:
                alpha = alpha_j
                # keep the history lists the same length
                info['dphi_js'].append(np.nan)
                print("Line search stopped because the interval became too small. Returning center of the interval.")
                print(f'Centre: {alpha_j}')
                break

            if phi_j > phi_0 + c1 * alpha_j * dphi_0 or phi_j >= phi_l:
                # alpha_j fails sufficient decrease (or is no better than
                # alpha_l): continue the search below alpha_j
                alpha_h = alpha_j
                info['dphi_js'].append(np.nan)
                continue

            # alpha_j satisfies sufficient decrease condition
            dphi_j = dphi(alpha_j)
            info['dphi_js'].append(dphi_j)

            if abs(dphi_j) <= -c2 * dphi_0:
                # alpha_j satisfies strong curvature condition
                alpha = alpha_j
                break

            if dphi_j * (alpha_h - alpha_l) >= 0:
                # alpha_h : dphi(alpha_l)*(alpha_h - alpha_l) < 0
                # alpha_j violates this condition but swapping alpha_l <-> alpha_h will reestablish it
                alpha_h = alpha_l

            alpha_l = alpha_j
            phi_l = phi_j

        return alpha, info

    def backtracking(self, deriv, x_k, p, alpha0, opts=None):
        """Backtracking line search on ``objf`` along direction ``p``.

        Parameters:
            deriv: callable invoked as ``deriv(self, w_k=x_k)`` returning the
                gradient at ``x_k``.
            x_k: current point.
            p: search direction.
            alpha0: initial step length; ``None`` means 1.
            opts: optional dict with keys 'c1' (sufficient-decrease constant,
                default 1e-4) and 'rho' (shrink factor, default 0.2).

        Returns:
            (alpha, info): the step length found and a dict with the tried
            step lengths and the constants used.  The step is shrunk at most
            201 times.
        """
        opts = {} if opts is None else opts

        c1 = opts.get('c1')
        if c1 is None:
            c1 = 1e-4
        # rho = 0.1 for steepest descent, conjugate gradients
        # rho = 0.9 for Newton, Quasi-Newton
        rho = opts.get('rho')
        if rho is None:
            rho = 0.2
        if alpha0 is None:
            alpha0 = 1

        # Initialize info structure
        info = {'alphas': [alpha0], 'rho': [rho], 'c1': [c1]}

        # Initial step length
        alpha = alpha0

        # f, and directional derivative of f, at x_k (loop invariant)
        f_k = self.objf(x_k)
        slope = deriv(self, w_k=x_k).T @ p

        # Shrink until f(x_k + alpha p) <= f_k + c1 * alpha * grad' p
        count = 0
        while self.objf(x_k + alpha * p) > f_k + c1 * alpha * slope:
            alpha = rho * alpha
            info['alphas'].append(alpha)
            count += 1
            if count > 200:
                break

        return alpha, info

    def armijo(self, w, p_k, gap, obj1):
        '''
        Avoid the local maximum(minimum) in Newton method:

        w = current point
        p_k = newton direction
        gap = defined in ssvm code
        obj1 = the object function value of current point

        Output:
        stepsize = stepsize for Newton method

        The step starts at 0.5 and is halved before each evaluation, so the
        first step actually tested is 0.25; at most 20 halvings are done.
        '''
        decrease = 0
        stepsize = 0.5  # initial size
        trials = 1
        try:
            # Armijo: accept once obj1 - f(w + step * p_k) >= -0.05 * step * gap
            # (0.05 plays the role of c1 in the other line searches).
            while decrease < -0.05 * stepsize * gap:
                stepsize = 0.5 * stepsize
                decrease = obj1 - self.objf(w + stepsize * p_k)

                trials += 1
                if trials > 20:
                    break

        except TypeError:
            print("\n===Error in SSVM-armijo : type of variables are not the same===")
            sys.exit(1)
        except ValueError:
            print("\n===Error in SSVM-armijo : value of variables are not correct===")

        return stepsize

    def deriv(self, w_k=np.array([])):
        """Return ``w_k / C - A[Point]' d[Point]``.

        Uses ``self.Point`` and ``self.d`` as stored by the latest ``train``
        iteration, so it is only meaningful for the point they were computed at.
        """
        return w_k / self.C - self.A[self.Point, :].T.dot(self.d[self.Point])

    def _convergence_info(self, info):
        """Build the convergence diagnostics from the iteration history."""
        # One row per stored iterate
        iterates = np.hstack(info['ws']).T
        M = info['final_hessian']  # Hessian at the final point, used as metric
        p = 2

        # Convergence of iterates: || x_k - xMin ||_M
        err = iterates - self.w.T
        con_coeffs = [np.sqrt(e.dot(M.dot(e))) for e in err]

        # Convergence of function values: f(x_k) - f(xMin)
        con_f = [f_k - info['final_obj_fun'] for f_k in info['obj_fun']]

        # Convergence of gradient: || grad f(x_k) ||_p
        con_df = [np.sum(np.abs(g) ** p) ** (1 / p) for g in info['gradient']]

        return {'ws': con_coeffs, 'f': con_f, 'df': con_df}

    def train(self):
        """Run the Newton iterations and return the fitted model.

        Returns:
            (model, conInfo): ``model`` is ``{"w": ..., "b": ...}`` (all but
            the last entry of the internal vector, and the last entry).
            ``conInfo`` holds the lists 'ws', 'f' and 'df' when ``con_plot``
            is true, otherwise ``None``.
        """
        n_params = self.A.shape[1]
        info = {'ws': [self.w], 'obj_fun': [self._objective_value(self.w)], 'gradient': []}

        converged = False
        n_iter = 0
        while not converged and n_iter < self.maxIter:
            n_iter += 1

            d = self._slack(self.w)
            active = d[:, 0] > 0
            # deriv() reads these during the line search
            self.Point = active
            self.d = d

            # Regularisation gradient - loss function gradient.
            # NOTE(review): this treats the loss as a hard max and ignores the
            # derivative of the margin-adjustment term, unlike objf.
            # An empty active set simply makes the data term vanish.
            gradient = self.w / self.C - self.A[active, :].T.dot(d[active])

            if not (np.sum(gradient ** 2) / n_params > self.tol):
                break

            hessian = self._active_hessian(active)
            try:
                p_k = -np.linalg.solve(hessian, gradient)
            except np.linalg.LinAlgError:
                print("\n===Error in SSVM-train : inverse of hessian error===")
                p_k = np.zeros(self.w.shape)

            obj_old = self._objective_value(self.w)
            w_newton = self.w + p_k
            obj_newton = self._objective_value(w_newton)

            if (obj_old - obj_newton) <= self.convergSpeed:
                # The full Newton step does not decrease the objective
                # enough: damp it with a line search from the current point.
                stepsize, _ = self.backtracking(
                    deriv=SSVM_Conv.deriv, x_k=self.w, p=p_k, alpha0=1,
                    opts={'c1': 1e-4, 'rho': 0.9})
                self.w = self.w + stepsize * p_k
                obj_new = self._objective_value(self.w)
            else:
                # Use the Newton method
                self.w = w_newton
                obj_new = obj_newton

            converged = bool(np.linalg.norm(gradient) < self.tol * (1 + abs(obj_old)))

            info['ws'].append(self.w)
            info['obj_fun'].append(obj_new)
            info['gradient'].append(gradient)

        # Always record the final state, whichever way the loop ended.
        info['final_obj_fun'] = self._objective_value(self.w)
        info['final_hessian'] = self._active_hessian(self._slack(self.w)[:, 0] > 0)

        conInfo = self._convergence_info(info) if self.con_plot else None

        return {"w": self.w[:self.w.shape[0] - 1], "b": self.w[self.w.shape[0] - 1]}, conInfo