--------------------------
# Exercise 1

In [19]:
import numpy as np
from scipy.optimize import minimize
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from scipy.optimize import NonlinearConstraint
from scipy.optimize import LinearConstraint
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from cvxopt.modeling import op, variable, dot
from cvxopt import matrix 
from cvxopt import solvers
import time
from sklearn.metrics.pairwise import rbf_kernel, polynomial_kernel
from copy import copy
import itertools
from tqdm import tqdm_notebook

In [20]:
def load_mnist(path, kind='train'):
    import os
    import gzip
    import numpy as np

    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels



X_all_labels, y_all_labels = load_mnist('C:/Users/RE-Giorgio/Documents/OptimusPrime/Data', kind='train')


indexLabel2 = np.where((y_all_labels==2))
xLabel2 =  X_all_labels[indexLabel2][:1000,:].astype('float64') 
yLabel2 = y_all_labels[indexLabel2][:1000].astype('float64') 

indexLabel4 = np.where((y_all_labels==4))
xLabel4 =  X_all_labels[indexLabel4][:1000,:].astype('float64') 
yLabel4 = y_all_labels[indexLabel4][:1000].astype('float64') 

indexLabel6 = np.where((y_all_labels==6))
xLabel6 =  X_all_labels[indexLabel6][:100,:].astype('float64') 
yLabel6 = y_all_labels[indexLabel6][:100].astype('float64') 

yLabel2[:] = +1
yLabel4[:] = -1

X = np.concatenate([xLabel2, xLabel4])
y = np.concatenate([yLabel2, yLabel4])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1696995) 

scaler = MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.fit_transform(X_test)

In [28]:
class Svm:
    
    def __init__(self, X, y, gamma, C, kernel):
        
        self.X = X
        self.y = y
        self.alpha = np.random.normal(0,1,(1,X.shape[0]))
        self.b = np.random.randint(1)
        self.C = C
        self.gamma = gamma
        self.kernel = kernel
        
    def predict(self,X):
        
        if self.kernel == "gauss":
            z = (self.alpha*self.y) @ self.kernel_gauss(self.X, X) + self.b
        if self.kernel == "poly":
            z = (self.alpha*self.y) @ self.kernel_poly(self.X, X) + self.b
        a = np.sign(z)    
        return a
        
    def kernel_gauss(self, X1, X2):
        return rbf_kernel(X1,X2, gamma = self.gamma)
    
    def kernel_poly(self, X1, X2):
        return polynomial_kernel(X1,X2, gamma = self.gamma)
        
    def optimize(self):
        
        start = time.time()
        m = self.X.shape[0]
        y = self.y.reshape(-1,1)
        
        # computing alpha
        if self.kernel == "gauss":
            K = self.kernel_gauss(self.X, self.X)
        if self.kernel == "poly":
            K = self.kernel_poly(self.X, self.X)
        H = np.outer(y,y) * K
        P = matrix(H)
        q = matrix(-np.ones((m)))
        G = matrix(np.vstack((-np.eye(m),np.eye(m))))
        h = matrix(np.hstack((np.zeros(m), np.ones(m) * self.C)))
        A = matrix(y.reshape(1, -1))
        b = matrix(np.zeros(1))
        solvers.options['show_progress'] = False
        res = solvers.qp(P, q, G, h, A, b)
        
        alpha = np.array(res['x'])
        self.alpha = alpha.T
        
        end = time.time() - start
        print(end)
        
        # computing b
        
        alpha = alpha.ravel()
        idx = np.where(alpha > 1e-5)[0]
        wy = ((y * alpha.reshape(-1,1)).T @ K[:,idx]).T
        b = y[idx] - wy
        self.b = np.mean(b)
        
svm = Svm(X_train, y_train, gamma = 0.05, C = 2, kernel = "gauss")
svm.optimize()
y_pred = svm.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))
# 9 iterations

2.0200002193450928
0.8875


In [27]:
from sklearn.model_selection import KFold
param_grid = {"gamma" : [0.1, 0.01,0.05,0.001], "C" : [1,1.5,2],"kernel":["poly", "gauss"]}

results = []
combinations = list(itertools.product(*param_grid.values()))
for comb in tqdm_notebook(combinations):
    
    accs = []
    print("current combination :", comb)
    print("\n")
    for i in range(5):    
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 

        scaler = MinMaxScaler()
        X_train=scaler.fit_transform(X_train)
        X_test=scaler.fit_transform(X_test)
        svm = Svm(X_train, y_train, gamma = comb[0], C = comb[1],kernel = comb[2])
        svm.optimize()
        y_pred = svm.predict(X_test)
        accs.append(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))
    results.append(np.mean(accs))

HBox(children=(IntProgress(value=0, max=24), HTML(value='')))

current combination : (0.1, 1, 'poly')


2.6559948921203613
3.0080368518829346
3.30800461769104
2.5720019340515137
2.567965269088745
current combination : (0.1, 1, 'gauss')


1.915999412536621
2.1280019283294678
2.1120007038116455
2.3279991149902344


KeyboardInterrupt: 

In [131]:
from sklearn.svm import SVC
clf = SVC(gamma = 0.01)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))

0.85


In [132]:
clf.intercept_

array([0.22405762])

----------------
# Exercise 2

In [6]:
class Svm_dcmp:
    
    def __init__(self, X, y, gamma, C, kernel, q):
        
        self.X = X
        self.y = y
        self.alpha = np.zeros((X.shape[0]))
        self.b = 0
        self.C = C
        self.gamma = gamma
        self.kernel = kernel
        self.q = q
        self.grad = - np.ones(X.shape[0])
        
    def predict(self,X):
        
        if self.kernel == "gauss":
            z = (self.alpha*self.y) @ self.kernel_gauss(self.X, X) + self.b
        if self.kernel == "poly":
            z = (self.alpha*self.y) @ self.kernel_poly(self.X, X) + self.b
        a = np.sign(z)    
        return a
    
    def kernel_gauss(self, X1, X2):
        return rbf_kernel(X1,X2, gamma = self.gamma)
    
    def kernel_poly(self, X1, X2):
        return polynomial_kernel(X1,X2, gamma = self.gamma)

    def get_working_set(self, alpha):
        
        # box constraints
        y = self.y.ravel(); C = self.C; q = self.q
        R = np.where((alpha < 1e-5) & (y == +1) | (alpha > C-1e-5) & (y == -1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]
        S = np.where((alpha < 1e-5) & (y == -1) | (alpha > C-1e-5) & (y == +1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]
        
        # negative gradient divided by y
        if self.kernel == "gauss":
            K = self.kernel_gauss(self.X, self.X)
        if self.kernel == "poly":
            K = self.kernel_poly(self.X, self.X)
        Q = np.outer(y, y) * K
        grady = - self.grad/y
        
        # working set 
        I = list(np.argpartition(grady[R], q//2)[:q//2])
        J = list(np.argpartition(grady[S], -q//2)[:q//2])
        W = I + J
        W_ = list(set(np.arange(self.X.shape[0])) - set(W))
        
        # optimality condition
        m = max(grady[R])
        M = min(grady[S])
        
        cond = ""
        print(abs(m-M))
        if abs(m - M) < 1e-1:
            cond = "stop"
            
        return W, W_, Q, alpha, cond 
    
    def optimize(self):
        
        start = time.time()
        y = self.y.reshape(-1,1)
        
        old_alpha = np.zeros(self.X.shape[0])
        for i in range(100):
            
            
            W, W_, Q, alpha, cond = self.get_working_set(self.alpha)
            
            if cond == "stop":
                print("AAAAAAAAA")
                break
                
            # computing alpha
            H = Q
            P = cvxopt_matrix(Q[np.ix_(W,W)])
            q = cvxopt_matrix(alpha[W_] @ Q[np.ix_(W, W_)].T -np.ones(len(W)))
            G = cvxopt_matrix(np.vstack((-np.eye(len(W)),np.eye(len(W)))))
            h = cvxopt_matrix(np.hstack((np.zeros(len(W)), np.ones(len(W)) * self.C)))
            A = cvxopt_matrix(y[W].reshape(1, -1))
            b = cvxopt_matrix(y[W_].T @ old_alpha[W_])
            
            solvers.options['show_progress'] = False 
            try:
                res = cvxopt_solvers.qp(P, q, G, h, A, b)
                alpha = np.array(res['x']).T
            except:
                print(i)
                break
            
            #print(np.unique(self.alpha - old_alpha)
            self.alpha[W] = alpha
            
            self.grad += H @ (self.alpha - old_alpha)
            old_alpha = copy(self.alpha)
            
        end = time.time() - start
        print(end)
        
        # computing b
        K = self.kernel_gauss(self.X, self.X); y = self.y.reshape(-1,1) 
        
        alpha = alpha.ravel()
        idx = np.where((alpha > 1e-5) & (alpha < self.C + 1e-5))[0]
        wx = (y * self.alpha.reshape(-1,1)).T @ K[:,idx]
        b = y[idx] - wx.T
        self.b = np.mean(b)
    
svm = Svm_dcmp(X_train, y_train, gamma = 0.01, C = 1, kernel = "gauss",q = 800)
svm.optimize()
y_pred = svm.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))

2.0


NameError: name 'cvxopt_matrix' is not defined

In [270]:
from sklearn.model_selection import KFold
param_grid = {"gamma" : [0.1, 0.01,0.05,0.001], "C" : [1,1.5,2],"kernel":["poly", "gauss"]}

results = []
combinations = list(itertools.product(*param_grid.values()))
for comb in tqdm_notebook(combinations):
    
    
    print("current combination :", comb)
    print("\n")
    for i in range(5):    
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 

        scaler = MinMaxScaler()
        X_train=scaler.fit_transform(X_train)
        X_test=scaler.fit_transform(X_test)
        svm = Svm_dcmp(X_train, y_train, gamma = comb[0], C = comb[1],kernel = comb[2], q = comb[3])
        svm.optimize()
        y_pred = svm.predict(X_test)
        accs.append(accuracy_score(y[test_index].reshape(-1,1), y_pred.reshape(-1,1)))
    results.append(np.mean(accs))


HBox(children=(IntProgress(value=0, max=84), HTML(value='')))

current combination : (0.1, 1, 'poly', 10)


77
10.692463874816895


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan na

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [226]:
combinations[results.index([max(results)])]

(0.01, 1, 800)

In [233]:
results

[0.58,
 0.61,
 0.715,
 0.7525,
 0.6325,
 0.635,
 0.795,
 0.5475,
 0.6375,
 0.75,
 0.765,
 0.64,
 0.6325,
 0.785,
 0.51,
 0.665,
 0.735,
 0.665,
 0.6525,
 0.6325,
 0.6825,
 0.7125,
 0.7475,
 0.6225,
 0.55,
 0.5475,
 0.665,
 0.84,
 0.685,
 0.59,
 0.6475,
 0.63,
 0.5375,
 0.7575,
 0.795,
 0.655,
 0.5525,
 0.56,
 0.6275,
 0.535,
 0.6725,
 0.7775]