--------------------------
# Exercise 1

In [10]:
import numpy as np
from scipy.optimize import minimize
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from scipy.optimize import NonlinearConstraint
from scipy.optimize import LinearConstraint
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from cvxopt.modeling import op, variable, dot
from cvxopt import matrix as cvxopt_matrix
from cvxopt import solvers as cvxopt_solvers
import time
from sklearn.metrics.pairwise import rbf_kernel, polynomial_kernel

In [159]:
def load_mnist(path, kind='train'):
    import os
    import gzip
    import numpy as np

    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels



X_all_labels, y_all_labels = load_mnist('C:/Users/RE-Giorgio/Documents/OptimusPrime/Data', kind='train')


indexLabel2 = np.where((y_all_labels==2))
xLabel2 =  X_all_labels[indexLabel2][:1000,:].astype('float64') 
yLabel2 = y_all_labels[indexLabel2][:1000].astype('float64') 

indexLabel4 = np.where((y_all_labels==4))
xLabel4 =  X_all_labels[indexLabel4][:1000,:].astype('float64') 
yLabel4 = y_all_labels[indexLabel4][:1000].astype('float64') 

indexLabel6 = np.where((y_all_labels==6))
xLabel6 =  X_all_labels[indexLabel6][:100,:].astype('float64') 
yLabel6 = y_all_labels[indexLabel6][:100].astype('float64') 

yLabel2[:] = +1
yLabel4[:] = -1

X = np.concatenate([xLabel2, xLabel4])
y = np.concatenate([yLabel2, yLabel4])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1696995) 

scaler = MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.fit_transform(X_test)

In [160]:
class Svm:
    
    def __init__(self, X, y, gamma, C):
        
        self.X = X
        self.y = y
        self.alpha = np.random.normal(0,1,(1,X.shape[0]))
        self.b = np.random.randint(1)
        self.C = C
        self.gamma = gamma
        
    def predict(self,X):
        
        z = (self.alpha*self.y) @ self.kernel_poly(self.X, X) + self.b
        a = np.sign(z)    
        return a
        
    def kernel_gauss(self, X1, X2):
        return rbf_kernel(X1,X2, gamma = self.gamma)
    
    def kernel_poly(self, X1, X2):
        return polynomial_kernel(X1,X2, gamma = self.gamma)
        
    def optimize(self):
        
        start = time.time()
        m = self.X.shape[0]
        y = self.y.reshape(-1,1)
        
        # computing alpha
        K = self.kernel_poly(self.X, self.X)
        H = np.outer(y,y) * K
        P = cvxopt_matrix(H)
        q = cvxopt_matrix(-np.ones((m)))
        G = cvxopt_matrix(np.vstack((-np.eye(m),np.eye(m))))
        h = cvxopt_matrix(np.hstack((np.zeros(m), np.ones(m) * self.C)))
        A = cvxopt_matrix(y.reshape(1, -1))
        b = cvxopt_matrix(np.zeros(1))
        
        res = cvxopt_solvers.qp(P, q, G, h, A, b)
        
        alpha = np.array(res['x'])
        self.alpha = alpha.T
        
        end = time.time() - start
        print(end)
        
        # computing b
        
        alpha = alpha.ravel()
        idx = np.where(alpha > 1e-5)[0]
        wy = ((y * alpha.reshape(-1,1)).T @ K[:,idx]).T
        b = y[idx] - wy
        self.b = np.mean(b)
        
#         b = 0
#         for i in range(len(idx)):
#             b += y[i] - np.sum(alpha[idx] * self.y[idx] * K[idx[i],idx])
#         self.b = b/len(idx)
        
svm = Svm(X_train, y_train, gamma = 0.011, C = 1.4)
svm.optimize()
y_pred = svm.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))
# 9 iterations

     pcost       dcost       gap    pres   dres
 0: -2.9344e+02 -4.4356e+03  2e+04  2e+00  6e-13
 1: -2.3601e+02 -2.4206e+03  4e+03  3e-01  4e-13
 2: -2.1224e+02 -1.0642e+03  1e+03  7e-02  3e-13
 3: -2.2820e+02 -4.6035e+02  3e+02  2e-02  3e-13
 4: -2.5017e+02 -3.2431e+02  8e+01  3e-03  3e-13
 5: -2.6268e+02 -2.8191e+02  2e+01  2e-04  3e-13
 6: -2.6728e+02 -2.7054e+02  3e+00  2e-05  3e-13
 7: -2.6824e+02 -2.6851e+02  3e-01  2e-06  3e-13
 8: -2.6833e+02 -2.6834e+02  1e-02  7e-08  3e-13
 9: -2.6833e+02 -2.6833e+02  3e-04  1e-09  4e-13
10: -2.6833e+02 -2.6833e+02  8e-06  2e-11  4e-13
Optimal solution found.
2.6842007637023926
0.8625


In [130]:
svm.b

0.8081783449218535

In [131]:
from sklearn.svm import SVC
clf = SVC(gamma = 0.01)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))

0.85


In [132]:
clf.intercept_

array([0.22405762])

----------------
# Exercise 2

In [32]:
import requests

def telegram_bot_sendtext(bot_message):
    

    bot_token = '1062074868:AAEDR3GHvbpIvcdl6fvYP5FWNhg9ryUuJ7o'
    bot_chatID = '284802442'
    send_text = 'https://api.telegram.org/bot' + bot_token + '/sendMessage?chat_id=' + bot_chatID + '&parse_mode=Markdown&text=' + bot_message

    response = requests.get(send_text)

    return response.json()

In [239]:
class Svm_dcmp:
    
    def __init__(self, X, y, gamma, C, q):
        
        self.X = X
        self.y = y
        self.alpha = np.zeros((1,X.shape[0]))
        self.b = 0
        self.C = C
        self.gamma = gamma
        self.q = q
        
    def predict(self,X):
        
        z = (self.alpha*self.y) @ self.kernel_gauss(self.X, X) + self.b
        a = np.sign(z)    
        return a
    
    def kernel_gauss(self, X1, X2):
        return rbf_kernel(X1,X2, gamma = self.gamma)
    
    def kernel_poly(self, X1, X2):
        return polynomial_kernel(X1,X2, gamma = self.gamma)
    
    def obj_fun_dcmp(self, alpha):
        
        y = self.y.reshape(-1,1)
        K = self.kernel_gauss(self.X, self.X)
        Q = np.outer(y, y) * K
        print(  alpha.reshape(-1,1).shape)
        fun = 0.5 * (alpha @ Q[:, self.W]) @ alpha.T -\
        (np.delete(self.alpha, self.W) @ np.delete(Q[:,self.W], self.W) - np.ones(self.W))*alpha
        grad = Q @ alpha.T - np.ones_like(alpha)
        
        return fun, grad
    
    def optimize(self):
        
        
        for i in range(10):
            
            alpha = self.alpha.ravel(); y = self.y.ravel(); C = self.C; q = self.q
            R = np.where( (alpha < 1e-5) & (y == +1) | (alpha > C-1e-5) & (y == -1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]
            S = np.where( (alpha < 1e-5) & (y == -1) | (alpha > C-1e-5) & (y == +1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]

            K = self.kernel_gauss(self.X, self.X)
            Q = np.outer(y, y) * K
            grady = - (Q @ alpha.T - np.ones_like(alpha))/y

            I = list(np.argpartition(grady[R], q//2)[:q//2])
            J = list(np.argpartition(grady[R], -q//2)[:q//2])
            W = I + J
            self.W = W
            
            inits = alpha[W]
            bounds = [(0, self.C)]*alpha[W].shape[0]
            
            
            res = minimize(self.obj_fun_dcmp, x0 = inits, method = "L-BFGS-B", jac = True, bounds = bounds)
            print(res)
            self.alpha[W] = res.x

            K = self.kernel_gauss(self.X, self.X)
            alpha = res.x.ravel()
            idx = np.where(alpha > 1e-5)[0]
            w = np.matmul(np.multiply(y,alpha.reshape(-1,1)).T, K[:,idx]).T
            b = self.y[idx,None] - w
            self.b = np.mean(b)
    
svm = Svm_dcmp(X_train, y_train, gamma = 0.01, C = 1, q = 6)
svm.optimize()
y_pred = svm.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))

(6, 1)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1600 is different from 6)

In [452]:
class Svm_dcmp:
    
    def __init__(self, X, y, gamma, C, q):
        
        self.X = X
        self.y = y
        self.alpha = np.zeros((X.shape[0]))
        self.b = 0
        self.C = C
        self.gamma = gamma
        self.q = q
        
    def predict(self,X):
        
        z = (self.alpha*self.y) @ self.kernel_gauss(self.X, X) + self.b
        a = np.sign(z)    
        return a
    
    def kernel_gauss(self, X1, X2):
        return rbf_kernel(X1,X2, gamma = self.gamma)
    
    def kernel_poly(self, X1, X2):
        return polynomial_kernel(X1,X2, gamma = self.gamma)

    def get_working_set(self):
        
        alpha = self.alpha ; y = self.y.ravel(); C = self.C; q = self.q
        R = np.where( (alpha < 1e-5) & (y == +1) | (alpha > C-1e-5) & (y == -1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]
        S = np.where( (alpha < 1e-5) & (y == -1) | (alpha > C-1e-5) & (y == +1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]

        K = self.kernel_gauss(self.X, self.X)
        Q = np.outer(y, y) * K
        grady = - (Q @ alpha.T - np.ones_like(alpha))/y
        I = list(np.argpartition(grady[R], q//2)[:q//2])
        J = list(np.argpartition(grady[S], -q//2)[:q//2])
        W = I + J
        
        R_max = max(grady[R])
        S_min = min(grady[S])
        
        print(R_max, S_min)
        cond = ""
        if R_max <= S_min:
            cond = "stop"
        return W, Q, alpha, cond 
    
    def optimize(self):
        
        start = time.time()
        
        for i in range(10):
            
            W, Q, alpha, cond= self.get_working_set()
            
            if cond == "stop":
                break
            
            y = self.y.reshape(-1,1)
            
            # computing alpha
            H = Q
            P = cvxopt_matrix(H[:,W][W,:])
            q = cvxopt_matrix(np.delete(alpha.reshape(1,-1), W) @ np.delete(Q[W,:], W, axis = 1).T -np.ones(len(W)))
            G = cvxopt_matrix(np.vstack((-np.eye(len(W)),np.eye(len(W)))))
            h = cvxopt_matrix(np.hstack((np.zeros(len(W)), np.ones(len(W)) * self.C)))
            A = cvxopt_matrix(y[W].reshape(1, -1))
            b = cvxopt_matrix(np.delete(y, W) @ np.delete(alpha, W))

            res = cvxopt_solvers.qp(P, q, G, h, A, b)

            alpha = np.array(res['x'])
            self.alpha[W] = alpha.T

                
        end = time.time() - start
        print(end)
        
        # computing b
        K = self.kernel_gauss(self.X, self.X)
        alpha = self.alpha.ravel()
        idx = np.where(alpha > 1e-5)[0]
        w = np.matmul((self.y.ravel()*alpha).T, K[:,idx]).T
        b = self.y[idx,None] - w
        self.b = np.mean(b)
    
svm = Svm_dcmp(X_train, y_train, gamma = 0.1, C = 1, q = 1000)
svm.optimize()
y_pred = svm.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))

1.0 -1.0
     pcost       dcost       gap    pres   dres
 0: -2.0193e+02 -1.7050e+03  5e+03  1e+00  2e-15
 1: -1.9420e+02 -8.9823e+02  7e+02  1e-14  6e-16
 2: -2.1018e+02 -2.9447e+02  8e+01  1e-14  7e-16
 3: -2.1514e+02 -2.2584e+02  1e+01  1e-14  6e-16
 4: -2.1591e+02 -2.1663e+02  7e-01  3e-15  5e-16
 5: -2.1600e+02 -2.1604e+02  3e-02  7e-15  5e-16
 6: -2.1601e+02 -2.1601e+02  1e-03  2e-15  6e-16
 7: -2.1601e+02 -2.1601e+02  8e-05  3e-15  5e-16
Optimal solution found.
1.5607195195537944 -1.4289327391811364
     pcost       dcost       gap    pres   dres
 0: -2.5452e+02 -1.9211e+03  6e+03  2e+00  2e-15
 1: -2.3766e+02 -1.0947e+03  9e+02  3e-16  7e-16
 2: -2.6343e+02 -3.9168e+02  1e+02  1e-15  9e-16
 3: -2.7765e+02 -2.9860e+02  2e+01  1e-15  6e-16
 4: -2.8099e+02 -2.8319e+02  2e+00  1e-15  6e-16
 5: -2.8146e+02 -2.8163e+02  2e-01  2e-16  6e-16
 6: -2.8151e+02 -2.8152e+02  8e-03  3e-16  6e-16
 7: -2.8152e+02 -2.8152e+02  4e-04  3e-16  7e-16
 8: -2.8152e+02 -2.8152e+02  2e-05  2e-16  7e-16

In [334]:
svm.alpha

array([[2.21705714e-09, 3.56887795e-01, 2.05980428e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [415]:
class Svm_dcmp:
    
    def __init__(self, X, y, gamma, C, q):
        
        self.X = X
        self.y = y
        self.alpha = np.zeros((X.shape[0]))
        self.b = 0
        self.C = C
        self.gamma = gamma
        self.q = q
        
    def predict(self,X):
        
        z = (self.alpha*self.y) @ self.kernel_gauss(self.X, X) + self.b
        a = np.sign(z)    
        return a
    
    def kernel_gauss(self, X1, X2):
        return rbf_kernel(X1,X2, gamma = self.gamma)
    
    def kernel_poly(self, X1, X2):
        return polynomial_kernel(X1,X2, gamma = self.gamma)
    

#     def con1(self,x):
#         return self.y @ x 
    
    def get_working_set(self):
        
        alpha = self.alpha; y = self.y.ravel(); C = self.C; q = self.q
        R = np.where( (alpha < 1e-5) & (y == +1) | (alpha > C-1e-5) & (y == -1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]
        S = np.where( (alpha < 1e-5) & (y == -1) | (alpha > C-1e-5) & (y == +1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]

        K = self.kernel_gauss(self.X, self.X)
        Q = np.outer(y, y) * K
        grady = - (Q @ alpha.T - np.ones_like(alpha))/y

        I = list(np.argpartition(grady[R], q//2)[:q//2])
        J = list(np.argpartition(grady[R], -q//2)[:q//2])
        W = I + J
        W_ = list(set(np.arange(self.X.shape[0])) - set(W))
        return W, W_, Q, grady, alpha
    def obj_fun(self, alpha, W, W_ ,Q, alpha_W_):
        
        y = self.y.reshape(-1,1)
        K = self.kernel_gauss(self.X, self.X)
        Q = np.outer(y, y) * K
        fun = 0.5 * alpha @ Q[np.ix_(W,W)] @ alpha.T - (alpha_W_ @ Q[np.ix_(W_,W)] - np.ones(len(W)))*alpha
        grad = Q[np.ix_(W,W)] @ alpha.T - (alpha_W_ @ Q[np.ix_(W_,W)] - np.ones(len(W)))
        
        return fun, grad
        
    def optimize(self):
        
        for i in range(3):
            
            W, W_, Q, grady, alpha = self.get_working_set()
            
            alpha = self.alpha.ravel(); y = self.y.ravel(); C = self.C; q = self.q
            R = np.where( (alpha < 1e-5) & (y == +1) | (alpha > C-1e-5) & (y == -1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]
            S = np.where( (alpha < 1e-5) & (y == -1) | (alpha > C-1e-5) & (y == +1) | (alpha > 1e-5) & (alpha < C-1e-5))[0]

            K = self.kernel_gauss(self.X, self.X)
            Q = np.outer(y, y) * K
            grady = - (Q @ alpha.T - np.ones_like(alpha))/y

            I = list(np.argpartition(grady[R], q//2)[:q//2])
            J = list(np.argpartition(grady[S], -q//2)[:q//2])
            W = I + J
            W_ = list(set(np.arange(self.X.shape[0])) - set(W))
            print(W)
            
            inits = alpha[W]
            args = (W, W_, Q, alpha[W_])
            bounds = [(0, self.C)]*len(W)#,np.ones_like(self.alpha)*self.C)
            #cons = [{"ltype":"eq", "fun": lambda x: self.y @ x, "jac" : lambda x: self.y.reshape(-1,1)}]
            res = minimize(self.obj_fun, x0 = inits, args = args, method = "L-BFGS-B", jac = True, bounds = bounds)
            #print(res)
            self.alpha[W] = res.x.T

        K = self.kernel_gauss(self.X, self.X)
        alpha = self.alpha.ravel()
        idx = np.where(alpha > 1e-5)[0]
        print(self.alpha[0])
        w = np.matmul(np.multiply(y,alpha.reshape(-1,1)).T, K[:,idx]).T
        b = self.y[idx,None] - w
        self.b = np.mean(b)

svm = Svm_dcmp(X_train, y_train, gamma = 0.01, C = 1, q = 10)
svm.optimize()
y_pred = svm.predict(X_test)
print(accuracy_score(y_test.reshape(-1,1), y_pred.reshape(-1,1)))

[535, 534, 538, 537, 536, 399, 397, 794, 793, 792]
[535, 534, 538, 537, 536, 399, 397, 794, 793, 792]
[535, 534, 538, 537, 536, 399, 397, 794, 793, 792]
0.0


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  app.launch_new_instance()


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [407]:
Svm_dcmp.alpha

AttributeError: type object 'Svm_dcmp' has no attribute 'alpha'