In [1]:
import numpy as np
import pandas as pd
import time
from cvxopt import matrix
from cvxopt import solvers

In [2]:
# Load the raw data table (the file has no header row) as a NumPy array.
df = pd.read_csv(
    'spambase.data',
    header=None,
).to_numpy()

In [3]:
# Silence the per-iteration progress log of the cvxopt QP solver.
solvers.options['show_progress'] = False
# Seed NumPy's global RNG so the random train/test split is reproducible.
np.random.seed(1023)

In [4]:
# The first 57 columns are the features; column 57 is the label.
X = df[:, :57]
# Remap the labels with 2*y - 1 (a 0/1 label becomes -1/+1).
y = 2 * df[:, 57] - 1

In [5]:
# Fraction of the samples that goes into the training set.
split = .67
# N = number of samples, d = dimension (number of features) of the data.
N, d = X.shape
# Draw the training indices without replacement; every remaining index is a test index.
idx = np.random.choice(np.arange(N), size=int(np.floor(split * N)), replace=False)
test_idx = np.setdiff1d(np.arange(N), idx)
# Partition the features and the labels with the same two index sets.
X_train, y_train = X[idx, :], y[idx]
X_test, y_test = X[test_idx, :], y[test_idx]
# Sizes of the two partitions, used as constants further down.
N_train = X_train.shape[0]
N_test = X_test.shape[0]

In [6]:
#standardize data for better results
#the statistics are computed on the training split only and then applied to
#both splits, so the test data does not influence the scaling
mean = np.mean(X_train,axis=0)
std = np.std(X_train,axis=0)
#a feature that is constant on the training split has std == 0; dividing by it
#would put inf/NaN into that column (and from there into the Gram matrix), so
#use a unit scale for such columns, leaving them centered but unscaled
std = np.where(std > 0, std, 1.0)
X_train = 1/std*(X_train-mean)
X_test = 1/std*(X_test- mean)

In [7]:
# Default parameters of the sigmoid kernel tanh(kappa * <x, y> + sigma):
# the slope kappa is the reciprocal of the training-set size, the offset sigma is zero.
sigmoid_kappa = 1/N_train
sigmoid_sigma = 0

In [8]:
#kernels
#Gaussian (RBF) kernel
def K1(x,y,sigma=1):
    """Evaluate the Gaussian kernel between matching rows of ``x`` and ``y``.

    Returns a 1-D array whose i-th entry is
    exp(-||x_i - y_i||^2 / (2 * sigma^2)), where the norm is taken along axis 1.
    """
    sq_dist = np.linalg.norm(x - y, axis=1) ** 2
    scale = -1 / (2 * sigma ** 2)
    return np.exp(scale * sq_dist)
#polynomial kernel
def K2(x,y,sigma=0,d=2):
    """Evaluate the polynomial kernel between matching rows of ``x`` and ``y``.

    Returns a 1-D array whose i-th entry is (<x_i, y_i> + sigma) ** d, with the
    inner product taken along axis 1.
    """
    inner = (x * y).sum(axis=1)
    shifted = inner + sigma
    return shifted ** d
#linear kernel
def K3(x,y,sigma=0):
    """Evaluate the linear kernel between matching rows of ``x`` and ``y``.

    Returns a 1-D array whose i-th entry is <x_i, y_i> + sigma, with the inner
    product taken along axis 1.
    """
    inner = (x * y).sum(axis=1)
    return inner + sigma
#sigmoid kernel
def K4(x,y,sigma=sigmoid_sigma,kappa=sigmoid_kappa):
    """Evaluate the sigmoid kernel between matching rows of ``x`` and ``y``.

    Returns a 1-D array whose i-th entry is tanh(kappa * <x_i, y_i> + sigma),
    with the inner product taken along axis 1. The defaults are the
    module-level ``sigmoid_sigma`` / ``sigmoid_kappa`` values, captured when
    this function is defined.
    """
    inner = (x * y).sum(axis=1)
    return np.tanh(kappa * inner + sigma)

In [9]:
#functions for gram matrices 
#Gaussian kernel GM
def gramK1(X,sigma=1):
    """Return the Gaussian-kernel Gram matrix of the rows of ``X``.

    Entry (i, j) is exp(-||X_i - X_j||^2 / (2 * sigma^2)).

    The squared distances are obtained from the identity
    ||a - b||^2 = ||a||^2 + ||b||^2 - 2 <a, b>, so only (n, n) arrays are
    allocated instead of an (n, d, n) broadcast of all pairwise differences.

    Parameters
    ----------
    X : 2-D array with one sample per row.
    sigma : kernel bandwidth (must be non-zero).

    Returns
    -------
    (n, n) array, where n is the number of rows of ``X``.
    """
    X = np.asarray(X, dtype=float)
    sq_norms = np.sum(X * X, axis=1)
    sq_dists = sq_norms[:, None] + sq_norms[None, :] - 2.0 * (X @ X.T)
    # Round-off can push tiny distances slightly below zero; clamp them.
    np.maximum(sq_dists, 0.0, out=sq_dists)
    return np.exp(-1/(2*sigma**2)*sq_dists)
#polynomial kernel gram matrix
def gramK2(X,sigma=0,d=2):
    """Return the polynomial-kernel Gram matrix of the rows of ``X``.

    Entry (i, j) is (<X_i, X_j> + sigma) ** d.

    The inner products come from a single matrix product ``X @ X.T``, which
    needs only an (n, n) result instead of an (n, d, n) broadcast intermediate.

    Parameters
    ----------
    X : 2-D array with one sample per row.
    sigma : additive offset inside the power.
    d : exponent of the polynomial.

    Returns
    -------
    (n, n) array, where n is the number of rows of ``X``.
    """
    return (X @ X.T + sigma)**d
#linear kernel GM
def gramK3(X,sigma=0):
    """Return the linear-kernel Gram matrix of the rows of ``X``.

    Entry (i, j) is <X_i, X_j> + sigma.

    The inner products come from a single matrix product ``X @ X.T``, which
    needs only an (n, n) result instead of an (n, d, n) broadcast intermediate.

    Parameters
    ----------
    X : 2-D array with one sample per row.
    sigma : additive offset.

    Returns
    -------
    (n, n) array, where n is the number of rows of ``X``.
    """
    return X @ X.T + sigma
#sigmoid GM
def gramK4(X,sigma=sigmoid_sigma,kappa=sigmoid_kappa):
    """Return the sigmoid-kernel Gram matrix of the rows of ``X``.

    Entry (i, j) is tanh(kappa * <X_i, X_j> + sigma).

    The inner products come from a single matrix product ``X @ X.T``, which
    needs only an (n, n) result instead of an (n, d, n) broadcast intermediate.

    Parameters
    ----------
    X : 2-D array with one sample per row.
    sigma : additive offset inside tanh; defaults to the module-level
        ``sigmoid_sigma`` captured when this function is defined.
    kappa : slope applied to the inner product; defaults to the module-level
        ``sigmoid_kappa`` captured when this function is defined.

    Returns
    -------
    (n, n) array, where n is the number of rows of ``X``.
    """
    return np.tanh(kappa*(X @ X.T)+ sigma)

In [10]:
def predict(x,X,alpha,y,K):
    """Classify one sample with the kernel expansion sign(sum_i alpha_i * y_i * K(x, X_i)).

    Parameters
    ----------
    x : 2-D array holding the sample to classify as a single row.
    X : 2-D array of reference samples, one per row.
    alpha : dual coefficients, one per row of ``X`` (the evaluation loop in
        this notebook passes the QP solution converted to a NumPy array).
    y : 1-D array of labels, one per row of ``X``.
    K : callable ``K(a, b)`` returning one kernel value per pair of matching
        rows of ``a`` and ``b``.

    Returns
    -------
    ``np.sign`` of the decision value (an array when ``alpha`` is 2-D).

    Notes
    -----
    The decision value has no intercept term; only the kernel expansion is used.
    """
    # Size the replication from X itself rather than a global constant, so
    # the function works for any reference set passed in.
    n_ref = X.shape[0]
    x_rep = np.repeat(x,n_ref,axis=0)
    kern_vec = K(x_rep,X)
    return np.sign(alpha.T@(y*kern_vec))

In [11]:
# Kernel name -> name of the function that evaluates that kernel row by row.
kern_dict = dict(
    gaussian='K1',
    polynomial='K2',
    linear='K3',
    sigmoid='K4',
)
# Kernel name -> name of the function that builds that kernel's Gram matrix.
GM_dict = dict(
    gaussian='gramK1',
    polynomial='gramK2',
    linear='gramK3',
    sigmoid='gramK4',
)

In [12]:
#grid of regularization strengths C to sweep (taken from a collaborator's setup)
Cs_to_test = [1.0, 10.0, 100.0,1000.0,10.**4,10.**5,10.**6,10.**7,10.**8]
#one record per (kernel, C) run; the DataFrame is built once after the sweep,
#because DataFrame.append was removed in pandas 2.0 and growing a frame row by
#row copies it on every iteration
result_columns = ['kernel','C','accuracy','comp time']
records = []
for kernel in kern_dict.keys():
    #set kernel (the evaluated strings are the hard-coded function names from kern_dict)
    K = eval(kern_dict[kernel])
    #specify gram matrix function to use
    GM = eval(GM_dict[kernel])
    #regularization hyperparameter
    for C in Cs_to_test:
        start_time = time.time()
        #get Gram matrix
        #Technically, we don't have to compute the Gram matrix for each C; just once is enough
        #But we want computation time to include forming the Gram matrix
        gram = GM(X_train)
        print('kernel:' + kernel + " \tC:" + str(C))
        #set up and solve the dual SVM problem as a QP:
        #  minimize (1/2) a^T P a + q^T a   with P = (y y^T) * gram (elementwise), q = -1
        #  subject to G a <= h, i.e. 0 <= a_i <= C/N_train, and A a = b, i.e. y^T a = 0
        P = matrix(y_train[:,None]@y_train[None,:]*gram)
        q = matrix(-np.ones(N_train))
        G = matrix(np.concatenate([np.eye(N_train),-np.eye(N_train)],axis=0))
        h = matrix(np.concatenate([C*(1/N_train)*np.ones(N_train),np.zeros(N_train)]))
        A = matrix(y_train[None,:])
        b = matrix(np.zeros(1)[None,:])
        sol = solvers.qp(P,q,G,h,A,b)
        #get solution alpha
        alpha = sol['x']
        #convert solution back to numpy
        alpha = np.array(alpha)
        #get run time (Gram matrix construction + QP setup + QP solve)
        run_time = time.time() - start_time

        #evaluate model: percentage of test samples whose predicted sign matches the label
        num_correct = 0
        for i in range(N_test):
            pred = predict(X_test[None,i,:],X_train,alpha,y_train,K)
            if y_test[i] == pred:
                num_correct = num_correct + 1
        acc = 100*num_correct/N_test
        #save results
        records.append({'kernel':kernel, 'C':C,'accuracy':acc,'comp time':run_time})
#assemble the results table once, with the same column order as before
results = pd.DataFrame(records, columns=result_columns)

kernel:gaussian 	C:1.0
kernel:gaussian 	C:10.0
kernel:gaussian 	C:100.0
kernel:gaussian 	C:1000.0
kernel:gaussian 	C:10000.0
kernel:gaussian 	C:100000.0
kernel:gaussian 	C:1000000.0
kernel:gaussian 	C:10000000.0
kernel:gaussian 	C:100000000.0
kernel:polynomial 	C:1.0
kernel:polynomial 	C:10.0
kernel:polynomial 	C:100.0
kernel:polynomial 	C:1000.0
kernel:polynomial 	C:10000.0
kernel:polynomial 	C:100000.0
kernel:polynomial 	C:1000000.0
kernel:polynomial 	C:10000000.0
kernel:polynomial 	C:100000000.0
kernel:linear 	C:1.0
kernel:linear 	C:10.0
kernel:linear 	C:100.0
kernel:linear 	C:1000.0
kernel:linear 	C:10000.0
kernel:linear 	C:100000.0
kernel:linear 	C:1000000.0
kernel:linear 	C:10000000.0
kernel:linear 	C:100000000.0
kernel:sigmoid 	C:1.0
kernel:sigmoid 	C:10.0
kernel:sigmoid 	C:100.0
kernel:sigmoid 	C:1000.0
kernel:sigmoid 	C:10000.0
kernel:sigmoid 	C:100000.0
kernel:sigmoid 	C:1000000.0
kernel:sigmoid 	C:10000000.0
kernel:sigmoid 	C:100000000.0


In [13]:
# Persist the sweep results (the DataFrame index is written as the first column).
results.to_csv(path_or_buf='results.csv')