#### Part 1 - Defining class for SVM

In [5]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from cvxopt import solvers
from cvxopt import matrix

from scipy.spatial.distance import cdist

In [6]:
class SVM:
    def __init__(self,C,kernel):
        self.C = C
        self.kernel = kernel

    def fit(self,X_train,y_train):
        self.scaler = StandardScaler()
        self.X = self.scaler.fit_transform(X_train)
        self.y = y_train.reshape(-1,1)
        
        n=self.X.shape[0]
        I_n = np.eye(n)
        P=(self.y@self.y.T)*self.kernel(self.X,self.X)
        q=np.full(n,-1)
        G=np.vstack((I_n,-1*I_n))
        h=np.hstack((np.full(n,self.C),np.zeros(n)))
        A=y_train.reshape(1,-1)
        b=np.zeros(1)

        P,q,G,h,A,b = map(lambda x : matrix(x,tc="d"),(P,q,G,h,A,b))

        solution = solvers.qp(P, q, G, h, A, b)
        self.a = np.asarray(solution['x']).squeeze()
        
        support_indices = np.logical_and(self.a>=1e-10, self.a<self.C)
        X_S = self.X[support_indices]
        self.b = np.mean(self.y - self.a*self.y.T @ self.kernel(self.X, X_S))

    def predict(self,X_test):
        X_test=self.scaler.transform(X_test)
        return np.sign(self.a*self.y.T @ self.kernel(self.X, X_test) + self.b)


#### Part 2 - Defining Radial Basis Function(RBF) Kernel

In [7]:
def rbf_kernel(X1,X2,sigma):
    return np.exp(-cdist(X1, X2, 'sqeuclidean') / (2*sigma**2))

#### Part 3 - Loading and Processing Dataset

In [8]:
titanic_df = pd.read_csv('datasets/titanic_processed.csv')
X = titanic_df.drop('Survived',axis = 1).values
y = titanic_df['Survived'].values
y[y==0] = -1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

#### Part 4 - Implementing SVM

In [9]:
from functools import partial 
C = .35
sigma = 2.5
kernel = partial(rbf_kernel,sigma=sigma)

svm_classifier = SVM(C,kernel)
svm_classifier.fit(X_train,y_train)
y_pred = svm_classifier.predict(X_test)
print(f'Accuracy of the classifer: {(y_test == y_pred).mean()*100:.2f}%')


     pcost       dcost       gap    pres   dres
 0: -2.1166e+02 -5.8558e+02  6e+03  8e+00  3e-15
 1: -8.8848e+01 -4.9225e+02  6e+02  4e-01  3e-15
 2: -8.4608e+01 -1.3669e+02  5e+01  2e-03  2e-15
 3: -9.2566e+01 -1.1007e+02  2e+01  5e-04  2e-15
 4: -9.5567e+01 -1.0239e+02  7e+00  1e-04  2e-15
 5: -9.6721e+01 -9.9922e+01  3e+00  6e-05  2e-15
 6: -9.7365e+01 -9.8738e+01  1e+00  8e-06  2e-15
 7: -9.7606e+01 -9.8266e+01  7e-01  2e-06  2e-15
 8: -9.7781e+01 -9.7964e+01  2e-01  2e-07  2e-15
 9: -9.7826e+01 -9.7894e+01  7e-02  4e-08  2e-15
10: -9.7850e+01 -9.7861e+01  1e-02  5e-09  2e-15
11: -9.7852e+01 -9.7857e+01  4e-03  7e-10  2e-15
12: -9.7854e+01 -9.7855e+01  1e-03  1e-10  2e-15
13: -9.7854e+01 -9.7855e+01  2e-04  2e-11  2e-15
14: -9.7854e+01 -9.7854e+01  4e-05  3e-12  2e-15
Optimal solution found.
Accuracy of the classifer: 75.28%
