## **Import Resources**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils import resample
import random

## **Load Data**

In [7]:

X_train_pca = pd.read_csv('X_train_pca.csv')
X_test_pca = pd.read_csv('X_test_pca.csv')
y_train = pd.read_csv('y_train.csv')
y_test = pd.read_csv('y_test.csv')
y_train, y_test = y_train.values.ravel(), y_test.values.ravel()
y_train, y_test = np.where(y_train <= 0, -1, 1), np.where(y_test <= 0, -1, 1)
X_train_pca, X_test_pca = X_train_pca.to_numpy(), X_test_pca.to_numpy()
X_sample, _, y_sample, _ = train_test_split(X_train_pca, y_train, test_size=0.80, stratify=y_train, random_state=1)
X_sample.shape, y_sample.shape, X_test_pca.shape, y_test.shape

((4482, 15), (4482,), (5604, 15), (5604,))

In [8]:
# used for short run time
from imblearn.under_sampling import RandomUnderSampler

sampling_strategy = {1: 1000, -1: 1000}
rus = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=1)
X_resampled, y_resampled = rus.fit_resample(X_train_pca, y_train)
X_resampled.shape, y_resampled.shape

((2000, 15), (2000,))

## SVM implementation using SMO

In [9]:
#Followed the paper: https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf
class SVM_SMO:
    def __init__(self, C=100, tol=0.001, class_weight = False, max_iteration=100, tau=1e-6, gamma=0.1, kernel_type='rbf'):
        self.C = C # C - Regularization parameter
        self.tol = tol # tol - Tolerance for stopping criterion
        self.class_weight = class_weight # class_weight - Flag to use class weights (True/False)
        self.max_iteration = max_iteration # max_iteration - Maximum number of iterations for the SMO algorithm
        self.tau = tau # tau - Tolerance to avoid division by zero in kernel calculations
        self.gamma = gamma # gamma - Parameter for RBF kernel
        self.kernel_type = kernel_type # kernel_type - Type of kernel ('linear' or 'rbf')
        self.alpha = None
        self.b = None
        self.X_T = None
        self.y_T = None

    # Radial Basis Function kernel
    def rbf_kernel(self, x1, x2):
        return np.exp(-self.gamma * np.linalg.norm(x1 - x2) ** 2)

    # Linear kernel
    def linear_kernel(self, x1, x2):
        return np.dot(x1, x2)

    # Compute the kernel matrix for the training data
    def compute_kernel_matrix(self, X):
        n_samples = X.shape[0]
        self.kernel_matrix = np.zeros((n_samples, n_samples))
        # Compute the kernel function for each pair of points using the specified kernel type
        for i in range(n_samples):
            for j in range(n_samples):
                if self.kernel_type == 'linear':
                    self.kernel_matrix[i, j] = self.linear_kernel(X[i], X[j])
                else:
                    self.kernel_matrix[i, j] = self.rbf_kernel(X[i], X[j])

    # Compute the gradient of the objective function with respect to alpha
    def compute_gradient(self):
        gradient = self.kernel_matrix @ (self.alpha * self.y_T) + self.y_T
        return gradient

    # Select the working set of alpha pairs to update
    def select_working_set(self):
        gradient = self.compute_gradient()
        # Compute the indices of the alpha pairs that violate the KKT conditions
        I_up = (self.y_T == 1) & (self.alpha < self.C) | (self.y_T == -1) & (self.alpha > 0)
        I_low = (self.y_T == -1) & (self.alpha < self.C) | (self.y_T == 1) & (self.alpha > 0)

        # Compute the indices of the alpha pairs that violate the KKT conditions and have the largest and smallest gradients
        a = self.kernel_matrix + self.kernel_matrix.T - 2 * np.diag(self.kernel_matrix.diagonal())
        b = -self.y_T * gradient

        # Set the diagonal of a to tau to avoid division by zero
        a[a <= 0] = self.tau

        # Compute the indices of the alpha pairs that violate the KKT conditions and have the largest and smallest gradients
        i = np.argmax(np.where(I_up, -self.y_T * gradient, -np.inf))
        j = np.argmin(np.where(I_low, b[i]**2 / a[i], np.inf))

        return i, j

    def update_alpha(self, i, j):
        # Extract entries from the kernel matrix
        Q_i = self.kernel_matrix[i, :]
        Q_j = self.kernel_matrix[j, :]

        # Calculate the quadratic coefficient
        quad_coef = max((Q_i[i] + Q_j[j] - 2 * Q_i[j]), self.tau)

        # Compute delta, the amount to change alpha by
        delta = (-self.y_T[i] + self.y_T[j]) / quad_coef 
        diff = self.alpha[i] - self.alpha[j]

        # Add class weights to the alpha values
        weight_i = self.class_weights[self.y_T[i]]
        weight_j = self.class_weights[self.y_T[j]]

        # Update alpha[i] and alpha[j] 
        self.alpha[i] += delta * weight_i
        self.alpha[j] -= delta * weight_j

        # Clip alpha values based on their difference and the value of C
        if diff > 0:
            if self.alpha[j] < 0:
                self.alpha[j] = 0
                self.alpha[i] = diff
        else:
            if self.alpha[i] < 0:
                self.alpha[i] = 0
                self.alpha[j] = -diff

        if diff > self.C - i - self.C - j:
            if self.alpha[i] > self.C - i:
                self.alpha[i] = self.C - i
                self.alpha[j] = self.C - i - diff
        else:
            if self.alpha[j] > self.C - j:
                self.alpha[j] = self.C - j
                self.alpha[i] = self.C - j + diff
    
    # Check if the stopping criteria is met
    def stopping_criteria_met(self, gradient):
        return np.all(np.logical_or(
            np.logical_and(self.alpha <= 0, gradient >= -self.tol),
            np.logical_and(self.alpha >= self.C, gradient <= self.tol)
        ))
    
    # Compute the class weights
    def compute_weights(self, y):
        n_samples = len(y)
        # y needs adjustment as the class labels are -1 and 1
        adjusted_y = (y + 1) // 2
        class_counts = np.bincount(adjusted_y, minlength=2)
        class_counts = np.maximum(class_counts, 1)
        n_classes = 2
        class_weights = n_samples / (n_classes * class_counts)
        return class_weights

    # Fit the SVM model
    def fit(self, X, y):
        self.X_T = X
        self.y_T = y
        self.alpha = np.zeros(len(y))
        # Begin with computing the kernel matrix
        self.compute_kernel_matrix(X)

        # Compute class weights if True, set to 1 otherwise
        if self.class_weight == True:
            self.class_weights = self.compute_weights(y)
        else:
            self.class_weights = {label: 1 for label in np.unique(y)}

        # SMO algorithm
        iteration = 0
        while iteration < self.max_iteration:
            i, j = self.select_working_set()
            self.update_alpha(i, j)
            gradient = self.compute_gradient()
            if self.stopping_criteria_met(gradient):
                break
            iteration += 1
        
        # indices of support vectors
        support_vectors = [i for i in range(len(y)) if 0 < self.alpha[i] < self.C] 
        # Compute the bias term
        if support_vectors:
            self.b = np.mean([y[i] - np.dot(self.alpha * y, self.kernel_matrix[i])] for i in support_vectors)
        else: 
            self.b = 0 # default bias
    
    def predict(self, X):
        # Predict class labels for the given test data
        kernel_applied = np.array([
            [self.linear_kernel(x, x_train) if self.kernel_type == 'linear' else self.rbf_kernel(x, x_train)
            for x_train in self.X_T] for x in X
        ])

        # Decision function calculation
        y_pred = np.dot(kernel_applied, self.alpha * self.y_T) + self.b
        return np.sign(y_pred)

In [11]:
svc = SVM_SMO(C=1, tol=0.01, class_weight= False, max_iteration = 100, kernel_type = 'linear')
svc.fit(X_sample, y_sample)
y_pred = svc.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.6473947180585297
Classification Report:
               precision    recall  f1-score   support

          -1       0.86      0.65      0.74      4341
           1       0.35      0.65      0.45      1263

    accuracy                           0.65      5604
   macro avg       0.61      0.65      0.60      5604
weighted avg       0.75      0.65      0.68      5604

