# SVM

In [24]:
import numpy as np
import pandas as pd
import os
from libsvm.svmutil import svm_train, svm_predict, svm_problem
import itertools
from scipy.spatial.distance import cdist

## Part 1.
Use SVM models to tackle classification on images of hand-written digits.

In [19]:
def load_data(data_path):
    X_train = pd.read_csv(os.path.join(data_path, "X_train.csv"), header=None).to_numpy()
    y_train = pd.read_csv(os.path.join(data_path, "Y_train.csv"), header=None).to_numpy().reshape(-1)
    X_test = pd.read_csv(os.path.join(data_path, "X_test.csv"), header=None).to_numpy()
    y_test = pd.read_csv(os.path.join(data_path, "Y_test.csv"), header=None).to_numpy().reshape(-1)

    return X_train, y_train, X_test, y_test

In [20]:
X_train, y_train, X_test, y_test = load_data("./data")

### svm_train usage
<div align="center">
<img src="./img/svm_train_usage.png" width = "600" alt="svm_train usage" align=center />
</div>

In [21]:
kernel_type = {
    "linear": 0,
    "polynomial": 1,
    "RBF": 2}

In [22]:
for key, value in kernel_type.items():
    m = svm_train(y_train, X_train, f"-q -t {value}")
    p_labels, p_acc, p_vals = svm_predict(y_test, X_test, m, "-q")
    print(f"kernel_type: {key}\taccuracy: {p_acc[0]:.2f}")

kernel_type: linear	accuracy: 95.08
kernel_type: polynomial	accuracy: 34.68
kernel_type: RBF	accuracy: 95.32


## Part 2
Please use C-SVC. Since there are some parameters you need to tune for, please do the grid search for finding parameters of the best performing model. For instance, in C-SVC you have a parameter C, and if you use RBF kernel you have another parameter 𝛾, you can search for a set of (C, 𝛾) which gives you best performance in cross-validation.

In [23]:
def gridSearch(X_train, y_train, **param):
    kernel_type = param.get("kernel_type", 0)
    C = param.get("C", [1])
    gamma = param.get("gamma", [1 / X_train.shape[1]])
    coef0 = param.get("coef0", [0])
    degree = param.get("degree", [3])

    combinations = [C, gamma, coef0, degree]
    best_acc = 0
    best_comb = None
    for comb in list(itertools.product(*combinations)):
        acc = svm_train(y_train, X_train, f"-q -t {kernel_type} -v 3 -c {comb[0]} -g {comb[1]} -r {comb[2]} -d {comb[3]}")
        if acc > best_acc:
            best_acc = acc
            best_comb = comb

    print(f"best combination (C, gamma, coef0, degree): {best_comb}\tbest accuracy: {best_acc}")
    return best_comb, best_acc

### Linear kernel

In [24]:
param = {"kernel_type": kernel_type['linear'], "C": [10**x for x in range(-5, 6)]}
best_comb, best_acc = gridSearch(X_train, y_train, **param)

Cross Validation Accuracy = 79.6%
Cross Validation Accuracy = 88.54%
Cross Validation Accuracy = 95.22%
Cross Validation Accuracy = 96.76%
Cross Validation Accuracy = 96.58%
Cross Validation Accuracy = 96.2%
Cross Validation Accuracy = 96.16%
Cross Validation Accuracy = 96.18%
Cross Validation Accuracy = 96.14%
Cross Validation Accuracy = 96.18%
Cross Validation Accuracy = 95.84%
best combination (C, gamma, coef0, degree): (0.01, 0.0012755102040816326, 0, 3)	best accuracy: 96.76


In [25]:
m = svm_train(y_train, X_train, f"-q -t 0 -c {best_comb[0]}")
p_labels, p_acc, p_vals = svm_predict(y_test, X_test, m, "-q")
print(f"linear kernel after grid search accuracy: {p_acc[0]:.2f}")

linear kernel after grid search accuracy: 95.96


### Polynomial kernel

In [26]:
param = {"kernel_type": kernel_type['polynomial'], 
         "C": [10**x for x in range(-3, 4)],
         "gamma": [10**x for x in range(-3, 4)],
         "coef0": [x for x in range(-1, 2)],
         "degree": [x for x in range(2, 5)]}
best_comb, best_acc = gridSearch(X_train, y_train, **param)

Cross Validation Accuracy = 0.22%
Cross Validation Accuracy = 81.86%
Cross Validation Accuracy = 0.22%
Cross Validation Accuracy = 45.96%
Cross Validation Accuracy = 28.7%
Cross Validation Accuracy = 23.78%
Cross Validation Accuracy = 78.4%
Cross Validation Accuracy = 77.18%
Cross Validation Accuracy = 75.3%
Cross Validation Accuracy = 0.28%
Cross Validation Accuracy = 84.96%
Cross Validation Accuracy = 0.56%
Cross Validation Accuracy = 46%
Cross Validation Accuracy = 28.64%
Cross Validation Accuracy = 23.88%
Cross Validation Accuracy = 69.68%
Cross Validation Accuracy = 75.9%
Cross Validation Accuracy = 83.56%
Cross Validation Accuracy = 91.48%
Cross Validation Accuracy = 92.28%
Cross Validation Accuracy = 91.78%
Cross Validation Accuracy = 94.3%
Cross Validation Accuracy = 96.08%
Cross Validation Accuracy = 96.04%
Cross Validation Accuracy = 95.3%
Cross Validation Accuracy = 97.5%
Cross Validation Accuracy = 97.4%
Cross Validation Accuracy = 98.14%
Cross Validation Accuracy = 97.12%


In [27]:
m = svm_train(y_train, X_train, f"-q -t 1 -c {best_comb[0]} -g {best_comb[1]} -r {best_comb[2]} -d {best_comb[3]}")
p_labels, p_acc, p_vals = svm_predict(y_test, X_test, m, "-q")
print(f"polynomial kernel after grid search accuracy: {p_acc[0]:.2f}")

polynomial kernel after grid search accuracy: 97.76


### Radial basis kernel

In [28]:
param = {"kernel_type": kernel_type['RBF'], 
         "C": [10**x for x in range(-3, 4)],
         "gamma": [10**x for x in range(-3, 4)]}
best_comb, best_acc = gridSearch(X_train, y_train, **param)

Cross Validation Accuracy = 81.06%
Cross Validation Accuracy = 89.84%
Cross Validation Accuracy = 49.82%
Cross Validation Accuracy = 20.68%
Cross Validation Accuracy = 78.88%
Cross Validation Accuracy = 35.98%
Cross Validation Accuracy = 20%
Cross Validation Accuracy = 80.92%
Cross Validation Accuracy = 91.86%
Cross Validation Accuracy = 50.6%
Cross Validation Accuracy = 20.86%
Cross Validation Accuracy = 78.84%
Cross Validation Accuracy = 36.08%
Cross Validation Accuracy = 20%
Cross Validation Accuracy = 91.82%
Cross Validation Accuracy = 96.28%
Cross Validation Accuracy = 54.84%
Cross Validation Accuracy = 20.74%
Cross Validation Accuracy = 78.86%
Cross Validation Accuracy = 36.08%
Cross Validation Accuracy = 20%
Cross Validation Accuracy = 95.94%
Cross Validation Accuracy = 97.88%
Cross Validation Accuracy = 91.2%
Cross Validation Accuracy = 30.26%
Cross Validation Accuracy = 26.72%
Cross Validation Accuracy = 36.1%
Cross Validation Accuracy = 20%
Cross Validation Accuracy = 97.02%


In [29]:
m = svm_train(y_train, X_train, f"-q -t 2 -g {best_comb[1]}")
p_labels, p_acc, p_vals = svm_predict(y_test, X_test, m, "-q")
print(f"radial basis kernel after grid search accuracy: {p_acc[0]:.2f}")

radial basis kernel after grid search accuracy: 97.52


## Part 3.
Use linear kernel + RBF kernel together (therefore a new kernel function) and compare its performance with respect to others.

### Precomputed kernel usage
<div align="center">
<img src="./img/precomputed_kernel_usage.png" width = "400" alt="precomputed kernel usage" align=center />
</div>

In [30]:
def linearRBF(X, X_, gamma):
    linear = X @ X_.T
    RBF = np.exp(-gamma * cdist(X, X_, 'sqeuclidean'))
    kernel = linear + RBF
    kernel = np.hstack((np.arange(1, len(X)+1).reshape(-1, 1), kernel))

    return kernel

In [23]:
best_comb = [100, 0.01]

In [32]:
K = linearRBF(X_train, X_train, best_comb[1])
KK = linearRBF(X_test, X_train, best_comb[1])

In [33]:
prob = svm_problem(y_train, K, isKernel=True)
m = svm_train(prob, f"-q -t 4 -c {best_comb[0]}")
p_labels, p_acc, p_vals = svm_predict(y_test, KK, m, "-q")
print(f"kernel type: linear + RBF kernel\taccuracy: {p_acc[0]:.2f}")

kernel type: linear + RBF kernel	accuracy: 95.32


## Observation

In [68]:
m = svm_train(y_train, X_train, f"-q -t 1 -d 4")
p_labels, p_acc, p_vals = svm_predict(y_test, X_test, m, "-q")
print(f"polynomial kernel after grid search accuracy: {p_acc[0]:.2f}")

polynomial kernel after grid search accuracy: 23.72


### linear + polynomial + RBF kernel

In [29]:
def linearPolynomialRBF(X, X_, gamma):
    linear = X @ X_.T
    polynomial = np.power((1 + gamma * (X @ X_.T)), 2)
    RBF = np.exp(-gamma * cdist(X, X_, 'sqeuclidean'))
    kernel = linear + polynomial + RBF
    kernel = np.hstack((np.arange(1, len(X)+1).reshape(-1, 1), kernel))

    return kernel

In [30]:
K = linearPolynomialRBF(X_train, X_train, best_comb[1])
KK = linearPolynomialRBF(X_test, X_train, best_comb[1])

In [31]:
prob = svm_problem(y_train, K, isKernel=True)
m = svm_train(prob, f"-q -t 4 -c {best_comb[0]}")
p_labels, p_acc, p_vals = svm_predict(y_test, KK, m, "-q")
print(f"kernel type: linear + polynomial + RBF kernel\taccuracy: {p_acc[0]:.2f}")

kernel type: linear + polynomial + RBF kernel	accuracy: 95.52
