In [1]:
import numpy as np
import matplotlib.pyplot as plt
from numba import jit

In [2]:
from libsvm.svmutil import *
from libsvm.svm import *

In [3]:
import csv

In [4]:
def ReadCSV(filename):
    with open(filename, newline='') as csvfile:
        row = csv.reader(csvfile)
        data_list = [[float(ele) for ele in array] for array in row]
    
    data = np.array(data_list)
    row, col = data.shape

    return [data, row, col]

# Data prepare

In [5]:
X_train, X_train_row, X_train_col = ReadCSV("X_train.csv")
print(X_train_row, X_train_col)

5000 784


In [6]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [7]:
Y_train, Y_train_row, Y_train_col = ReadCSV("Y_train.csv")
print(Y_train_row, Y_train_col)

5000 1


In [8]:
Y_train = Y_train.reshape(5000,)

In [9]:
X_test, X_test_row, X_test_col = ReadCSV("X_test.csv")
print(X_test_row, X_test_col)

2500 784


In [10]:
Y_test, Y_test_row, Y_test_col = ReadCSV("Y_test.csv")
print(Y_test_row, Y_test_col)

2500 1


In [11]:
Y_test = Y_test.reshape(2500, )

In [12]:
sample = X_train[0].reshape(28, 28)
sample = np.where(sample > 0, 1, 0)

In [13]:
for i in range(28):
    for j in range(28):
        print(sample[i][j], end='')
    print()

0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000001111111100000
0000000000001111111111110000
0000000000011111111111110000
0000000000111111100011110000
0000000001111100000011110000
0000000111111000000011110000
0000000111111000000011110000
0000001111111000000011110000
0000011111100000000011100000
0000111111100000000111000000
0000111111100000001111000000
0000111111100000011110000000
0000111111000000111110000000
0000111111000000111000000000
0000001111111111110000000000
0000001111111111110000000000
0000000000111111100000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000


In [21]:
# Linear kernel
model = svm_train(Y_train, X_train, '-t 0')
label, acc, val = svm_predict(Y_test, X_test, model)

Accuracy = 95.08% (2377/2500) (classification)


In [22]:
# Polynomial kernel
model = svm_train(Y_train, X_train, '-t 1')
label, acc, val = svm_predict(Y_test, X_test, model)

Accuracy = 34.68% (867/2500) (classification)


In [23]:
# RBF kernel
model = svm_train(Y_train, X_train, '-t 2')
label, acc, val = svm_predict(Y_test, X_test, model)

Accuracy = 95.32% (2383/2500) (classification)


In [17]:
# Linear kernel
C = [2**-3, 2**-1, 2**1]
G = [2**-1, 2**1, 2**3]

max_acc = 0.0
max_cost = 0.0
max_gamma = 0.0

for cost in C:
    for gamma in G:
        model = svm_train(Y_train, X_train, f'-t 0 -s 0 -c {cost} -g {gamma} -v 5')
        print(f'cost: {cost}, gamma: {gamma}, acc: {model}')
        if model > max_acc:
            max_acc = model
            max_cost = cost
            max_gamma = gamma

print(f'max_cost: {max_cost}, max_gamma: {max_gamma}, max_acc: {max_acc}')
model = svm_train(Y_train, X_train, f'-t 0 -s 0 -c {max_cost} -g {max_gamma}')
label, acc, val = svm_predict(Y_test, X_test, model)

Cross Validation Accuracy = 97%
cost: 0.125, gamma: 0.5, acc: 97.0
Cross Validation Accuracy = 96.94%
cost: 0.125, gamma: 2, acc: 96.94
Cross Validation Accuracy = 96.82%
cost: 0.125, gamma: 8, acc: 96.82
Cross Validation Accuracy = 96.24%
cost: 0.5, gamma: 0.5, acc: 96.24000000000001
Cross Validation Accuracy = 96.38%
cost: 0.5, gamma: 2, acc: 96.38
Cross Validation Accuracy = 96.12%
cost: 0.5, gamma: 8, acc: 96.12
Cross Validation Accuracy = 96.28%
cost: 2, gamma: 0.5, acc: 96.28
Cross Validation Accuracy = 96.06%
cost: 2, gamma: 2, acc: 96.06
Cross Validation Accuracy = 96.32%
cost: 2, gamma: 8, acc: 96.32
max_cost: 0.125, max_gamma: 0.5, max_acc: 97.0
Accuracy = 95.92% (2398/2500) (classification)


In [18]:
# RBF kernel
C = [2**-3, 2**-1, 2**1]
G = [2**-1, 2**1, 2**3] 

max_acc = 0.0
max_cost = 0.0
max_gamma = 0.0

for cost in C:
    for gamma in G:
        model = svm_train(Y_train, X_train, f'-t 2 -s 0 -c {cost} -g {gamma} -v 5')
        print(f'cost: {cost}, gamma: {gamma}, acc: {model}')
        if model > max_acc:
            max_acc = model
            max_cost = cost
            max_gamma = gamma

print(f'max_cost: {max_cost}, max_gamma: {max_gamma}, max_acc: {max_acc}')
model = svm_train(Y_train, X_train, f'-t 2 -s 0 -c {max_cost} -g {max_gamma}')
label, acc, val = svm_predict(Y_test, X_test, model)

Cross Validation Accuracy = 22.4%
cost: 0.125, gamma: 0.5, acc: 22.400000000000002
Cross Validation Accuracy = 20.34%
cost: 0.125, gamma: 2, acc: 20.34
Cross Validation Accuracy = 78.96%
cost: 0.125, gamma: 8, acc: 78.96
Cross Validation Accuracy = 26.34%
cost: 0.5, gamma: 0.5, acc: 26.340000000000003
Cross Validation Accuracy = 20.38%
cost: 0.5, gamma: 2, acc: 20.380000000000003
Cross Validation Accuracy = 78.96%
cost: 0.5, gamma: 8, acc: 78.96
Cross Validation Accuracy = 46.12%
cost: 2, gamma: 0.5, acc: 46.12
Cross Validation Accuracy = 25.28%
cost: 2, gamma: 2, acc: 25.28
Cross Validation Accuracy = 20.92%
cost: 2, gamma: 8, acc: 20.919999999999998
max_cost: 0.125, max_gamma: 8, max_acc: 78.96
Accuracy = 78.64% (1966/2500) (classification)
