In [12]:
import csv
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
from libsvm.svmutil import *

# SVM Task-1

## MNIST Reading

X_train.csv is a $5000 \times 784$ matrix. Every row corresponds to a $28 \times 28$ gray-scale image. <br>
y_train.csv is a $5000 \times 1$ matrix, which records the class of the training samples.<br>

In [13]:
def load_X_data(file):
    """Load a CSV file of numeric features into a float32 array.

    Every non-empty CSV line becomes one row of the result (for X_train.csv
    each row is one flattened image).

    Parameters
    ----------
    file : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        float32 array with one row per non-empty CSV line.

    Raises
    ------
    ValueError
        If a field cannot be converted to float or the rows have
        different lengths.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(file, 'r', newline='') as f:
        # csv.reader yields an empty list for a blank line (e.g. a trailing
        # newline at end of file); keeping it would make the rows ragged and
        # break the array conversion below.
        rows = [row for row in csv.reader(f) if row]
    return np.asarray(rows, dtype=np.float32)

def load_y_data(file):
    """Load integer class labels from the first column of a CSV file.

    Parameters
    ----------
    file : str or path-like
        Path of the CSV file to read; only the first field of each
        non-empty line is used.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one label per non-empty CSV line.

    Raises
    ------
    ValueError
        If a label cannot be converted to an integer.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(file, 'r', newline='') as f:
        # Blank lines come back as empty lists; indexing them with [0] would
        # raise IndexError, so they are skipped.
        labels = [row[0] for row in csv.reader(f) if row]
    return np.asarray(labels, dtype=int)

In [14]:
# Build the paths with os.path.join instead of hard-coded backslashes so the
# notebook also runs on systems whose path separator is not '\'.
DATA_DIR = os.path.join('data', 'ML_HW05', 'data')

train_X = load_X_data(os.path.join(DATA_DIR, 'X_train.csv'))
train_y = load_y_data(os.path.join(DATA_DIR, 'y_train.csv'))
test_X = load_X_data(os.path.join(DATA_DIR, 'X_test.csv'))
test_y = load_y_data(os.path.join(DATA_DIR, 'y_test.csv'))

## API svm_train param

-s svm_type : set type of SVM (default 0)<br>
&emsp;0 -- C-SVC (multi-class classification)<br>
-t kernel_type : set type of kernel function (default 2) <br>
&emsp;0 -- linear: u'*v<br>
&emsp;1 -- polynomial: (gamma*u'*v + coef0)^degree<br>
&emsp;2 -- radial basis function: exp(-gamma*|u-v|^2)<br>
-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)<br>
-g gamma : set gamma in kernel function (default 1/num_features)<br>
-v n: n-fold cross validation mode <br>
-q : quiet mode (no outputs)<br>

### RBF kernel

$K(x, x') = \exp\left(-\gamma \lVert x - x' \rVert_{2}^{2}\right)$

### Grid Search

In [15]:
def grid_search_RBF(train_X, train_y,
                    costs=(0.001, 0.01, 0.1, 1, 10, 100),
                    gammas=(0.001, 0.01, 0.1, 1, 10, 100),
                    n_folds=5):
    """Grid-search C and gamma for a C-SVC with an RBF kernel.

    Every (gamma, C) pair is scored with n-fold cross validation through
    ``svm_train`` (options ``-s 0 -t 2 -v n_folds -q``); the pair with the
    highest score wins. On ties the pair tried first is kept.

    Parameters
    ----------
    train_X : training samples, passed unchanged to ``svm_train``.
    train_y : training labels, passed unchanged to ``svm_train``.
    costs : iterable of numbers
        Candidate values for ``-c``.
    gammas : iterable of numbers
        Candidate values for ``-g``.
    n_folds : int
        Number of cross-validation folds (``-v``).

    Returns
    -------
    tuple
        ``(best_C, best_g)``.

    Raises
    ------
    ValueError
        If ``costs`` or ``gammas`` is empty, so nothing could be evaluated.
    """
    # Start below any possible score so the first candidate is always
    # recorded; starting at 0 left best_C / best_g unbound whenever no run
    # scored above 0.
    best_acc = float('-inf')
    best_C = best_g = None

    # gamma is the outer dimension and C the inner one, which keeps the
    # evaluation (and tie-breaking) order of the original nested loops.
    for g, C in itertools.product(gammas, costs):
        param = f'-s 0 -t 2 -c {C} -g {g} -v {n_folds} -q'
        print('----------------------------------------------------------')
        print(f'Try the param with C = {C} , g = {g} ')
        acc = svm_train(train_y, train_X, param)
        print('----------------------------------------------------------')

        if acc > best_acc:
            best_acc = acc
            best_C, best_g = C, g

    if best_C is None:
        raise ValueError('grid search needs at least one value for costs and gammas')
    return best_C, best_g

In [16]:
def grid_search_polynomial(train_X, train_y,
                           costs=(0.001, 0.01, 0.1, 1, 10, 100),
                           gammas=(0.001, 0.01, 0.1, 1, 10, 100),
                           degrees=(0, 1, 2, 3, 4, 5),
                           coef0s=(0, 1, 2, 3, 4, 5),
                           n_folds=5):
    """Grid-search C, gamma, degree and coef0 for a polynomial-kernel C-SVC.

    Every combination is scored with n-fold cross validation through
    ``svm_train`` (options ``-s 0 -t 1 -v n_folds -q``); the combination with
    the highest score wins. On ties the combination tried first is kept.

    Parameters
    ----------
    train_X : training samples, passed unchanged to ``svm_train``.
    train_y : training labels, passed unchanged to ``svm_train``.
    costs : iterable of numbers
        Candidate values for ``-c``.
    gammas : iterable of numbers
        Candidate values for ``-g``.
    degrees : iterable of int
        Candidate values for ``-d``. With degree 0 the kernel
        ``(gamma*u'*v + coef0)^degree`` is constant, so those runs carry no
        information; 0 stays in the default grid only to keep the original
        search space. Pass a grid without 0 to skip them.
    coef0s : iterable of numbers
        Candidate values for ``-r``.
    n_folds : int
        Number of cross-validation folds (``-v``).

    Returns
    -------
    tuple
        ``(best_C, best_g, best_d, best_coe)``.

    Raises
    ------
    ValueError
        If any of the grids is empty, so nothing could be evaluated.
    """
    # Start below any possible score so the first candidate is always
    # recorded; starting at 0 left the best_* names unbound whenever no run
    # scored above 0.
    best_acc = float('-inf')
    best = None

    # product() varies the last grid fastest, i.e. the same order as the
    # original C -> g -> degree -> coef nested loops.
    for C, g, d, coe in itertools.product(costs, gammas, degrees, coef0s):
        param = f'-s 0 -t 1 -c {C} -g {g} -d {d} -r {coe} -v {n_folds} -q'

        print('----------------------------------------------------------')
        print(f'Try the param with C = {C} , g = {g} , degree = {d} , coef = {coe}')
        acc = svm_train(train_y, train_X, param)
        print('----------------------------------------------------------')

        if acc > best_acc:
            best_acc = acc
            best = (C, g, d, coe)

    if best is None:
        raise ValueError('grid search needs at least one value in every grid')
    best_C, best_g, best_d, best_coe = best
    return best_C, best_g, best_d, best_coe

In [17]:
def grid_search_linear(train_X, train_y,
                       costs=(0.001, 0.01, 0.1, 1, 10, 100),
                       n_folds=5):
    """Grid-search the cost C for a linear-kernel C-SVC.

    Every candidate C is scored with n-fold cross validation through
    ``svm_train`` (options ``-s 0 -t 0 -v n_folds -q``); the value with the
    highest score wins. On ties the value tried first is kept.

    Parameters
    ----------
    train_X : training samples, passed unchanged to ``svm_train``.
    train_y : training labels, passed unchanged to ``svm_train``.
    costs : iterable of numbers
        Candidate values for ``-c``.
    n_folds : int
        Number of cross-validation folds (``-v``).

    Returns
    -------
    The best value of C.

    Raises
    ------
    ValueError
        If ``costs`` is empty, so nothing could be evaluated.
    """
    # Start below any possible score so the first candidate is always
    # recorded; starting at 0 left best_C unbound whenever no run scored
    # above 0.
    best_acc = float('-inf')
    best_C = None
    found = False

    for C in costs:
        # '-t 0' selects the linear kernel.
        param = f'-s 0 -t 0 -c {C} -v {n_folds} -q'

        print('----------------------------------------------------------')
        print(f'Try the param with C = {C}  ')
        acc = svm_train(train_y, train_X, param)
        print('----------------------------------------------------------')

        if acc > best_acc:
            best_acc = acc
            best_C = C
            found = True

    if not found:
        raise ValueError('grid search needs at least one value for costs')
    return best_C

In [18]:
# Choose the cost C for the linear kernel by cross-validated grid search;
# the per-candidate accuracies are printed as the search runs.
linear_best_C = grid_search_linear(train_X, train_y)

----------------------------------------------------------
Try the param with C = 0.001  
Cross Validation Accuracy = 95.52%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01  
Cross Validation Accuracy = 96.9%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1  
Cross Validation Accuracy = 96.86%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1  
Cross Validation Accuracy = 96.26%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 10  
Cross Validation Accuracy = 96.44%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 100  
Cro

In [None]:
# Choose C, gamma, degree and coef0 for the polynomial kernel by cross-validated
# grid search. With the default grids this evaluates 6 x 6 x 6 x 6 = 1296
# combinations, each with cross validation, so expect a long run.
poly_best_C, poly_best_g, poly_best_d, poly_best_coe = grid_search_polynomial(train_X, train_y)

----------------------------------------------------------
Try the param with C = 0.001 , g = 0.001 , degree = 0 , coef = 0
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.001 , degree = 0 , coef = 1
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.001 , degree = 0 , coef = 2
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.001 , degree = 0 , coef = 3
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.001 , degree = 0 , coef = 4
Cross Validation

Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.01 , degree = 0 , coef = 3
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.01 , degree = 0 , coef = 4
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.01 , degree = 0 , coef = 5
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.01 , degree = 1 , coef = 0
Cross Validation Accuracy = 79.32%
----------------------------------------------------------
--------------------------------------------------

Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.1 , degree = 0 , coef = 5
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.1 , degree = 1 , coef = 0
Cross Validation Accuracy = 89.52%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.1 , degree = 1 , coef = 1
Cross Validation Accuracy = 89.4%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 0.1 , degree = 1 , coef = 2
Cross Validation Accuracy = 89.34%
----------------------------------------------------------
-------------------------------------------------

Cross Validation Accuracy = 95.32%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 1 , degree = 1 , coef = 2
Cross Validation Accuracy = 95.34%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 1 , degree = 1 , coef = 3
Cross Validation Accuracy = 95.42%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 1 , degree = 1 , coef = 4
Cross Validation Accuracy = 95.54%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 1 , degree = 1 , coef = 5
Cross Validation Accuracy = 95.52%
----------------------------------------------------------
--------------------------------------------------

Cross Validation Accuracy = 97.1%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 10 , degree = 1 , coef = 5
Cross Validation Accuracy = 97.06%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 10 , degree = 2 , coef = 0
Cross Validation Accuracy = 98.08%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 10 , degree = 2 , coef = 1
Cross Validation Accuracy = 98.06%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 10 , degree = 2 , coef = 2
Cross Validation Accuracy = 97.94%
----------------------------------------------------------
-----------------------------------------------

Cross Validation Accuracy = 98.2%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 100 , degree = 2 , coef = 2
Cross Validation Accuracy = 97.96%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 100 , degree = 2 , coef = 3
Cross Validation Accuracy = 98%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 100 , degree = 2 , coef = 4
Cross Validation Accuracy = 98.04%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.001 , g = 100 , degree = 2 , coef = 5
Cross Validation Accuracy = 98.04%
----------------------------------------------------------
----------------------------------------------

Cross Validation Accuracy = 85.78%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.001 , degree = 2 , coef = 4
Cross Validation Accuracy = 88.08%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.001 , degree = 2 , coef = 5
Cross Validation Accuracy = 89.44%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.001 , degree = 3 , coef = 0
Cross Validation Accuracy = 28.38%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.001 , degree = 3 , coef = 1
Cross Validation Accuracy = 77.22%
----------------------------------------------------------
--------------------------------------

Cross Validation Accuracy = 95.52%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.01 , degree = 3 , coef = 0
Cross Validation Accuracy = 62.96%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.01 , degree = 3 , coef = 1
Cross Validation Accuracy = 94.82%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.01 , degree = 3 , coef = 2
Cross Validation Accuracy = 96.22%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.01 , degree = 3 , coef = 3
Cross Validation Accuracy = 96.66%
----------------------------------------------------------
------------------------------------------

Cross Validation Accuracy = 98%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.1 , degree = 3 , coef = 3
Cross Validation Accuracy = 97.92%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.1 , degree = 3 , coef = 4
Cross Validation Accuracy = 98.12%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.1 , degree = 3 , coef = 5
Cross Validation Accuracy = 98.18%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 0.1 , degree = 4 , coef = 0
Cross Validation Accuracy = 96.62%
----------------------------------------------------------
-------------------------------------------------

Cross Validation Accuracy = 97.96%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 1 , degree = 4 , coef = 0
Cross Validation Accuracy = 96.54%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 1 , degree = 4 , coef = 1
Cross Validation Accuracy = 96.78%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 1 , degree = 4 , coef = 2
Cross Validation Accuracy = 97.14%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 1 , degree = 4 , coef = 3
Cross Validation Accuracy = 97.16%
----------------------------------------------------------
------------------------------------------------------

Cross Validation Accuracy = 96.66%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 10 , degree = 4 , coef = 3
Cross Validation Accuracy = 96.74%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 10 , degree = 4 , coef = 4
Cross Validation Accuracy = 96.58%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 10 , degree = 4 , coef = 5
Cross Validation Accuracy = 96.68%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 10 , degree = 5 , coef = 0
Cross Validation Accuracy = 94.98%
----------------------------------------------------------
--------------------------------------------------

Cross Validation Accuracy = 96.66%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 100 , degree = 5 , coef = 0
Cross Validation Accuracy = 94.92%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 100 , degree = 5 , coef = 1
Cross Validation Accuracy = 95.04%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 100 , degree = 5 , coef = 2
Cross Validation Accuracy = 94.98%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.01 , g = 100 , degree = 5 , coef = 3
Cross Validation Accuracy = 94.76%
----------------------------------------------------------
----------------------------------------------

Cross Validation Accuracy = 97.22%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.001 , degree = 5 , coef = 3
Cross Validation Accuracy = 97.1%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.001 , degree = 5 , coef = 4
Cross Validation Accuracy = 97.04%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.001 , degree = 5 , coef = 5
Cross Validation Accuracy = 96.84%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.01 , degree = 0 , coef = 0
Cross Validation Accuracy = 20%
----------------------------------------------------------
-----------------------------------------------

Cross Validation Accuracy = 97.62%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.1 , degree = 0 , coef = 0
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.1 , degree = 0 , coef = 1
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.1 , degree = 0 , coef = 2
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 0.1 , degree = 0 , coef = 3
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try

Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 1 , degree = 0 , coef = 3
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 1 , degree = 0 , coef = 4
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 1 , degree = 0 , coef = 5
Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 1 , degree = 1 , coef = 0
Cross Validation Accuracy = 96.98%
----------------------------------------------------------
----------------------------------------------------------
Try the par

Cross Validation Accuracy = 20%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 10 , degree = 1 , coef = 0
Cross Validation Accuracy = 96%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 10 , degree = 1 , coef = 1
Cross Validation Accuracy = 96.3%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 10 , degree = 1 , coef = 2
Cross Validation Accuracy = 96.24%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 10 , degree = 1 , coef = 3
Cross Validation Accuracy = 96.2%
----------------------------------------------------------
----------------------------------------------------------
Try

Cross Validation Accuracy = 96.04%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 100 , degree = 1 , coef = 3
Cross Validation Accuracy = 96.2%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 100 , degree = 1 , coef = 4
Cross Validation Accuracy = 96.34%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 100 , degree = 1 , coef = 5
Cross Validation Accuracy = 96.18%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 0.1 , g = 100 , degree = 2 , coef = 0
Cross Validation Accuracy = 98%
----------------------------------------------------------
------------------------------------------------------

Cross Validation Accuracy = 95.46%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.001 , degree = 2 , coef = 0
Cross Validation Accuracy = 80.52%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.001 , degree = 2 , coef = 1
Cross Validation Accuracy = 96.08%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.001 , degree = 2 , coef = 2
Cross Validation Accuracy = 96.48%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.001 , degree = 2 , coef = 3
Cross Validation Accuracy = 96.86%
----------------------------------------------------------
--------------------------------------------------

Cross Validation Accuracy = 97.34%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.01 , degree = 2 , coef = 3
Cross Validation Accuracy = 97.36%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.01 , degree = 2 , coef = 4
Cross Validation Accuracy = 97.36%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.01 , degree = 2 , coef = 5
Cross Validation Accuracy = 97.32%
----------------------------------------------------------
----------------------------------------------------------
Try the param with C = 1 , g = 0.01 , degree = 3 , coef = 0


In [None]:
# Choose C and gamma for the RBF kernel by cross-validated grid search.
RBF_best_C, RBF_best_g = grid_search_RBF(train_X, train_y)

In [None]:
# Assemble the svm_train option string for the final RBF model from the
# grid-search winners. No '-v' here: this string is for training a model,
# not for cross validation.
svm_type, RBF_kernel = '-s 0 ', '-t 2 '
cost = '-c {} '.format(RBF_best_C)
gamma = '-g {} '.format(RBF_best_g)
best_param = ''.join([svm_type, RBF_kernel, cost, gamma, '-q'])
print(f' best param of C : {RBF_best_C} , gamma : {RBF_best_g} ')

In [None]:
# Train the final RBF model on the full training set with the selected
# parameters, then evaluate it on the held-out test set.
model = svm_train(train_y, train_X, best_param)
# svm_predict is given the true test labels so it can score the predictions;
# the first entry of predict_acc is reported below as the accuracy in percent.
predict_y, predict_acc, predict_val = svm_predict(test_y, test_X, model, '-q')
print(f'Predict test data, accuracy : {predict_acc[0]} %')