In [19]:
import csv
import numpy as np
from svmutil import *

In [20]:
def _read_feature_rows(path):
    """Parse a CSV file into a list of float feature vectors, one per non-empty row."""
    with open(path, newline='') as fp:
        # csv.reader yields [] for blank lines; skip them so a trailing
        # newline does not add an empty (ragged) sample.
        return [[float(cell) for cell in row] for row in csv.reader(fp) if row]


def _read_label_column(path):
    """Parse a CSV file into a list of int labels taken from the first column of each non-empty row."""
    with open(path, newline='') as fp:
        # Blank lines would otherwise fail on row[0] with IndexError.
        return [int(row[0]) for row in csv.reader(fp) if row]


def read_input():
    """Load the train/test splits from CSV files in the current working directory.

    Reads 'X_train.csv' and 'X_test.csv' as rows of floats, and 'Y_train.csv'
    and 'Y_test.csv' as one integer label per row (first column only).
    Empty rows are ignored.

    Returns:
        tuple: ``(train_x, train_y, test_x, test_y)`` as NumPy arrays.

    Raises:
        OSError: if one of the four files cannot be opened.
        ValueError: if a cell cannot be converted to float/int.
    """
    train_x = _read_feature_rows('X_train.csv')
    train_y = _read_label_column('Y_train.csv')
    test_x = _read_feature_rows('X_test.csv')
    test_y = _read_label_column('Y_test.csv')

    return np.array(train_x), np.array(train_y), np.array(test_x), np.array(test_y)

In [21]:
def svm_diff_kernel(train_x, train_y, test_x, test_y):
    """Train one SVM per kernel type and evaluate each on the test split.

    For every kernel the function prints a heading, trains with the option
    string ``'-q -t <k>'`` (no other options are passed, so everything else is
    left to the library defaults) and calls ``svm_predict`` on the test data,
    which reports the accuracy.

    Args:
        train_x: training feature vectors.
        train_y: training labels.
        test_x: test feature vectors.
        test_y: test labels.

    Returns:
        dict: kernel name ('Linear', 'Polynomial', 'RBF') mapped to whatever
        ``svm_predict`` returned for that kernel (in LIBSVM this is a tuple of
        predicted labels, accuracy metrics and decision values).
    """
    # (heading label, LIBSVM option string) for each kernel to compare.
    kernels = (
        ('Linear', '-q -t 0'),
        ('Polynomial', '-q -t 1'),
        ('RBF', '-q -t 2'),
    )

    # The training data is identical for every kernel, so build the problem once.
    problem = svm_problem(train_y, train_x)

    results = {}
    for name, options in kernels:
        print('{} kernel function:'.format(name))
        model = svm_train(problem, svm_parameter(options))
        results[name] = svm_predict(test_y, test_x, model)
    return results

In [22]:
def fillin_acc_matrix(c, g, value, acc_matrix, min_exp=-4):
    """Store ``value`` in the accuracy-matrix cell that corresponds to ``(c, g)``.

    Both grid axes are assumed to hold consecutive powers of two starting at
    ``2**min_exp``, so the index along an axis is ``log2(param) - min_exp``.
    With the default ``min_exp=-4`` the value ``2**-4`` maps to index 0.

    Args:
        c: C value of the grid point (selects the row).
        g: gamma value of the grid point (selects the column).
        value: number to store (e.g. cross-validation accuracy).
        acc_matrix: 2-D indexable container, modified in place.
        min_exp: exponent of the smallest grid value on both axes.

    Raises:
        IndexError: if ``c`` or ``g`` falls outside the grid covered by
            ``acc_matrix``.
    """
    # Round to the nearest integer exponent: plain int() truncates toward
    # zero, so a log2 result such as -0.9999997 would land in the wrong cell.
    row = int(np.rint(np.log2(c))) - min_exp
    col = int(np.rint(np.log2(g))) - min_exp

    # Reject negative indices explicitly; otherwise they silently wrap around
    # and overwrite a cell on the opposite edge of the matrix.
    if not 0 <= row < len(acc_matrix):
        raise IndexError('c={} is outside the grid (row {})'.format(c, row))
    if not 0 <= col < len(acc_matrix[row]):
        raise IndexError('g={} is outside the grid (column {})'.format(g, col))

    acc_matrix[row][col] = value

In [23]:
def grid_search(train_x, train_y, c_values=None, gamma_values=None, n_folds=3):
    """Grid-search C and gamma for an RBF-kernel C-SVC via cross-validation.

    Every ``(c, g)`` pair is trained with the option string
    ``'-q -s 0 -t 2 -v <n_folds> -c <c> -g <g>'``; with ``-v`` set,
    ``svm_train`` returns the cross-validation accuracy instead of a model.
    The accuracy matrix (rows follow ``c_values``, columns follow
    ``gamma_values``) is printed after each row of C values is completed.

    Args:
        train_x: training feature vectors.
        train_y: training labels.
        c_values: candidate C values; defaults to ``2**-4 .. 2**4``.
        gamma_values: candidate gamma values; defaults to ``2**-4 .. 2**4``.
        n_folds: number of cross-validation folds (default 3).

    Returns:
        tuple: ``(best_cg, best_acc)`` where ``best_cg`` is the ``(c, g)`` pair
        with the highest accuracy (first one wins on ties) and ``best_acc`` is
        that accuracy. If no pair scores above 0, ``((0, 0), 0)`` is returned.
    """
    if c_values is None:
        c_values = [2 ** e for e in range(-4, 5)]
    if gamma_values is None:
        gamma_values = [2 ** e for e in range(-4, 5)]

    acc_matrix = np.zeros((len(c_values), len(gamma_values)))
    best_cg = (0, 0)
    best_acc = 0

    # The training data is the same for every grid point, so the problem is
    # converted once instead of once per (c, g) pair.
    problem = svm_problem(train_y, train_x)

    for i, c in enumerate(c_values):
        for j, g in enumerate(gamma_values):
            print('c=%f, g=%f' % (c, g))
            parameter = svm_parameter(
                '-q -s 0 -t 2 -v {} -c {} -g {}'.format(n_folds, c, g))
            acc = svm_train(problem, parameter)

            # Index by grid position rather than reconstructing it from the
            # parameter value, so any grid (not only powers of two) works.
            acc_matrix[i][j] = acc
            if acc > best_acc:
                best_cg = (c, g)
                best_acc = acc
        print(acc_matrix)

    return best_cg, best_acc

In [18]:
if __name__ == "__main__":
    # Load the four CSV splits.
    train_x, train_y, test_x, test_y = read_input()

    # 2-1. Use different kernel functions
    svm_diff_kernel(train_x, train_y, test_x, test_y)

    # 2-2. C-SVC, grid search
    best_cg, best_acc = grid_search(train_x, train_y)

    # Report the best (C, gamma) pair and its accuracy, one per line.
    print(best_cg, best_acc, sep='\n')

Linear kernel function:
Accuracy = 95.08% (2377/2500) (classification)
Polynomial kernel function:
Accuracy = 34.68% (867/2500) (classification)
RBF kernel function:
Accuracy = 95.32% (2383/2500) (classification)
c=16.000000, g=0.062500
Cross Validation Accuracy = 97.9%
[[ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [97.9  0.   0.   0.   0.   0.   0.   0.   0. ]]
(16, 0.0625)
97.89999999999999
