In [2]:
import itertools

import libsvm.python.svm as svm
import libsvm.python.svmutil as svmutil
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [3]:
# Load the train/test features and labels from comma-separated files.
# Shapes noted by the original author: X_train 5000*784, Y_train 5000,
# X_test 2500*784, Y_test 2500.
X_train, Y_train, X_test, Y_test = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ('X_train.csv', 'Y_train.csv', 'X_test.csv', 'Y_test.csv')
)

In [4]:
def sparse_matrix(x, iskernel=False):
    """Convert a 2-D array into libsvm's list-of-dicts row format.

    Parameters
    ----------
    x : array-like, 2-D
        One sample per row.
    iskernel : bool, default False
        If False, ``x`` holds features: each row becomes
        ``{column + 1: value}`` for the non-zero entries only (libsvm feature
        indices are 1-based and zeros may be omitted).
        If True, ``x`` is treated as a precomputed kernel matrix: each row
        becomes ``{0: serial, 1: K[i, 0], 2: K[i, 1], ...}`` where ``serial``
        is the 1-based row number. Zero kernel values are kept.

    Returns
    -------
    list of dict
        One ``{int index: float value}`` dict per row of ``x``.
    """
    x = np.asarray(x)
    row, _ = x.shape

    if iskernel:
        # The libsvm README requires that, for precomputed kernels, index 0
        # holds the sample serial number and that all kernel values
        # "including ZEROs" are provided explicitly, so nothing is dropped.
        serial = np.arange(1, row + 1, dtype=float).reshape(-1, 1)
        dense = np.hstack((serial, x))
        return [{j: float(value) for j, value in enumerate(r)} for r in dense]

    # Plain int/float keys and values instead of numpy scalars keep the dicts
    # friendly to the ctypes-based libsvm wrapper.
    return [{int(j) + 1: float(r[j]) for j in np.flatnonzero(r)} for r in x]

In [5]:
# Convert the feature arrays to libsvm's list-of-dicts format and the label
# arrays to plain lists, once for the training split and once for the test split.
x_train, y_train = sparse_matrix(X_train), list(Y_train)
x_test, y_test = sparse_matrix(X_test), list(Y_test)

In [57]:
# Bundle the training labels and sparse feature rows into a libsvm problem.
prob = svmutil.svm_problem(y=y_train, x=x_train)

In [6]:
def GridSearchForSVM(kernel, parameter_matrix, problem, n_ford=10):
    """Cross-validate every combination of the given libsvm options.

    Parameters
    ----------
    kernel : int
        Value passed to libsvm's ``-t`` option.
    parameter_matrix : dict
        Maps a libsvm option letter without the leading dash (e.g. ``'c'``)
        to the iterable of values to try for that option. Combinations are
        visited with the last option varying fastest.
    problem
        Passed unchanged as the first argument of ``svmutil.svm_train``.
    n_ford : int, default 10
        Value passed to libsvm's ``-v`` option.

    Returns
    -------
    results : list of list
        One row per combination: the chosen values (in the key order of
        ``parameter_matrix``), the option string handed to libsvm, and the
        value returned by ``svmutil.svm_train`` for it.
    opts : list of str
        Column names for the rows in ``results``: the option letters followed
        by ``'opt str'`` and ``'result'``.

    Notes
    -----
    An empty ``parameter_matrix`` evaluates the bare ``-t``/``-v`` options
    once; an option with no candidate values yields no combinations at all.
    """
    opts = list(parameter_matrix.keys())
    optstr_init = '-t {:d} -v {:d} '.format(int(kernel), int(n_ford))

    results = []
    # itertools.product walks the grid in the same order as a manual odometer
    # (rightmost option advances first) and copes with empty axes.
    for combo in itertools.product(*(parameter_matrix[key] for key in opts)):
        optstr = optstr_init + ''.join(
            '-' + str(key) + ' ' + str(value) + ' '
            for key, value in zip(opts, combo)
        )
        # With -v in the option string, svm_train runs cross-validation.
        cv_result = svmutil.svm_train(problem, optstr)
        results.append(list(combo) + [optstr, cv_result])

    return results, opts + ['opt str', 'result']

In [None]:
%%time
# Kernel codes follow libsvm's -t option: 0 = linear, 1 = polynomial, 2 = RBF.
# Dictionary keys are libsvm option letters; their order fixes the result columns.
linear_results, linear_options = GridSearchForSVM(
    kernel=0,
    parameter_matrix={'c': [10**-5, 10**-2, 1, 10**2, 10**5]},
    problem=prob,
)
poly_results, poly_options = GridSearchForSVM(
    kernel=1,
    parameter_matrix={
        'c': [10**-2, 1, 10**2],
        'g': [1/100, 1/300, 1/784],
        'r': [0, 1],
        'd': [2, 3, 4, 10],
    },
    problem=prob,
)
rbf_results, rbf_options = GridSearchForSVM(
    kernel=2,
    parameter_matrix={
        'c': [10**-5, 10**-2, 1, 10**2, 10**5],
        'g': [1, 1/50, 1/100, 1/300, 1/784],
    },
    problem=prob,
)

0


In [7]:
def dot_square(x):
    """Return ``np.dot(x, x)``; for a 1-D vector this is its squared Euclidean norm."""
    self_product = np.dot(x, x)
    return self_product

def linear_RBF_kernel(u, v, gamma=0.01):
    """Return the RBF kernel matrix between the rows of ``u`` and ``v``.

    Despite its name, this function returns only the RBF term
    ``exp(-gamma * ||u_i - v_j||^2)``; the linear term is not added.
    (``linear_RBF_kernel_2`` is the variant that returns linear + RBF.)

    Parameters
    ----------
    u : array-like, shape (n, d)
    v : array-like, shape (m, d)
    gamma : float, default 0.01
        RBF width parameter.

    Returns
    -------
    numpy.ndarray, shape (n, m)
        Entry ``[i, j]`` is ``exp(-gamma * ||u[i] - v[j]||^2)``.
    """
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)

    sq_dist = np.empty((u.shape[0], v.shape[0]))
    for i, u_i in enumerate(u):
        # Squared distances from u_i to every row of v in one vectorised step.
        # Working on explicit differences keeps the result non-negative.
        diff = v - u_i
        sq_dist[i] = np.sum(diff * diff, axis=1)

    return np.exp(-gamma * sq_dist)

def linear_RBF_kernel_2(u, v, gamma=0.01):
    """Return linear kernel + RBF kernel between the rows of ``u`` and ``v``.

    The squared distances for the RBF term are obtained from the expansion
    ``||a||^2 + ||b||^2 - 2 a.b``; the absolute value is taken before scaling
    by ``-gamma`` so that small negative round-off cannot flip the sign of
    the exponent.

    Parameters
    ----------
    u, v : numpy.ndarray, 2-D with the same number of columns
    gamma : float, default 0.01
        RBF width parameter.

    Returns
    -------
    numpy.ndarray
        ``u @ v.T + exp(-gamma * |squared distance|)``, one row per row of
        ``u`` and one column per row of ``v``.
    """
    gram = np.matmul(u, v.T)

    u_sq_norms = np.sum(u**2, axis=1)
    v_sq_norms = np.sum(v**2, axis=1)
    sq_dist = u_sq_norms[:, None] + v_sq_norms[None, :] - 2*gram

    rbf_part = np.exp(np.abs(sq_dist) * -gamma)
    return gram + rbf_part