In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/四下/ML/HW/hw5

/content/drive/MyDrive/四下/ML/HW/hw5


In [3]:
! pip install libsvm

Collecting libsvm
  Downloading libsvm-3.23.0.4.tar.gz (170 kB)
[K     |████████████████████████████████| 170 kB 4.0 MB/s 
[?25hBuilding wheels for collected packages: libsvm
  Building wheel for libsvm (setup.py) ... [?25l[?25hdone
  Created wheel for libsvm: filename=libsvm-3.23.0.4-cp37-cp37m-linux_x86_64.whl size=233368 sha256=b889e1682e24851a055b451e9d58151b66ecd290ce852fe0de5cf7ccfb435a98
  Stored in directory: /root/.cache/pip/wheels/cd/e8/1e/bf95cf256e4d3ffc94289ab508c49d48e34c98220af63e3513
Successfully built libsvm
Installing collected packages: libsvm
Successfully installed libsvm-3.23.0.4


In [4]:
import sys
import time
import numpy as np
from libsvm.svmutil import *
from scipy.spatial.distance import cdist

## Read data

In [11]:
def read_data(dir, pathes):    
    images = np.loadtxt(dir + pathes[0], dtype=float, delimiter=',')
    labels = np.loadtxt(dir + pathes[1], dtype=int, delimiter=',')
    return images, labels

dir = "./data/"
train_pathes = ['X_train.csv', 'Y_train.csv']
test_pathes = ['X_train.csv', 'Y_train.csv'] 

X_train, Y_train = read_data(dir, train_pathes)
X_test, Y_test = read_data(dir, test_pathes)

## Task1

In [6]:
kernel = {'linear': 0, 'polynomial': 1, 'RBF': 2}

In [9]:
def svm(k):
    print(f'kernel_type : {k}')
    
    start = time.time()

    param = svm_parameter(f'-t {kernel[k]}')
    prob  = svm_problem(Y_train, X_train)
    model = svm_train(prob, param)
    _, p_acc, _ = svm_predict(Y_test, X_test, model)

    end = time.time()
    print("Time: %0.2f seconds." % (end - start))
    print()

if __name__ == '__main__':
    # Task 1
    svm('linear')
    svm('polynomial')
    svm('RBF')

kernel_type : linear
Accuracy = 100% (5000/5000) (classification)
Accuracy: (100.0, 0.0, 1.0)
Time: 7.21 seconds.

kernel_type : polynomial
Accuracy = 34.34% (1717/5000) (classification)
Accuracy: (34.339999999999996, 2.6558, 0.1500523847736396)
Time: 61.84 seconds.

kernel_type : RBF
Accuracy = 96.88% (4844/5000) (classification)
Accuracy: (96.88, 0.1164, 0.9430581837530781)
Time: 14.94 seconds.



## Task2

In [7]:
def grid_search_on_c(arg, max_acc):
    best_c = 1e-1
    
    for c in [1e-2, 1e-1, 1e0, 1e1, 1e2]:
        param = svm_parameter(arg.format(c))
        prob  = svm_problem(Y_train, X_train)
        p_acc = svm_train(prob, param)
        if p_acc > max_acc:
            max_acc = p_acc
            best_c = c
    return max_acc, best_c

In [8]:
def grid_search(k):

    fold = 5
    print(f'kernel_type : {k}')

    time_start = time.time()
    max_acc = 0.0
    
    if k == 'linear':
        arg = f'-t {kernel[k]} -c ' + '{} ' + f'-v {fold} -q'
        max_acc, best_c = grid_search_on_c(arg, max_acc)
        best_params = {'C': best_c}
    
    elif k == 'polynomial':
        for degree in range(1, 3):
            for gamma in [1e0, 1e1]:
                for coef0 in [1e0, 1e1]:
                    arg = f'-t {kernel[k]} -c ' + '{} ' + f'-g {gamma} -d {degree} -r {coef0} -v {fold} -q'
                    local_max_acc, best_c = grid_search_on_c(arg, max_acc)
                    if local_max_acc > max_acc:
                        max_acc = local_max_acc
                        best_params = {'degree':degree,'gamma':gamma,'coef0':coef0,'C':best_c}
    
    elif k == 'RBF':
        for gamma in [1e-3, 1e-2, 1e-1]:
            arg = f'-t {kernel[k]} -c ' + '{} ' + f'-g {gamma} -v {fold} -q'
            local_max_acc, best_c = grid_search_on_c(arg, max_acc)
            if local_max_acc > max_acc:
                max_acc = local_max_acc
                best_params = {'gamma':gamma,'C':best_c}
    
    time_end = time.time()
    
    print(f'Best acc : {max_acc}')
    print(f'Best Params : {best_params}')
    print("Time: %0.2f seconds." % (time_end-time_start))
    print()


if __name__ == '__main__':
    # Task 2
    print("\n----------------------------------------------\n")
    grid_search('linear')
    print("\n----------------------------------------------\n")
    grid_search('polynomial')
    print("\n----------------------------------------------\n")
    grid_search('RBF')
    print("\n----------------------------------------------\n")


----------------------------------------------

kernel_type : linear
Cross Validation Accuracy = 97.04%
Cross Validation Accuracy = 97.12%
Cross Validation Accuracy = 96.3%
Cross Validation Accuracy = 96.3%
Cross Validation Accuracy = 95.88%
Best acc : 97.11999999999999
Best Params : {'C': 0.1}
Time: 57.34 seconds.


----------------------------------------------

kernel_type : polynomial
Cross Validation Accuracy = 96.98%
Cross Validation Accuracy = 96.98%
Cross Validation Accuracy = 96.52%
Cross Validation Accuracy = 96.18%
Cross Validation Accuracy = 96.38%
Cross Validation Accuracy = 96.94%
Cross Validation Accuracy = 96.48%
Cross Validation Accuracy = 96.24%
Cross Validation Accuracy = 96.66%
Cross Validation Accuracy = 96.32%
Cross Validation Accuracy = 96.8%
Cross Validation Accuracy = 96.2%
Cross Validation Accuracy = 95.94%
Cross Validation Accuracy = 96.22%
Cross Validation Accuracy = 96.1%
Cross Validation Accuracy = 96.96%
Cross Validation Accuracy = 96.04%
Cross Validatio

## Task 3

In [22]:
def linear_kernel(xi, xj):
    return xi @ xj.T

def RBF_kernel(u, v, gamma):
    return np.exp(-gamma * cdist(u, v, 'sqeuclidean'))

def svm_combined_kernel():
    fold = 5
    max_acc = 0.0
    time_start = time.time()

    for gamma in [1e-3, 1e-2, 1e-1, 1e0, 1e1]:
        # Build a new kernal by combining linear and rbf kernal
        X_train_new = linear_kernel(X_train, X_train) + RBF_kernel(X_train, X_train, gamma)
        X_test_new = linear_kernel(X_test, X_test) + RBF_kernel(X_test, X_test, gamma)
        X_train_new = np.hstack((np.arange(1, len(X_train)+1).reshape(-1, 1), X_train_new))
        X_test_new = np.hstack((np.arange(1, len(X_test)+1).reshape(-1, 1), X_test_new))
        for c in [1e-2, 1e-1, 1e0, 1e1, 1e2]:
          # train the svm
            arg = f'-t 4 -c {c} -g {gamma} -v {fold} -q'
            param = svm_parameter(arg)
            prob  = svm_problem(Y_train, X_train_new, isKernel=True)
            p_acc = svm_train(prob, param)
            if p_acc > max_acc:
                max_acc = p_acc
                best_params = {'gamma':gamma,'C':c}
    
    time_end = time.time()
    print(f'Best acc : {max_acc}')
    print(f'Best Params : {best_params}')
    print("Time: %0.2f seconds." % (time_end-time_start))

if __name__ == '__main__':
    svm_combined_kernel()

Cross Validation Accuracy = 96.9%
Cross Validation Accuracy = 96.9%
Cross Validation Accuracy = 96.06%
Cross Validation Accuracy = 96.44%
Cross Validation Accuracy = 96.32%
Cross Validation Accuracy = 96.98%
Cross Validation Accuracy = 97.14%
Cross Validation Accuracy = 96.32%
Cross Validation Accuracy = 96.16%
Cross Validation Accuracy = 96.2%
Cross Validation Accuracy = 97.04%
Cross Validation Accuracy = 97%
Cross Validation Accuracy = 96.54%
Cross Validation Accuracy = 96.36%
Cross Validation Accuracy = 96.68%
Cross Validation Accuracy = 96.88%
Cross Validation Accuracy = 96.92%
Cross Validation Accuracy = 96.34%
Cross Validation Accuracy = 96.3%
Cross Validation Accuracy = 96.48%
Cross Validation Accuracy = 97.1%
Cross Validation Accuracy = 97%
Cross Validation Accuracy = 96.52%
Cross Validation Accuracy = 96.5%
Cross Validation Accuracy = 96.58%
Best acc : 97.14
Best Params : {'gamma': 0.01, 'C': 0.1}
Time: 652.47 seconds.
