In [1]:
import pandas as pd
import numpy as np
import libsvm.svmutil as svm_util
import tqdm as tqdm

In [2]:
# Paths of the training and test splits, relative to the working directory.
# NOTE(review): presumably both files are in LIBSVM sparse text format — confirm.
train_file = "satimage.scale"
test_file = "satimage.scale.t"
# svm_read_problem returns a (labels, instances) pair for each file.
y_train, x_train = svm_util.svm_read_problem(train_file)
y_test, x_test = svm_util.svm_read_problem(test_file)

In [3]:
def preprocessing(y, target):
        """Binarize multi-class labels into a one-versus-rest encoding.

        Parameters
        ----------
        y : sequence of numbers
            Original class labels.
        target : number
            The class that is treated as the positive class.

        Returns
        -------
        list
            A new list with the same length as ``y`` that holds 1 where the
            label equals ``target`` and 0 elsewhere, expressed in the dtype
            of ``np.array(y)``. ``y`` itself is not modified.
        """
        labels = np.array(y)
        # Derive the result from one comparison against the untouched labels.
        # Relabelling in two in-place passes (non-target -> 0, then
        # target -> 1) breaks for target == 0: the first pass turns every
        # label into 0 and the second pass then marks all of them positive.
        return np.where(labels == target, 1, 0).astype(labels.dtype).tolist()

## Problem 11

In [4]:
def Problem_11(x_train, y_train, n_features=36):
        """Train a linear soft-margin SVM for "5 versus not 5" and rebuild its weight vector.

        Parameters
        ----------
        x_train : list of dict
            Sparse training instances mapping 1-based feature index to value.
        y_train : sequence of numbers
            Multi-class training labels; they are binarized with
            ``preprocessing(y_train, 5)``.
        n_features : int, optional
            Length of the dense weight vector. Defaults to 36, the value that
            was previously hard-coded.

        Returns
        -------
        tuple
            ``(sv_coef, sv_list, w)``: the model's support-vector
            coefficients, its support vectors (as returned by the model), and
            the dense weight vector ``w = sum_i sv_coef[i][0] * sv_i``.

        Raises
        ------
        IndexError
            If a support vector references a feature index larger than
            ``n_features``.
        """
        y_5 = preprocessing(y_train, 5)

        # C-SVC (-s 0) with C = 10 (-c 10) and a linear kernel (-t 0).
        m = svm_util.svm_train(y_5, x_train, '-s 0 -c 10 -t 0')

        sv_coef = m.get_sv_coef()
        sv_list = m.get_SV()

        w = np.zeros(n_features)
        for coef, sv in zip(sv_coef, sv_list):
                for key, val in sv.items():
                        # Feature indices are 1-based. NOTE(review): some
                        # versions of the LIBSVM Python bindings appear to
                        # include the sparse terminator (index -1) in the SV
                        # dict; writing it through ``key - 1`` would land on a
                        # real feature slot, so non-positive keys are skipped.
                        if key < 1:
                                continue
                        w[key - 1] += coef[0] * val

        return sv_coef, sv_list, w

# Train the "5 vs. not 5" linear SVM; keep its SV coefficients, SVs and weight vector w.
sv_coef, sv_list, w = Problem_11(x_train, y_train)

In [5]:
# Norm ||w|| of the weight vector returned by Problem_11.
np.linalg.norm(w)

4.646266066206163

## Problem 12

In [6]:
def Problem_12(x_train, y_train):
        """Measure the in-sample error of five one-versus-rest polynomial SVMs.

        For each class in 2..6 the labels are binarized with ``preprocessing``,
        a C-SVC is trained on the full training set, and the same set is
        predicted again.

        Returns a list of ``(class, p_acc, e_in)`` tuples, where ``p_acc`` is
        the evaluation tuple returned by ``svm_predict`` and ``e_in`` is
        ``100 - p_acc[0]`` (error in percent).
        """
        # C-SVC, C=10 / polynomial kernel, gamma=1, coef0=1, degree=3
        options = '-s 0 -c 10 ' + '-t 1 -g 1 -r 1 -d 3'

        def evaluate(target):
                # One-versus-rest labels for this class, trained and scored in-sample.
                labels = preprocessing(y_train, target)
                model = svm_util.svm_train(labels, x_train, options)
                _, acc, _ = svm_util.svm_predict(labels, x_train, model)
                return (target, acc, 100 - acc[0])

        return [evaluate(target) for target in (2, 3, 4, 5, 6)]


In [7]:
# One (class, p_acc, E_in) tuple per class 2..6; svm_predict prints the accuracy lines below.
record = Problem_12(x_train, y_train)

Accuracy = 100% (4435/4435) (classification)
Accuracy = 99.7294% (4423/4435) (classification)
Accuracy = 99.1657% (4398/4435) (classification)
Accuracy = 100% (4435/4435) (classification)
Accuracy = 99.7069% (4422/4435) (classification)


In [8]:
# Echo every (class, p_acc, E_in) record and remember the one whose E_in
# (third field) is largest; on ties the earliest record is kept.
max_e_in = record[0]
for entry in record:
        if entry[2] > max_e_in[2]:
                max_e_in = entry
        print(entry)
print("Largest Ein: ", max_e_in)

(2, (100.0, 0.0, 1.0), 0.0)
(3, (99.72942502818489, 0.002705749718151071, 0.9841734986966887), 0.27057497181510826)
(4, (99.16572717023675, 0.00834272829763247, 0.9039585433862588), 0.8342728297632505)
(5, (100.0, 0.0, 1.0), 0.0)
(6, (99.70687711386697, 0.002931228861330327, 0.9836984374076272), 0.29312288613303394)
Largest Ein:  (4, (99.16572717023675, 0.00834272829763247, 0.9039585433862588), 0.8342728297632505)


## Problem 13

In [9]:
def Problem_13(x_train, y_train):
        """Count the support vectors of five one-versus-rest polynomial SVMs.

        For each class in 2..6 the labels are binarized with ``preprocessing``
        and a C-SVC is trained on the full training set.

        Returns a list with ``model.get_nr_sv()`` for each class, in the order
        2, 3, 4, 5, 6.
        """
        # C-SVC, C=10 / polynomial kernel, gamma=1, coef0=1, degree=3
        options = '-s 0 -c 10 ' + '-t 1 -g 1 -r 1 -d 3'

        def count_support_vectors(target):
                labels = preprocessing(y_train, target)
                model = svm_util.svm_train(labels, x_train, options)
                return model.get_nr_sv()

        return [count_support_vectors(target) for target in (2, 3, 4, 5, 6)]


In [10]:
# Support-vector counts for the class-2..6 one-versus-rest models, in that order.
num_sv = Problem_13(x_train, y_train)
print(num_sv)

[93, 385, 660, 281, 607]


## Problem 14

In [11]:
def Problem_14(x_train, y_train, x_test, y_test):
        """Evaluate "1 versus not 1" RBF SVMs on the test set for several values of C.

        Labels of both splits are binarized with ``preprocessing(..., 1)``.
        For every C in 0.01, 0.1, 1, 10, 100 a C-SVC with an RBF kernel
        (gamma fixed to 10) is trained on the training split and scored on
        the test split.

        Returns a list of ``(C, p_acc)`` tuples, where ``p_acc`` is the
        evaluation tuple returned by ``svm_predict``.
        """
        train_labels = preprocessing(y_train, 1)
        test_labels = preprocessing(y_test, 1)

        # radial basis kernel, gamma=10 (identical for every run)
        kernel_opts = '-t 2 -g 10'

        results = []
        for cost in (0.01, 0.1, 1, 10, 100):
                print("C = ", cost)

                # C-SVC with the current C
                svc_opts = '-s 0 -c ' + str(cost) + ' '
                model = svm_util.svm_train(train_labels, x_train, svc_opts + kernel_opts)
                accuracy = svm_util.svm_predict(test_labels, x_test, model)[1]

                results.append((cost, accuracy))
        return results


In [12]:
# (C, p_acc) per tested C for the "1 vs. not 1" RBF model; note this rebinds `record`.
record = Problem_14(x_train, y_train, x_test, y_test)

C =  0.01
Accuracy = 76.95% (1539/2000) (classification)
C =  0.1
Accuracy = 79.25% (1585/2000) (classification)
C =  1
Accuracy = 89.05% (1781/2000) (classification)
C =  10
Accuracy = 89.95% (1799/2000) (classification)
C =  100
Accuracy = 89.95% (1799/2000) (classification)


## Problem 15

In [13]:
def Problem_15(x_train, y_train, x_test, y_test):
        """Evaluate "1 versus not 1" RBF SVMs on the test set for several values of gamma.

        Labels of both splits are binarized with ``preprocessing(..., 1)``.
        For every gamma in 0.1, 1, 10, 100, 1000 a C-SVC (C fixed to 0.1)
        with an RBF kernel is trained on the training split and scored on the
        test split.

        Returns a list of ``(gamma, p_acc)`` tuples, where ``p_acc`` is the
        evaluation tuple returned by ``svm_predict``.
        """
        train_labels = preprocessing(y_train, 1)
        test_labels = preprocessing(y_test, 1)

        # C-SVC, C=0.1 (identical for every run)
        svc_opts = '-s 0 -c 0.1 '

        results = []
        for gamma in (0.1, 1, 10, 100, 1000):
                print("Y = ", gamma)

                # radial basis kernel with the current gamma
                kernel_opts = '-t 2 -g ' + str(gamma)
                model = svm_util.svm_train(train_labels, x_train, svc_opts + kernel_opts)
                accuracy = svm_util.svm_predict(test_labels, x_test, model)[1]

                results.append((gamma, accuracy))
        return results


In [14]:
# (gamma, p_acc) per tested gamma for the "1 vs. not 1" RBF model; note this rebinds `record`.
record = Problem_15(x_train, y_train, x_test, y_test)

Y =  0.1
Accuracy = 98.75% (1975/2000) (classification)
Y =  1
Accuracy = 98.8% (1976/2000) (classification)
Y =  10
Accuracy = 79.25% (1585/2000) (classification)
Y =  100
Accuracy = 76.95% (1539/2000) (classification)
Y =  1000
Accuracy = 76.95% (1539/2000) (classification)


## Problem 16

In [15]:
def Problem_16(x_train, y_train, n_rounds=1000, n_val=200, seed=None):
        """Count how often each RBF gamma wins a random hold-out validation.

        In every round ``n_val`` training examples are drawn without
        replacement as a validation set, a "1 versus not 1" C-SVC (C = 0.1,
        RBF kernel) is trained on the remaining examples for each gamma in
        0.1, 1, 10, 100, 1000, and the gamma with the highest validation
        accuracy gets one vote. Ties go to the smallest gamma because a later
        candidate must be strictly better to replace the current best.

        Parameters
        ----------
        x_train : list of dict
            Sparse training instances.
        y_train : sequence of numbers
            Multi-class training labels; binarized with
            ``preprocessing(y_train, 1)``.
        n_rounds : int, optional
            Number of random splits. Defaults to 1000.
        n_val : int, optional
            Validation-set size per split. Defaults to 200.
        seed : int or None, optional
            When given, splits are drawn from ``np.random.RandomState(seed)``
            so the experiment is reproducible. ``None`` (default) uses the
            global NumPy random state.

        Returns
        -------
        dict
            Maps each gamma to the number of rounds in which it was selected.
        """
        y_all = preprocessing(y_train, 1)
        x_all = list(x_train)

        n_spl = len(x_all)
        sample_range = np.arange(n_spl, dtype=int)

        gammas = [0.1, 1, 10, 100, 1000]
        choose_time = {gamma: 0 for gamma in gammas}

        rng = np.random if seed is None else np.random.RandomState(seed)

        for _ in tqdm.tqdm(range(n_rounds)):
                valid_idx = rng.choice(sample_range, replace=False, size=n_val)

                in_validation = np.zeros(n_spl, dtype=bool)
                in_validation[valid_idx] = True
                train_idx = np.flatnonzero(~in_validation)

                # Pass plain Python lists to LIBSVM: its Python interface
                # documents instances as a list/tuple of dicts (or a numeric
                # 2-D array / sparse matrix), not as a NumPy object array of
                # dicts, which is what fancy-indexing np.array(x_train) yields.
                x_val = [x_all[j] for j in valid_idx]
                y_val = [y_all[j] for j in valid_idx]
                x = [x_all[j] for j in train_idx]
                y = [y_all[j] for j in train_idx]

                cur_best = None
                for Y in gammas:
                        # quiet, C-SVC, C=0.1 — '-q' keeps the thousands of
                        # train/predict calls from flooding the cell output.
                        type_param = '-s 0 -c 0.1 -q '
                        kernel_param = '-t 2 -g ' + str(Y) # radial basis, gamma=Y

                        m = svm_util.svm_train(y, x, type_param + kernel_param)
                        _, p_acc, _ = svm_util.svm_predict(y_val, x_val, m, '-q')

                        if (cur_best is None) or (cur_best[1] < p_acc[0]):
                                cur_best = (Y, p_acc[0])

                choose_time[cur_best[0]] += 1

        return choose_time

In [16]:
# choose_time = Problem_16(x_train, y_train)