In [1]:
import numpy as np
from hw8_dataload import LFD_Data2
from sklearn import svm

# HW 8
## Primal vs Dual Problem

- The SVM **primal** problem minimizes 0.5w<sup>T</sup>w subject to y<sub>n</sub>(w<sup>T</sup>x<sub>n</sub>+b)&ge;1 for n = 1,2,...,N
- Since w is a d-dimensional vector (the same dimension as x) and b is also a free variable, the primal problem is a quadratic program in **d+1 variables**.

## Polynomial Kernels

- Implementing polynomial kernels with a soft-margin SVM on the handwritten-digit data from the US Postal Service Zip Code data set. Each example consists of the digit label and two extracted features, intensity and symmetry.
- The polynomial kernel K(x<sub>n</sub>, x<sub>m</sub>) = (1+x<sub>n</sub><sup>T</sup>x<sub>m</sub>)<sup>Q</sup>
- Training **two** types of binary classifiers:
    - one-vs-one (one digit class is +1, another is -1, rest are ignored)
    - one-vs-all (one digit class is +1, everything else is -1)

In [5]:
# Data files and hyper-parameters for the first polynomial-kernel experiments.
hw8_train = "hw8_train.dta"
hw8_test = "hw8_test.dta"
hw8_C = 0.01  # soft-margin penalty C
hw8_Q = 2     # polynomial degree Q
hw8_data = LFD_Data2(hw8_train, hw8_test)

# With gamma = 1 and coef0 = 1, scikit-learn's 'poly' kernel
# (gamma * <x, x'> + coef0)^degree is the (1 + x^T x')^Q kernel used in this homework.
# The estimator is built from hw8_C / hw8_Q so the constants above are the
# single source of truth instead of being duplicated as literals.
my_svm = svm.SVC(C=hw8_C, kernel='poly', degree=hw8_Q, coef0=1.0, gamma=1.0)

In [5]:
# One-vs-all experiment: for each digit, fit "digit vs. all" on the training
# set, report the in-sample error, and compare the total number of support
# vectors between odd-digit and even-digit classifiers.

# Pin the hyper-parameters here instead of relying on whatever state `my_svm`
# was left in: the degree/C sweep further down mutates the same estimator, so
# running this cell after it would otherwise silently use different settings.
my_svm.set_params(C=hw8_C, degree=hw8_Q)

# Support-vector counts are integers; collect them in plain lists and convert
# once at the end rather than growing float arrays with np.concatenate.
sv_counts_odd = []
sv_counts_even = []

for cur_num in range(10):
    # cur_num-vs-all (NOTE(review): label encoding is decided inside
    # LFD_Data2.set_filter, which is not visible here)
    hw8_data.set_filter([cur_num])
    cur_X = hw8_data.get_X("train")
    cur_Y = hw8_data.get_Y("train")
    my_svm.fit(cur_X, cur_Y)
    cur_score = my_svm.score(cur_X, cur_Y)  # mean accuracy on the training set
    cur_numalphas = my_svm.n_support_       # support vectors per class
    cur_asum = int(np.sum(cur_numalphas))
    print("%d-vs-all binary classifier in-sample error: %f" % (cur_num, (1.0 - cur_score)))
    if cur_num % 2 == 0:
        sv_counts_even.append(cur_asum)
    else:
        sv_counts_odd.append(cur_asum)

alphas_odd = np.array(sv_counts_odd)
alphas_even = np.array(sv_counts_even)
aodd_sum = np.sum(alphas_odd)
aeven_sum = np.sum(alphas_even)
a_diff = abs(aodd_sum - aeven_sum)
print("Diff in number of sv's between odd and even: %d" % a_diff)


0-vs-all binary classifier in-sample error: 0.105884
1-vs-all binary classifier in-sample error: 0.014401
2-vs-all binary classifier in-sample error: 0.100261
3-vs-all binary classifier in-sample error: 0.090248
4-vs-all binary classifier in-sample error: 0.089425
5-vs-all binary classifier in-sample error: 0.076258
6-vs-all binary classifier in-sample error: 0.091071
7-vs-all binary classifier in-sample error: 0.088465
8-vs-all binary classifier in-sample error: 0.074338
9-vs-all binary classifier in-sample error: 0.088328
Diff in number of sv's between odd and even: 2071


With C = 0.01 and Q = 2, among the even digits the 0-vs-all classifier has the highest in-sample error (0.1059), and among the odd digits the 1-vs-all classifier has the lowest in-sample error (0.0144).

In [6]:
# Load the 1-vs-5 train/test splits.
# NOTE(review): set_filter([1, 5]) presumably restricts the data to digits 1
# and 5 (one-vs-one); confirm against LFD_Data2.
hw8_data.set_filter([1, 5])

x_1v5_train, y_1v5_train = hw8_data.get_X("train"), hw8_data.get_Y("train")
x_1v5_test, y_1v5_test = hw8_data.get_X("test"), hw8_data.get_Y("test")

# Sanity check: feature/label shapes for both splits.
print(*(arr.shape for arr in (x_1v5_train, y_1v5_train, x_1v5_test, y_1v5_test)))

(1561, 2) (1561,) (424, 2) (424,)


In [7]:
# Sweep the polynomial degree Q and the penalty C on the 1-vs-5 problem and
# report in-sample error, out-of-sample error and the number of support
# vectors for every (Q, C) pair. The shared estimator `my_svm` is reconfigured
# in place, so it keeps the last (Q, C) setting after this cell.
pk_Q = [2, 5]
pk_C = [10 ** -k for k in range(4, -1, -1)]  # 1e-4, 1e-3, 1e-2, 1e-1, 1

for Q in pk_Q:
    print("~~~ For polynomial kernels of degree Q = %d ~~~" % Q)
    my_svm.set_params(degree=Q)
    for C in pk_C:
        my_svm.set_params(C=C)
        my_svm.fit(x_1v5_train, y_1v5_train)
        # score() returns mean accuracy, so the error is its complement.
        e_in = 1.0 - my_svm.score(x_1v5_train, y_1v5_train)
        e_out = 1.0 - my_svm.score(x_1v5_test, y_1v5_test)
        n_sv = np.sum(my_svm.n_support_)  # total over both classes
        print("C = %f | E_in = %f, E_out = %f, num_sv = %d" % (C, e_in, e_out, n_sv))
    print("")
        
        

~~~ For polynomial kernels of degree Q = 2 ~~~
C = 0.000100 | E_in = 0.008969, E_out = 0.016509, num_sv = 236
C = 0.001000 | E_in = 0.004484, E_out = 0.016509, num_sv = 76
C = 0.010000 | E_in = 0.004484, E_out = 0.018868, num_sv = 34
C = 0.100000 | E_in = 0.004484, E_out = 0.018868, num_sv = 24
C = 1.000000 | E_in = 0.003203, E_out = 0.018868, num_sv = 24

~~~ For polynomial kernels of degree Q = 5 ~~~
C = 0.000100 | E_in = 0.004484, E_out = 0.018868, num_sv = 26
C = 0.001000 | E_in = 0.004484, E_out = 0.021226, num_sv = 25
C = 0.010000 | E_in = 0.003844, E_out = 0.021226, num_sv = 23
C = 0.100000 | E_in = 0.003203, E_out = 0.018868, num_sv = 25
C = 1.000000 | E_in = 0.003203, E_out = 0.021226, num_sv = 21

