## Problem 1

We need to learn every entry of the weight vector $w$ and also the bias term $b$. Since $w$ has $d$ entries, that gives $d + 1$ variables in total. So choose D.

In [1]:
import pandas as pd
import numpy as np

# Load the train/test feature files: whitespace-separated values, no header row.
# sep=r"\s+" is the documented equivalent of delim_whitespace=True, which
# pandas has deprecated.
train = pd.read_table("http://www.amlbook.com/data/zip/features.train", sep=r"\s+", header=None)
test = pd.read_table("http://www.amlbook.com/data/zip/features.test", sep=r"\s+", header=None)


In [4]:
# Give both frames the same three column names.
col_names = ['digit', 'intensity', 'symmetry']
train.columns, test.columns = col_names, col_names


In [10]:
# Split each frame into the 'digit' column (target) and the remaining columns (features).
X_train, Y_train = train.drop(columns=['digit']), train['digit']
X_test, Y_test = test.drop(columns=['digit']), test['digit']


Unnamed: 0,intensity,symmetry
0,0.341092,-4.528937
1,0.444131,-5.496812
2,0.231002,-2.88675
3,0.200275,-3.534375
4,0.291936,-4.352062


In [12]:
from sklearn import svm
# Second-order polynomial kernel fitted on the raw digit labels;
# C and gamma are left at the library defaults here.
poly_svm = svm.SVC(
    kernel='poly',
    degree=2,
    coef0=1,
)
poly_svm.fit(X_train, Y_train)

SVC(coef0=1, degree=2, kernel='poly')

## Problem 2, 3

In [60]:
## Calculate E_in

def E_in(guess, Y):
    """Return the misclassification rate of `guess` against `Y`.

    Compares the two elementwise with `!=` and returns the mean of the
    resulting booleans, i.e. the fraction of entries that differ.
    """
    mismatches = guess != Y
    return np.mean(mismatches)

# One-versus-all training with fixed SVM parameters.
def one_v_all(X, Y, n):
    """Fit a degree-2 polynomial-kernel SVC for digit `n` versus the rest.

    Entries of `Y` equal to `n` are relabelled +1 and everything else -1.
    The classifier (C = 0.01, gamma = 1, coef0 = 1) is fitted on `X` and
    then scored on that same `X`.

    Returns the fraction of training points it misclassifies.
    """
    labels = np.where(Y == n, 1, -1)
    classifier = svm.SVC(kernel='poly', degree=2, coef0=1, C=0.01, gamma=1)
    classifier.fit(X, labels)
    return E_in(classifier.predict(X), labels)
    
# In-sample error of each "digit versus all" classifier, digits 0 through 9.
for digit in range(10):
    err = one_v_all(X_train, Y_train, digit)
    print("{} versus all: {}".format(digit, err))

## A, A

0 versus all: 0.10588396653408312
1 versus all: 0.014401316691811822
2 versus all: 0.10026059525442327
3 versus all: 0.09024825126868742
4 versus all: 0.08942531888629818
5 versus all: 0.07625840076807022
6 versus all: 0.09107118365107666
7 versus all: 0.08846523110684405
8 versus all: 0.07433822520916199
9 versus all: 0.08832807570977919


## Problem 4

In [62]:
# Need to do 0 vs 1
def one_v_all_vectors(X, Y, n):
    """Count the support vectors of the digit-`n`-versus-all classifier.

    Entries of `Y` equal to `n` are relabelled +1 and everything else -1,
    then a degree-2 polynomial-kernel SVC (C = 0.01, gamma = 1, coef0 = 1)
    is fitted on `X`.

    Returns the length of the fitted model's `support_` array.
    """
    labels = np.where(Y == n, 1, -1)
    classifier = svm.SVC(kernel='poly', degree=2, coef0=1, C=0.01, gamma=1)
    classifier.fit(X, labels)
    support_indices = classifier.support_
    return len(support_indices)

# Support-vector count for "1 versus all" minus the count for "0 versus all".
sv_one = one_v_all_vectors(X_train, Y_train, 1)
sv_zero = one_v_all_vectors(X_train, Y_train, 0)
print(sv_one - sv_zero)
# C

-1793


## Problem 5

In [63]:
# one v other method
def one_v_other(train, a, b, my_C, q):
    """Fit a degree-`q` polynomial-kernel SVC on digit `a` versus digit `b`.

    Only rows of `train` whose 'digit' column equals `a` or `b` are used.
    Rows labelled `a` become +1 and the remaining rows become -1. The SVC
    uses C = `my_C`, gamma = 1 and coef0 = 1.

    Returns a tuple `(model, X, Y)`: the fitted classifier, the feature
    frame it was fitted on, and the +1/-1 label array.
    """
    is_a = train['digit'] == a
    is_b = train['digit'] == b
    subset = train[is_a | is_b]
    labels = np.where(subset['digit'] == a, 1, -1)
    features = subset.drop(['digit'], axis=1)
    classifier = svm.SVC(kernel='poly', degree=q, coef0=1, C=my_C, gamma=1)
    classifier.fit(features, labels)
    return (classifier, features, labels)

In [64]:
# Sweep C for the 5-versus-1 classifier (degree 2), reporting the number of
# support vectors and the in-sample error at each value.
Cs = [0.001, 0.01, 0.1, 1]
errors = []
for c in Cs:
    model, X, Y = one_v_other(train, 5, 1, c, 2)
    guess = model.predict(X)
    ein = E_in(guess, Y)
    errors.append(ein)
    print("c = " + str(c))
    print("Support vectors: {}".format(len(model.support_)))
    print("Error: {}".format(ein))

# Are the errors for the last two C values (0.1 and 1) identical?
errors[2] == errors[3]

# D , didn't need to test option C

c = 0.001
Support vectors: 76
Error: 0.004484304932735426
c = 0.01
Support vectors: 34
Error: 0.004484304932735426
c = 0.1
Support vectors: 24
Error: 0.004484304932735426
c = 1
Support vectors: 24
Error: 0.0032030749519538757


False

In [69]:
## Presumably Problem 6 (no header precedes this cell).
## Easiest way to do this is to try the answers until we get a hit.
## Each check prints (value for degree 2) - (value for degree 5).

# A: C = 0.0001, compare E_in
model2, X, Y = one_v_other(train, 1, 5, 0.0001, 2)
model5, _, _ = one_v_other(train, 1, 5, 0.0001, 5)
guess2, guess5 = model2.predict(X), model5.predict(X)
print(E_in(guess2, Y) - E_in(guess5, Y)) ## FALSE

# B: C = 0.001, compare number of support vectors
model2, X, Y = one_v_other(train, 1, 5, 0.001, 2)
model5, _, _ = one_v_other(train, 1, 5, 0.001, 5)
sv_count2, sv_count5 = len(model2.support_), len(model5.support_)
print(sv_count2 - sv_count5) ## TRUE

# C: C = 0.01, compare E_in
model2, X, Y = one_v_other(train, 1, 5, 0.01, 2)
model5, _, _ = one_v_other(train, 1, 5, 0.01, 5)
guess2, guess5 = model2.predict(X), model5.predict(X)
print(E_in(guess2, Y) - E_in(guess5, Y)) ## FALSE

## Option D was not checked. Choose B

0.004484304932735426
51
0.0006406149903907749


## Problem 7

In [100]:
## Want to select C. 
from sklearn.model_selection import KFold


def ovo_cv(train, a, b, my_C, q, n_splits=10, random_state=None):
    """Cross-validation error of a polynomial-kernel SVC on digit `a` versus digit `b`.

    Only rows of `train` whose 'digit' column equals `a` or `b` are used;
    rows labelled `a` become +1 and the rest -1. The data is split with a
    shuffled K-fold; for each fold an SVC (degree = `q`, C = `my_C`,
    gamma = 1, coef0 = 1) is fitted on the other folds and scored on the
    held-out fold.

    Parameters
    ----------
    train : DataFrame with a 'digit' column plus feature columns.
    a, b : the two digit labels to separate.
    my_C : value passed as the SVC's `C`.
    q : degree of the polynomial kernel.
    n_splits : number of folds (default 10, the previously hard-coded value).
    random_state : forwarded to `KFold`; pass an int for a reproducible
        shuffle. The default `None` keeps the original unseeded behaviour.

    Returns
    -------
    The mean misclassification rate over the held-out folds.
    """
    subset = train[(train['digit'] == a) | (train['digit'] == b)]
    y = np.where(subset['digit'] == a, 1, -1)
    # Plain ndarray so the positional indices from KFold can be used directly.
    X = np.array(subset.drop(['digit'], axis=1))
    kf = KFold(n_splits, shuffle=True, random_state=random_state)

    err = 0
    n_folds = 0
    for fit_index, val_index in kf.split(X):
        X_fit, X_val = X[fit_index], X[val_index]
        y_fit, y_val = y[fit_index], y[val_index]

        poly_svm = svm.SVC(kernel='poly', degree=q, coef0=1, C=my_C, gamma=1)
        poly_svm.fit(X_fit, y_fit)
        # Error on the held-out fold (validation error, not in-sample error).
        err += E_in(poly_svm.predict(X_val), y_val)
        n_folds += 1
    # Average over the folds actually evaluated instead of a literal 10,
    # so the result stays correct when n_splits is changed.
    return err / n_folds
    
## Run based on C
# For each of 100 runs, record which answer choice (key) has the lowest
# cross-validation error. Keys are inserted from the smallest C to the largest;
# min() returns the first minimal key in that order when errors tie.
d = []
for run in range(100):
    c = {}
    for choice, c_value in ((4, 0.0001), (3, 0.001), (2, 0.01), (1, 0.1), (0, 1)):
        c[choice] = ovo_cv(train, 1, 5, c_value, 2)
    best_choice = min(c, key = c.get)
    d.append(best_choice)

# Most frequently selected choice across the runs.
max(set(d), key = d.count)
## C

2

## Problem 8

In [102]:
# Average the cross-validation error for C = 0.01 (degree 2) over 100 runs.
d = [ovo_cv(train, 1, 5, 0.01, 2) for _ in range(100)]
np.mean(d)
## C

0.004695778213294136

## Problem 9

In [107]:
# Same structure as one_v_other, but with an RBF kernel instead of a polynomial one.
def one_v_other_rbf(train, a, b, my_C):
    """Fit an RBF-kernel SVC on digit `a` versus digit `b`.

    Only rows of `train` whose 'digit' column equals `a` or `b` are used.
    Rows labelled `a` become +1 and the remaining rows become -1. The SVC
    uses C = `my_C` and gamma = 1.

    Returns a tuple `(model, X, Y)`: the fitted classifier, the feature
    frame it was fitted on, and the +1/-1 label array.
    """
    is_a = train['digit'] == a
    is_b = train['digit'] == b
    subset = train[is_a | is_b]
    labels = np.where(subset['digit'] == a, 1, -1)
    features = subset.drop(['digit'], axis=1)
    rbf_svm = svm.SVC(kernel='rbf', C=my_C, gamma=1)
    rbf_svm.fit(features, labels)
    return (rbf_svm, features, labels)

# Candidate values of C for the RBF kernel; a later cell reads this list too.
C = [0.01, 1, 100, 10000, 1000000]

# In-sample error of the 1-versus-5 RBF classifier at each C.
for c in C:
    print("C:" + str(c))
    z, X, Y = one_v_other_rbf(train, 1, 5, c)
    print(E_in(z.predict(X), Y))

# E 

C:0.01
0.003843689942344651
C:1
0.004484304932735426
C:100
0.0032030749519538757
C:10000
0.0025624599615631004
C:1000000
0.0006406149903907751


## Problem 10

In [108]:
# Build the 1-versus-5 test set: keep only rows for those two digits,
# then encode digit `a` as +1 and digit `b` as -1.
a, b = 1, 5
temp = test[(test['digit'] == a) | (test['digit'] == b)]
X_test = temp.drop(columns=['digit'])
Y_test = np.where(temp['digit'] == a, 1, -1)

# Refit on the training data for every C and score on the held-out test set.
# E_in is reused only as a generic error-rate helper; the value printed here
# is the test error.
for c in C:
    print("C:" + str(c))
    z, X, Y = one_v_other_rbf(train, 1, 5, c)
    print(E_in(z.predict(X_test), Y_test))

# C (C = 100) better for out of sample

C:0.01
0.02358490566037736
C:1
0.02122641509433962
C:100
0.018867924528301886
C:10000
0.02358490566037736
C:1000000
0.02358490566037736
