## DATA 558: Data Competition 2

### SVM Models (from HW7 Prompt)

Geoffrey Li



In [41]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
import datetime
import random
from timeit import default_timer as timer
import pickle
import scipy

### 1: Define Functions

In [74]:
def grad(alpha, K, y, lam):
    return -2*np.mean(np.maximum(0, 1-y*K.dot(alpha))*y*K, axis=1) + 2*lam*K.dot(alpha)

def obj(alpha, K, y, lam):
    return np.mean(np.maximum(0, 1-y*K.dot(alpha))**2) + lam*alpha.dot(K).dot(alpha)

def computegram(X, kernel, **kwargs):
    n = len(X)
    K = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if 'power' in kwargs:
                K[i, j] = kernel(X[i], X[j], kwargs['power'])
            elif 'sigma' in kwargs:
                K[i, j] = kernel(X[i], X[j], kwargs['sigma'])
    
    return K


def bt_line_search(beta, K, y, lam, eta=1, alpha=0.5, betaparam=0.5, maxiter=10):
    grad_beta = grad(beta, K, y, lam)
    norm_grad_beta = np.linalg.norm(grad_beta)
    found_eta = 0
    iter = 0
    while found_eta == 0 and iter < maxiter:
        if obj(beta - eta * grad_beta, K, y, lam) < \
                        obj(beta, K, y, lam) - alpha * eta * norm_grad_beta ** 2:
            found_eta = 1
        elif iter == maxiter-1:
#             print('Max number of iterations of backtracking line search reached')
            break
        else:
            eta *= betaparam
            iter += 1
    return eta

def fastgradalgo(beta_init, theta_init, K, y, lam, eta_init, maxiter, eps=1e-5):
    beta = beta_init
    theta = theta_init
    eta = eta_init
    grad_theta = grad(theta, K, y, lam)
    grad_beta = grad(beta, K, y, lam)
    grad_beta_norm = np.linalg.norm(grad_beta)
    iter = 0
    beta_list = list()
    while iter < maxiter and grad_beta_norm > eps:
#         print('Fastgradalgo Iteration:', iter)
        
        eta = bt_line_search(theta, K, y, lam, eta=eta)
#         print('Eta:', eta)
        
        beta_new = theta - eta*grad_theta
        theta = beta_new + iter/(iter+3)*(beta_new-beta)
        
        grad_theta = grad(theta, K, y, lam)
        grad_beta = grad(beta, K, y, lam)        
        beta = beta_new.copy()
        
        iter += 1
        if iter % 1 == 0:
            beta_list.append(beta_new)
        
        grad_beta_norm = np.linalg.norm(grad_beta)
#         print('Norm of Gradient at Current Iteration:', grad_beta_norm)
#         print('Objective Value at Current Iteration:', obj(beta, K, y, lam))
        
    return beta_list

def initstepsize(K, lam):
    n = len(K)
    return 1 / scipy.linalg.eigh(2 / n * np.dot(K, K) + 2 * lam * K, eigvals=(n - 1, n - 1), eigvals_only=True)[0]


def gram_linear(X, Z=None):
    return X@Z.T

In [66]:
def misclassification_error(beta, X_train, X_test, y_test, kernel, **kwargs):
    n_test = len(y_test)
    y_pred = np.zeros(n_test)
    y_vals = np.zeros(n_test)
    for i in range(n_test):
        if 'sigma' in kwargs:
            sigma = kwargs['sigma']
            y_vals[i] = np.dot(kernel(X_train, X_test[i, :].reshape(1, -1), sigma).reshape(-1), beta)
        else:
            y_vals[i] = np.dot(kernel(X_train, X_test[i, :]).reshape(-1), beta)
    y_pred = np.sign(y_vals)
    return np.mean(y_pred != y_test), y_vals 

In [22]:
def mysvm(K, y, lam, eta_init, eps=1e-5):
    alpha_init = np.zeros(len(K))[:, np.newaxis]
    theta_init = np.zeros(len(K))[:, np.newaxis]
    
    opt_alpha = fastgradalgo(alpha_init, theta_init, K, y, lam, eta_init, maxiter=1000, eps=1e-3)    
    
    return opt_alpha

### 2: Loading and processing data

In [23]:
path = './data/'

In [24]:
X_train_unstd = np.load(path+'train_features.npy')
y_train = np.load(path+'train_labels.npy')

X_val_unstd = np.load(path+'val_features.npy')
y_val = np.load(path+'val_labels.npy')

X_test_unstd = np.load(path+'test_features.npy')

In [25]:
# Standardize the data
scaler = preprocessing.StandardScaler().fit(X_train_unstd)
X_train = scaler.transform(X_train_unstd)
X_val = scaler.transform(X_val_unstd)
X_test = scaler.transform(X_test_unstd)

In [38]:
# Subsets training and validation data based on input classes
def subset_data(pos_class, neg_class, X_train, y_train, X_val=None, y_val=None):
    train_subset = (np.isin(y_train, pos_class)) | (np.isin(y_train, neg_class))
    y_train_subset = y_train[np.where(train_subset)]
    X_train_subset = X_train[train_subset.nonzero()[0]]
    if type(pos_class) == list:
        y_train_subset = \
            np.fromiter(map(lambda n: 1 if n in pos_class else -1, y_train_subset), dtype=int).reshape(-1, 1)
    else:
        y_train_subset = \
            np.fromiter(map(lambda n: 1 if n == pos_class else -1, y_train_subset), dtype=int).reshape(-1, 1)
   
    if X_val is not None and y_val is not None:
        val_subset = (np.isin(y_val, pos_class)) | (np.isin(y_val, neg_class))
        y_val_subset = y_val[np.where(val_subset)]
        X_val_subset = X_val[val_subset.nonzero()[0]]
        if type(pos_class) == list:
            y_val_subset = \
                np.fromiter(map(lambda n: 1 if n in pos_class else -1, y_val_subset), dtype=int).reshape(-1, 1)
        else:
            y_val_subset = \
                np.fromiter(map(lambda n: 1 if n == pos_class else -1, y_val_subset), dtype=int).reshape(-1, 1)
        return X_train_subset, y_train_subset, X_val_subset, y_val_subset
    
    return X_train_subset, y_train_subset

### 3: sklearn.LinearSVC (one-vs-one)

In a one-vs-one fashion, for each pair of classes, train a linear SVM classifier using scikit- learn’s function LinearSVC, with the default value for the regularization parameter. Compute the multi-class misclassification error obtained using these classifiers trained in a one-vs-one fashion.

In [10]:
from sklearn.svm import LinearSVC

maxiter = 1000
target_accuracy = 1e-5
classes = 100
pairwise_linsvc_sk_ovo = dict()
# opt_reg_pairwise_linsvc_sk_ovo = dict()
t = 0

for i in range(classes):
    for j in range(i+1, classes):
        print('Training model '+str(t)+': ('+str(i)+','+str(j)+') ...')
        start = timer()
        X_train_subset, y_train_subset = subset_data(i, j, X_train, y_train)
        
        linsvc_sk_ovo = LinearSVC(fit_intercept=False, max_iter=maxiter, tol=target_accuracy)
        linsvc_sk_ovo.fit(X_train_subset, y_train_subset.squeeze())
        
        pairwise_linsvc_sk_ovo[(i, j)] = linsvc_sk_ovo
#         opt_reg_pairwise_linsvc_sk_ovo[(i, j)] = pairwise_linsvc_sk_ovo.C_[0]
        
        end = timer()
        print(end - start)
        t += 1
        

Training model 0: (0,1) ...
0.1550004569999146
Training model 1: (0,2) ...
0.161391889000015
Training model 2: (0,3) ...
0.13016138200009664
Training model 3: (0,4) ...
0.13331810799991217
Training model 4: (0,5) ...
0.14343435999990106
Training model 5: (0,6) ...
0.15298060099985378
Training model 6: (0,7) ...
0.13773807400002624
Training model 7: (0,8) ...
0.1286408250000477
Training model 8: (0,9) ...
0.10767872900009934
Training model 9: (0,10) ...
0.11759574300003806
Training model 10: (0,11) ...
0.10529112700010046
Training model 11: (0,12) ...
0.11072972600004505
Training model 12: (0,13) ...
0.11508835299991915
Training model 13: (0,14) ...
0.12418233000016698
Training model 14: (0,15) ...
0.12227917200016236
Training model 15: (0,16) ...
0.1018311159998575
Training model 16: (0,17) ...
0.12257742099995994
Training model 17: (0,18) ...
0.10624404300006063
Training model 18: (0,19) ...
0.10598082599994996
Training model 19: (0,20) ...
0.10610838000002332
Training model 20: (0,21

0.10229126400008681
Training model 166: (1,69) ...
0.1007619459999205
Training model 167: (1,70) ...
0.10779715499984377
Training model 168: (1,71) ...
0.13737610499993025
Training model 169: (1,72) ...
0.1023004230000879
Training model 170: (1,73) ...
0.13307532499993613
Training model 171: (1,74) ...
0.11766815500004668
Training model 172: (1,75) ...
0.1058038360001774
Training model 173: (1,76) ...
0.1225933669998085
Training model 174: (1,77) ...
0.12014681200002997
Training model 175: (1,78) ...
0.1439022639999621
Training model 176: (1,79) ...
0.15276964199983922
Training model 177: (1,80) ...
0.10776988599991455
Training model 178: (1,81) ...
0.10665013800007728
Training model 179: (1,82) ...
0.10923120200004632
Training model 180: (1,83) ...
0.09951229500006775
Training model 181: (1,84) ...
0.11703332699994462
Training model 182: (1,85) ...
0.106294926000146
Training model 183: (1,86) ...
0.11196037400009118
Training model 184: (1,87) ...
0.10289267899997867
Training model 185

0.11894899599997188
Training model 329: (3,39) ...
0.13655072899996412
Training model 330: (3,40) ...
0.11719465700002729
Training model 331: (3,41) ...
0.11138319699989552
Training model 332: (3,42) ...
0.12861114499992254
Training model 333: (3,43) ...
0.11426472300013302
Training model 334: (3,44) ...
0.11596247300008145
Training model 335: (3,45) ...
0.10722582099992906
Training model 336: (3,46) ...
0.12459819200012134
Training model 337: (3,47) ...
0.11896432599996842
Training model 338: (3,48) ...
0.1078471629998603
Training model 339: (3,49) ...
0.1175715740000669
Training model 340: (3,50) ...
0.1535329969999566
Training model 341: (3,51) ...
0.11855243599984533
Training model 342: (3,52) ...
0.11646268399999826
Training model 343: (3,53) ...
0.1394791330001226
Training model 344: (3,54) ...
0.11795515100016019
Training model 345: (3,55) ...
0.11406737999982397
Training model 346: (3,56) ...
0.10963242799994077
Training model 347: (3,57) ...
0.10669608299986066
Training model 

0.1119693630000711
Training model 492: (5,13) ...
0.13566931699983797
Training model 493: (5,14) ...
0.11973252699999648
Training model 494: (5,15) ...
0.11206780399993477
Training model 495: (5,16) ...
0.10348894300000211
Training model 496: (5,17) ...
0.11425291499995183
Training model 497: (5,18) ...
0.11051751300010437
Training model 498: (5,19) ...
0.10980408500017802
Training model 499: (5,20) ...
0.11947876299996096
Training model 500: (5,21) ...
0.12165337700002965
Training model 501: (5,22) ...
0.11679840600004354
Training model 502: (5,23) ...
0.12315191900006539
Training model 503: (5,24) ...
0.11415579799995612
Training model 504: (5,25) ...
0.12833379399990008
Training model 505: (5,26) ...
0.17703708199996981
Training model 506: (5,27) ...
0.12499246400011543
Training model 507: (5,28) ...
0.148286065000093
Training model 508: (5,29) ...
0.1942799300002207
Training model 509: (5,30) ...
0.14808940200009602
Training model 510: (5,31) ...
0.12847897900019234
Training model 

0.1381965760001549
Training model 654: (6,82) ...
0.11633233600014137
Training model 655: (6,83) ...
0.10724502300035965
Training model 656: (6,84) ...
0.12146330500036129
Training model 657: (6,85) ...
0.12973145099977046
Training model 658: (6,86) ...
0.12085679299980256
Training model 659: (6,87) ...
0.11788059899981818
Training model 660: (6,88) ...
0.12052808699991147
Training model 661: (6,89) ...
0.11886322800000926
Training model 662: (6,90) ...
0.10656751799979247
Training model 663: (6,91) ...
0.11335072700012461
Training model 664: (6,92) ...
0.10994885300033275
Training model 665: (6,93) ...
0.12493931299968608
Training model 666: (6,94) ...
0.11763296599974638
Training model 667: (6,95) ...
0.10178090299996256
Training model 668: (6,96) ...
0.12059205600007772
Training model 669: (6,97) ...
0.1364439509998192
Training model 670: (6,98) ...
0.12858739500006777
Training model 671: (6,99) ...
0.1257251060001181
Training model 672: (7,8) ...
0.5565605340002548
Training model 6

0.15703681600007258
Training model 817: (8,62) ...
0.146369310999944
Training model 818: (8,63) ...
0.1651311080004234
Training model 819: (8,64) ...
0.12043387500034441
Training model 820: (8,65) ...
0.12086096199982421
Training model 821: (8,66) ...
0.12992844000018522
Training model 822: (8,67) ...
0.14616243500040582
Training model 823: (8,68) ...
0.11961920900012046
Training model 824: (8,69) ...
0.12629770800003826
Training model 825: (8,70) ...
0.11975968300021123
Training model 826: (8,71) ...
0.14338228799988428
Training model 827: (8,72) ...
0.11503406500014535
Training model 828: (8,73) ...
0.12465759699989576
Training model 829: (8,74) ...
0.13004899899988231
Training model 830: (8,75) ...
0.11685621799961154
Training model 831: (8,76) ...
0.15802669000004244
Training model 832: (8,77) ...
0.13199890499981848
Training model 833: (8,78) ...
0.134327267000117
Training model 834: (8,79) ...
0.13872358400021767
Training model 835: (8,80) ...
0.17372237300014604
Training model 8

0.15284594900003867
Training model 978: (10,44) ...
0.14904948899993542
Training model 979: (10,45) ...
0.12199121000003288
Training model 980: (10,46) ...
0.1261624369999481
Training model 981: (10,47) ...
0.13999429099976624
Training model 982: (10,48) ...
0.11241318800011868
Training model 983: (10,49) ...
0.12877243399998406
Training model 984: (10,50) ...
0.13465978599970185
Training model 985: (10,51) ...
0.12810052199984057
Training model 986: (10,52) ...
0.132907447999969
Training model 987: (10,53) ...
0.1477179389999037
Training model 988: (10,54) ...
0.15193634200022643
Training model 989: (10,55) ...
0.11180319999994026
Training model 990: (10,56) ...
0.12941897800010338
Training model 991: (10,57) ...
0.13953286199966897
Training model 992: (10,58) ...
0.15796331599995028
Training model 993: (10,59) ...
0.12470038700030273
Training model 994: (10,60) ...
0.1568581529995754
Training model 995: (10,61) ...
0.13655881600016073
Training model 996: (10,62) ...
0.156065899999703

0.11504005100005088
Training model 1135: (12,26) ...
0.12966914300022836
Training model 1136: (12,27) ...
0.13403938500005097
Training model 1137: (12,28) ...
0.12051971300024888
Training model 1138: (12,29) ...
0.1256536240002788
Training model 1139: (12,30) ...
0.1277976370001852
Training model 1140: (12,31) ...
0.12172358699990582
Training model 1141: (12,32) ...
0.12840602699998271
Training model 1142: (12,33) ...
0.11661432100027014
Training model 1143: (12,34) ...
0.12014017999990756
Training model 1144: (12,35) ...
0.12754729900007078
Training model 1145: (12,36) ...
0.14596461700011787
Training model 1146: (12,37) ...
0.11983846299972356
Training model 1147: (12,38) ...
0.1238932979999845
Training model 1148: (12,39) ...
0.15388004499982344
Training model 1149: (12,40) ...
0.14374185199994827
Training model 1150: (12,41) ...
0.13197749500022837
Training model 1151: (12,42) ...
0.14021434400001453
Training model 1152: (12,43) ...
0.14590500699978293
Training model 1153: (12,44) 

0.14713135900001362
Training model 1292: (13,97) ...
0.147761287999856
Training model 1293: (13,98) ...
0.15609635500004515
Training model 1294: (13,99) ...
0.1856759620000048
Training model 1295: (14,15) ...
0.1710553580001033
Training model 1296: (14,16) ...
0.18269245900000897
Training model 1297: (14,17) ...
0.32329544000003807
Training model 1298: (14,18) ...
0.193470914000045
Training model 1299: (14,19) ...
0.1620784960000492
Training model 1300: (14,20) ...
0.1381734060000781
Training model 1301: (14,21) ...
0.1568400119999751
Training model 1302: (14,22) ...
0.14168019199996706
Training model 1303: (14,23) ...
0.15741067099997963
Training model 1304: (14,24) ...
0.12510357900009694
Training model 1305: (14,25) ...
0.11915105300022333
Training model 1306: (14,26) ...
0.13122025900020162
Training model 1307: (14,27) ...
0.12670811799989679
Training model 1308: (14,28) ...
0.11956182399990212
Training model 1309: (14,29) ...
0.12498610400007237
Training model 1310: (14,30) ...
0.

0.14281691199994384
Training model 1449: (15,85) ...
0.20658285200033788
Training model 1450: (15,86) ...
0.19396097299977555
Training model 1451: (15,87) ...
0.12796123699990858
Training model 1452: (15,88) ...
0.1808744800000568
Training model 1453: (15,89) ...
0.15348287599999821
Training model 1454: (15,90) ...
0.1789997279997806
Training model 1455: (15,91) ...
0.3590237209996303
Training model 1456: (15,92) ...
0.2797264810001252
Training model 1457: (15,93) ...
0.23063138900033664
Training model 1458: (15,94) ...
0.22611268199989354
Training model 1459: (15,95) ...
0.21550041400041664
Training model 1460: (15,96) ...
0.18391447600015454
Training model 1461: (15,97) ...
0.14047271100025682
Training model 1462: (15,98) ...
0.1279765229996883
Training model 1463: (15,99) ...
0.14137370600019494
Training model 1464: (16,17) ...
0.2104894740000418
Training model 1465: (16,18) ...
0.22950079300017023
Training model 1466: (16,19) ...
0.30343305999986114
Training model 1467: (16,20) ...

0.1288894409999557
Training model 1605: (17,76) ...
0.1268047309999929
Training model 1606: (17,77) ...
0.12719997399972272
Training model 1607: (17,78) ...
0.11600557100018705
Training model 1608: (17,79) ...
0.13023913200004245
Training model 1609: (17,80) ...
0.15088987099989026
Training model 1610: (17,81) ...
0.158428486000048
Training model 1611: (17,82) ...
0.1563494920001176
Training model 1612: (17,83) ...
0.12119651600005454
Training model 1613: (17,84) ...
0.20539325499976258
Training model 1614: (17,85) ...
0.1988219330000902
Training model 1615: (17,86) ...
0.15144575600015742
Training model 1616: (17,87) ...
0.15789233699979377
Training model 1617: (17,88) ...
0.16642771200031348
Training model 1618: (17,89) ...
0.1573264360004032
Training model 1619: (17,90) ...
0.1350652619998982
Training model 1620: (17,91) ...
0.18317842000033124
Training model 1621: (17,92) ...
0.2001260590000129
Training model 1622: (17,93) ...
0.2135884519998399
Training model 1623: (17,94) ...
0.1

0.13156002599998828
Training model 1762: (19,72) ...
0.140653458000088
Training model 1763: (19,73) ...
0.12745494199998575
Training model 1764: (19,74) ...
0.1329281989997071
Training model 1765: (19,75) ...
0.12793682300025466
Training model 1766: (19,76) ...
0.1308692899997368
Training model 1767: (19,77) ...
0.13403382300020894
Training model 1768: (19,78) ...
0.1323117730003105
Training model 1769: (19,79) ...
0.13013667299992449
Training model 1770: (19,80) ...
0.13410205199988923
Training model 1771: (19,81) ...
0.14884114699998463
Training model 1772: (19,82) ...
0.15148333200022535
Training model 1773: (19,83) ...
0.11220966599967142
Training model 1774: (19,84) ...
0.13733764700009488
Training model 1775: (19,85) ...
0.1784159990002081
Training model 1776: (19,86) ...
0.13372707200005607
Training model 1777: (19,87) ...
0.1338736349998726
Training model 1778: (19,88) ...
0.14228810699978567
Training model 1779: (19,89) ...
0.15388792499970805
Training model 1780: (19,90) ...


0.14670724899997367
Training model 1919: (21,72) ...
0.12687222799968367
Training model 1920: (21,73) ...
0.1346974709999813
Training model 1921: (21,74) ...
0.14334254499999588
Training model 1922: (21,75) ...
0.12758701700022357
Training model 1923: (21,76) ...
0.12817700599998716
Training model 1924: (21,77) ...
0.1359149919999254
Training model 1925: (21,78) ...
0.14543032199981099
Training model 1926: (21,79) ...
0.12439013300036095
Training model 1927: (21,80) ...
0.17148349999979473
Training model 1928: (21,81) ...
0.16536011400012285
Training model 1929: (21,82) ...
0.2405163220000759
Training model 1930: (21,83) ...
0.15588076899985026
Training model 1931: (21,84) ...
0.13386045899960664
Training model 1932: (21,85) ...
0.2052901369997926
Training model 1933: (21,86) ...
0.1775291539997852
Training model 1934: (21,87) ...
0.1272296029997051
Training model 1935: (21,88) ...
0.15414875899978142
Training model 1936: (21,89) ...
0.20111088299972835
Training model 1937: (21,90) ...

0.12198882299981051
Training model 2076: (23,76) ...
0.14043407500003013
Training model 2077: (23,77) ...
0.13987165100024868
Training model 2078: (23,78) ...
0.14391972099974737
Training model 2079: (23,79) ...
0.14413130299999466
Training model 2080: (23,80) ...
0.247846890999881
Training model 2081: (23,81) ...
0.18152130399994348
Training model 2082: (23,82) ...
0.2547472769997512
Training model 2083: (23,83) ...
0.16793177099998502
Training model 2084: (23,84) ...
0.18101938900008463
Training model 2085: (23,85) ...
0.2584121380000397
Training model 2086: (23,86) ...
0.20745503699981782
Training model 2087: (23,87) ...
0.1713291770001888
Training model 2088: (23,88) ...
0.16622534599991923
Training model 2089: (23,89) ...
0.18715328100006445
Training model 2090: (23,90) ...
0.14632185699974798
Training model 2091: (23,91) ...
0.2340504929998133
Training model 2092: (23,92) ...
0.2051161280000997
Training model 2093: (23,93) ...
0.3124923939999462
Training model 2094: (23,94) ...
0

0.12364922699998715
Training model 2232: (25,83) ...
0.11146862900022825
Training model 2233: (25,84) ...
0.11572578100003739
Training model 2234: (25,85) ...
0.13891181300004973
Training model 2235: (25,86) ...
0.12608796499989694
Training model 2236: (25,87) ...
0.10476570299988452
Training model 2237: (25,88) ...
0.1273066009998729
Training model 2238: (25,89) ...
0.10646086499991725
Training model 2239: (25,90) ...
0.11844585300013932
Training model 2240: (25,91) ...
0.1190949830001955
Training model 2241: (25,92) ...
0.11400394599968422
Training model 2242: (25,93) ...
0.13539341100022284
Training model 2243: (25,94) ...
0.11508362099993974
Training model 2244: (25,95) ...
0.10358199700021942
Training model 2245: (25,96) ...
0.12428894599997875
Training model 2246: (25,97) ...
0.11697509499981606
Training model 2247: (25,98) ...
0.10938464800028669
Training model 2248: (25,99) ...
0.11186331400040217
Training model 2249: (26,27) ...
0.44754009600001154
Training model 2250: (26,28)

0.12142902599998706
Training model 2388: (27,94) ...
0.14868801300008272
Training model 2389: (27,95) ...
0.12398324200012212
Training model 2390: (27,96) ...
0.12221374699993248
Training model 2391: (27,97) ...
0.11105084800010445
Training model 2392: (27,98) ...
0.10704374200031452
Training model 2393: (27,99) ...
0.11194353000018964
Training model 2394: (28,29) ...
0.14347631500004354
Training model 2395: (28,30) ...
0.18026818799989996
Training model 2396: (28,31) ...
0.1341011099998468
Training model 2397: (28,32) ...
0.20864707200007615
Training model 2398: (28,33) ...
0.14490223099983268
Training model 2399: (28,34) ...
0.15668892899975617
Training model 2400: (28,35) ...
0.1937537329999941
Training model 2401: (28,36) ...
0.2093311150001682
Training model 2402: (28,37) ...
0.1774134409997714
Training model 2403: (28,38) ...
0.1865078050000193
Training model 2404: (28,39) ...
0.15338168000016594
Training model 2405: (28,40) ...
0.12410875000023225
Training model 2406: (28,41) ..

0.2404946070000733
Training model 2544: (30,40) ...
0.15819449799982976
Training model 2545: (30,41) ...
0.16943722399992112
Training model 2546: (30,42) ...
0.15417474099967876
Training model 2547: (30,43) ...
0.15972628700001223
Training model 2548: (30,44) ...
0.19469942900013848
Training model 2549: (30,45) ...
0.13329461399962383
Training model 2550: (30,46) ...
0.19710625799962145
Training model 2551: (30,47) ...
0.19842398199989475
Training model 2552: (30,48) ...
0.15698410400000284
Training model 2553: (30,49) ...
0.15896444200006954
Training model 2554: (30,50) ...
0.18884960200011847
Training model 2555: (30,51) ...
0.12347828699967067
Training model 2556: (30,52) ...
0.15563097899985223
Training model 2557: (30,53) ...
0.19726616800016927
Training model 2558: (30,54) ...
0.19854089099999328
Training model 2559: (30,55) ...
0.1725850980001269
Training model 2560: (30,56) ...
0.1467366110000512
Training model 2561: (30,57) ...
0.22482463200003622
Training model 2562: (30,58) 

0.3498641830001361
Training model 2700: (32,61) ...
0.2503125270000055
Training model 2701: (32,62) ...
0.2995589150000342
Training model 2702: (32,63) ...
0.3203130570000212
Training model 2703: (32,64) ...
0.1876545800000713
Training model 2704: (32,65) ...
0.20167124199997488
Training model 2705: (32,66) ...
0.3046087040002021
Training model 2706: (32,67) ...
0.27414104599984057
Training model 2707: (32,68) ...
0.31503159300018524
Training model 2708: (32,69) ...
0.1515365479999673
Training model 2709: (32,70) ...
0.14581646800024828
Training model 2710: (32,71) ...
0.2558038000001943
Training model 2711: (32,72) ...
0.12208547900036137
Training model 2712: (32,73) ...
0.16197461199999452
Training model 2713: (32,74) ...
0.13974336599994786
Training model 2714: (32,75) ...
0.12929267999970762
Training model 2715: (32,76) ...
0.19424664300004224
Training model 2716: (32,77) ...
0.21670009700028459
Training model 2717: (32,78) ...
0.173456088000421
Training model 2718: (32,79) ...
0.1

0.17373968699985198
Training model 2856: (34,86) ...
0.13293087400006698
Training model 2857: (34,87) ...
0.13354841799991846
Training model 2858: (34,88) ...
0.1187655030003043
Training model 2859: (34,89) ...
0.11779208300004029
Training model 2860: (34,90) ...
0.10874325900022086
Training model 2861: (34,91) ...
0.11662579799985906
Training model 2862: (34,92) ...
0.1111651740002344
Training model 2863: (34,93) ...
0.12835852700027317
Training model 2864: (34,94) ...
0.11731332099998326
Training model 2865: (34,95) ...
0.10835957699964638
Training model 2866: (34,96) ...
0.1111727659999815
Training model 2867: (34,97) ...
0.16366498000024876
Training model 2868: (34,98) ...
0.17848670100011077
Training model 2869: (34,99) ...
0.14794678299995212
Training model 2870: (35,36) ...
0.66182709099985
Training model 2871: (35,37) ...
0.49729068599981474
Training model 2872: (35,38) ...
0.1603790540002592
Training model 2873: (35,39) ...
0.19439411599978484
Training model 2874: (35,40) ...


0.172185589000037
Training model 3013: (37,54) ...
0.23015386299994134
Training model 3014: (37,55) ...
0.12109477500007415
Training model 3015: (37,56) ...
0.17892227899983482
Training model 3016: (37,57) ...
0.18888508400004866
Training model 3017: (37,58) ...
0.1916334550001011
Training model 3018: (37,59) ...
0.12071813299962741
Training model 3019: (37,60) ...
0.20965530900002705
Training model 3020: (37,61) ...
0.18573393199994825
Training model 3021: (37,62) ...
0.22865737099982653
Training model 3022: (37,63) ...
0.16582696500017846
Training model 3023: (37,64) ...
0.13177649799990832
Training model 3024: (37,65) ...
0.13179571800037593
Training model 3025: (37,66) ...
0.1440193990001717
Training model 3026: (37,67) ...
0.18374299500010238
Training model 3027: (37,68) ...
0.16085971399979826
Training model 3028: (37,69) ...
0.16321678499980408
Training model 3029: (37,70) ...
0.1153600610000467
Training model 3030: (37,71) ...
0.18504044499968586
Training model 3031: (37,72) ..

0.1365419940002539
Training model 3170: (39,90) ...
0.14387030700027026
Training model 3171: (39,91) ...
0.16632422000020597
Training model 3172: (39,92) ...
0.13686758400035615
Training model 3173: (39,93) ...
0.17198550900002374
Training model 3174: (39,94) ...
0.16132396300008622
Training model 3175: (39,95) ...
0.1476282040002843
Training model 3176: (39,96) ...
0.14942743900019195
Training model 3177: (39,97) ...
0.15255974699994113
Training model 3178: (39,98) ...
0.13303712399965661
Training model 3179: (39,99) ...
0.1632147969999096
Training model 3180: (40,41) ...
0.15672211099990818
Training model 3181: (40,42) ...
0.18476853599986498
Training model 3182: (40,43) ...
0.16992656100001113
Training model 3183: (40,44) ...
0.17700150700011363
Training model 3184: (40,45) ...
0.11560047200009649
Training model 3185: (40,46) ...
1.0069556290000037
Training model 3186: (40,47) ...
0.30086218399992504
Training model 3187: (40,48) ...
0.1295477070002562
Training model 3188: (40,49) ..

0.20148450199985746
Training model 3326: (42,72) ...
0.13348765999990064
Training model 3327: (42,73) ...
0.15176703699989957
Training model 3328: (42,74) ...
0.13075521199971263
Training model 3329: (42,75) ...
0.1416850640002849
Training model 3330: (42,76) ...
0.18504627100037396
Training model 3331: (42,77) ...
0.20347784300020066
Training model 3332: (42,78) ...
0.14552165599980071
Training model 3333: (42,79) ...
0.1867035529999157
Training model 3334: (42,80) ...
0.1410280239997519
Training model 3335: (42,81) ...
0.17933012399998915
Training model 3336: (42,82) ...
0.17811219099985465
Training model 3337: (42,83) ...
0.14594517300020016
Training model 3338: (42,84) ...
0.15405797099992924
Training model 3339: (42,85) ...
0.1868146270003308
Training model 3340: (42,86) ...
0.16415937799956737
Training model 3341: (42,87) ...
0.1219913899999483
Training model 3342: (42,88) ...
0.13056628499998624
Training model 3343: (42,89) ...
0.11874576499985778
Training model 3344: (42,90) ..

0.29880478199993377
Training model 3482: (45,63) ...
0.1765878259998317
Training model 3483: (45,64) ...
0.13874031100021966
Training model 3484: (45,65) ...
0.18594041400001515
Training model 3485: (45,66) ...
0.24342708799986212
Training model 3486: (45,67) ...
0.3621383740000965
Training model 3487: (45,68) ...
0.28042733700021927
Training model 3488: (45,69) ...
0.1665922249999312
Training model 3489: (45,70) ...
0.1160288619998937
Training model 3490: (45,71) ...
0.18182252199994764
Training model 3491: (45,72) ...
0.10257999100031157
Training model 3492: (45,73) ...
0.12977322400001867
Training model 3493: (45,74) ...
0.11050355500037767
Training model 3494: (45,75) ...
0.12096489499981544
Training model 3495: (45,76) ...
0.1859102600001279
Training model 3496: (45,77) ...
0.1656831210002565
Training model 3497: (45,78) ...
0.1323654030002217
Training model 3498: (45,79) ...
0.1779375269998127
Training model 3499: (45,80) ...
0.122056813999734
Training model 3500: (45,81) ...
0.1

0.2074831599998106
Training model 3638: (48,63) ...
0.19108238199987682
Training model 3639: (48,64) ...
0.14099534000024505
Training model 3640: (48,65) ...
0.13724825399958718
Training model 3641: (48,66) ...
0.16506467799990787
Training model 3642: (48,67) ...
0.20029477700018106
Training model 3643: (48,68) ...
0.16698216400027377
Training model 3644: (48,69) ...
0.17059453599995322
Training model 3645: (48,70) ...
0.10604764200024874
Training model 3646: (48,71) ...
0.14314233899995088
Training model 3647: (48,72) ...
0.10049084299998867
Training model 3648: (48,73) ...
0.11522090299968113
Training model 3649: (48,74) ...
0.10560132799992061
Training model 3650: (48,75) ...
0.10799655600021651
Training model 3651: (48,76) ...
0.14941305199999988
Training model 3652: (48,77) ...
0.1354740059996402
Training model 3653: (48,78) ...
0.1161449240003094
Training model 3654: (48,79) ...
0.12718865299984827
Training model 3655: (48,80) ...
0.12388681399988855
Training model 3656: (48,81) 

0.17173667499992007
Training model 3794: (51,72) ...
0.10972561700009464
Training model 3795: (51,73) ...
0.1352273200000127
Training model 3796: (51,74) ...
0.11545109300004697
Training model 3797: (51,75) ...
0.11158460799970271
Training model 3798: (51,76) ...
0.15506820700011303
Training model 3799: (51,77) ...
0.1344867049997447
Training model 3800: (51,78) ...
0.14351412399992114
Training model 3801: (51,79) ...
0.1438333880000755
Training model 3802: (51,80) ...
0.14211788600005093
Training model 3803: (51,81) ...
0.11581216399963523
Training model 3804: (51,82) ...
0.12634256000001187
Training model 3805: (51,83) ...
0.11224354700016193
Training model 3806: (51,84) ...
0.13064107699983651
Training model 3807: (51,85) ...
0.13945176500010348
Training model 3808: (51,86) ...
0.1281911000000946
Training model 3809: (51,87) ...
0.14361628500000734
Training model 3810: (51,88) ...
0.1364640059996418
Training model 3811: (51,89) ...
0.147609119000208
Training model 3812: (51,90) ...


0.11838403499996275
Training model 3951: (54,91) ...
0.14383884800008673
Training model 3952: (54,92) ...
0.11815453000008347
Training model 3953: (54,93) ...
0.13476999500016973
Training model 3954: (54,94) ...
0.12599854900008722
Training model 3955: (54,95) ...
0.12253452400000242
Training model 3956: (54,96) ...
0.12060078600006818
Training model 3957: (54,97) ...
0.12645506500030024
Training model 3958: (54,98) ...
0.11447512099994128
Training model 3959: (54,99) ...
0.11628239700030463
Training model 3960: (55,56) ...
0.12018238400014525
Training model 3961: (55,57) ...
0.16553553899984763
Training model 3962: (55,58) ...
0.18491955999979837
Training model 3963: (55,59) ...
0.7010853179999685
Training model 3964: (55,60) ...
0.19790742699979091
Training model 3965: (55,61) ...
0.14551730199991653
Training model 3966: (55,62) ...
0.1752392969997345
Training model 3967: (55,63) ...
0.14429128199981278
Training model 3968: (55,64) ...
0.7261040930002309
Training model 3969: (55,65) 

0.20299580400023842
Training model 4107: (58,77) ...
0.19094901500011474
Training model 4108: (58,78) ...
0.19189279100010026
Training model 4109: (58,79) ...
0.23471976100017855
Training model 4110: (58,80) ...
0.14807255699997768
Training model 4111: (58,81) ...
0.18115779699974155
Training model 4112: (58,82) ...
0.20262066599980244
Training model 4113: (58,83) ...
0.14705031499988763
Training model 4114: (58,84) ...
0.16293417099996077
Training model 4115: (58,85) ...
0.21819302099993365
Training model 4116: (58,86) ...
0.1700087409999469
Training model 4117: (58,87) ...
0.1324592969999685
Training model 4118: (58,88) ...
0.12793745499993747
Training model 4119: (58,89) ...
0.12893458200005625
Training model 4120: (58,90) ...
0.12942127600035747
Training model 4121: (58,91) ...
0.15990409200003342
Training model 4122: (58,92) ...
0.14178922400014926
Training model 4123: (58,93) ...
0.1498027920001732
Training model 4124: (58,94) ...
0.14676036299988482
Training model 4125: (58,95) 

0.22224975300014194
Training model 4264: (62,80) ...
0.1403247680000277
Training model 4265: (62,81) ...
0.18130415799987531
Training model 4266: (62,82) ...
0.20432998199976282
Training model 4267: (62,83) ...
0.1493492409999817
Training model 4268: (62,84) ...
0.14217907900001592
Training model 4269: (62,85) ...
0.196660497999801
Training model 4270: (62,86) ...
0.17752310900004886
Training model 4271: (62,87) ...
0.12527097799966214
Training model 4272: (62,88) ...
0.1297487470001215
Training model 4273: (62,89) ...
0.1271074410001347
Training model 4274: (62,90) ...
0.12285828499989293
Training model 4275: (62,91) ...
0.13962941399995543
Training model 4276: (62,92) ...
0.12364217999993343
Training model 4277: (62,93) ...
0.1433577160000823
Training model 4278: (62,94) ...
0.12757906199976787
Training model 4279: (62,95) ...
0.12853785100014647
Training model 4280: (62,96) ...
0.12226652099980129
Training model 4281: (62,97) ...
0.1251298309998674
Training model 4282: (62,98) ...
0

0.10862712900006954
Training model 4420: (66,98) ...
0.1187571770001341
Training model 4421: (66,99) ...
0.10919202300010511
Training model 4422: (67,68) ...
0.6853475460002301
Training model 4423: (67,69) ...
0.20151490899979763
Training model 4424: (67,70) ...
0.12616411899989544
Training model 4425: (67,71) ...
0.21530753600018215
Training model 4426: (67,72) ...
0.11800126199977967
Training model 4427: (67,73) ...
0.1325662200001716
Training model 4428: (67,74) ...
0.11431013099991105
Training model 4429: (67,75) ...
0.12218544799998199
Training model 4430: (67,76) ...
0.20430858600002466
Training model 4431: (67,77) ...
0.1843313530002888
Training model 4432: (67,78) ...
0.13377967299993543
Training model 4433: (67,79) ...
0.2061559179996948
Training model 4434: (67,80) ...
0.13209649099962917
Training model 4435: (67,81) ...
0.21199948100002075
Training model 4436: (67,82) ...
0.21672800700025618
Training model 4437: (67,83) ...
0.15170112400028302
Training model 4438: (67,84) ..

0.2072612899996784
Training model 4577: (72,78) ...
0.15024072400001387
Training model 4578: (72,79) ...
0.14667945599967425
Training model 4579: (72,80) ...
0.10781378299998323
Training model 4580: (72,81) ...
0.11271817200031364
Training model 4581: (72,82) ...
0.12899007300029552
Training model 4582: (72,83) ...
0.11054868200017154
Training model 4583: (72,84) ...
0.11943086699966443
Training model 4584: (72,85) ...
0.13076562599962926
Training model 4585: (72,86) ...
0.11789416200008418
Training model 4586: (72,87) ...
0.10928477500010558
Training model 4587: (72,88) ...
0.13190599400013525
Training model 4588: (72,89) ...
0.1124885809999796
Training model 4589: (72,90) ...
0.11305635500002609
Training model 4590: (72,91) ...
0.13105566000012914
Training model 4591: (72,92) ...
0.15907677599989256
Training model 4592: (72,93) ...
0.14192785600016578
Training model 4593: (72,94) ...
0.1802271279998422
Training model 4594: (72,95) ...
0.14871703099970546
Training model 4595: (72,96) 

0.12557593199971961
Training model 4734: (78,94) ...
0.15123813199988945
Training model 4735: (78,95) ...
0.11659911100014142
Training model 4736: (78,96) ...
0.11534800599974915
Training model 4737: (78,97) ...
0.11382908099994893
Training model 4738: (78,98) ...
0.1341941619998579
Training model 4739: (78,99) ...
0.12902527699998245
Training model 4740: (79,80) ...
0.1201094819998616
Training model 4741: (79,81) ...
0.13663356699998985
Training model 4742: (79,82) ...
0.1559273499997289
Training model 4743: (79,83) ...
0.13155297499997687
Training model 4744: (79,84) ...
0.13205727399963507
Training model 4745: (79,85) ...
0.1391346719997273
Training model 4746: (79,86) ...
0.13705802599997696
Training model 4747: (79,87) ...
0.11668445300028907
Training model 4748: (79,88) ...
0.1344510399999308
Training model 4749: (79,89) ...
0.1231948940003349
Training model 4750: (79,90) ...
0.12633387000005314
Training model 4751: (79,91) ...
0.14058782599977349
Training model 4752: (79,92) ...

0.13124355599984483
Training model 4891: (88,96) ...
0.24985240600017278
Training model 4892: (88,97) ...
0.13195327799985535
Training model 4893: (88,98) ...
0.1110423649997756
Training model 4894: (88,99) ...
0.1476231279998501
Training model 4895: (89,90) ...
0.12644311299982292
Training model 4896: (89,91) ...
0.145393045000219
Training model 4897: (89,92) ...
0.154779888000121
Training model 4898: (89,93) ...
0.1732279309999285
Training model 4899: (89,94) ...
0.16466048800020872
Training model 4900: (89,95) ...
0.11801338499981284
Training model 4901: (89,96) ...
0.14236455899981593
Training model 4902: (89,97) ...
0.14176836300021023
Training model 4903: (89,98) ...
0.11082788099975005
Training model 4904: (89,99) ...
0.2518996369999513
Training model 4905: (90,91) ...
0.17034671399960644
Training model 4906: (90,92) ...
0.16408374599996023
Training model 4907: (90,93) ...
0.18998432699982004
Training model 4908: (90,94) ...
0.19211502799998925
Training model 4909: (90,95) ...
0

In [83]:
def majority_vote(array):
    most = max(list(map(array.count, array)))
    return random.choice(list(set(filter(lambda x: array.count(x) == most, array))))

In [27]:
def predict_pairwise_sk(clfs, x):
    preds = list(map(lambda c: c[0] if clfs[c].predict(x.reshape(1, -1)) == 1 else c[1], clfs))
    return majority_vote(preds)

In [None]:
# Validation set predictions
y_val_preds_linsvc_sk_ovo = [predict_pairwise_sk(pairwise_linsvc_sk_ovo, n) for n in X_val]

# Misclassification Error Rate for Validation Set
1 - np.mean(y_val_preds_linsvc_sk_ovo == y_val)

In [None]:
# Test set predictions
# y_test_preds_linsvc_sk_ovo = list(map(lambda n: predict_pairwise_sk(pairwise_linsvc_sk_ovo, n), X_test))
y_test_preds_linsvc_sk_ovo = [predict_pairwise_sk(pairwise_linsvc_sk_ovo, n) for n in X_test]

# Write to CSV for Kaggle submission
pd.DataFrame({'Category':y_test_preds_linsvc_sk_ovo}).reset_index()\
             .rename(columns={'index':'Id'}).to_csv('./comp2-subm_linsvc_sk_ovo.csv', index=False)

# Write trained models to pickle file
# pickle.dump( pairwise_clfs_sk, open( "pairwise_clfs_sk.p", "wb" ) )

# Write optimal lambdas to pickle file
# pickle.dump( opt_lamb_pairwise_sk, open( "opt_lamb_pairwise_sk.p", "wb" ) )



Accuracy (from Kaggle): 0.54666

Misclassification Error: 0.45334

Pretty good performance for OvO classifier! This didn't perform as well as my OvO L2-regularized Logistic Regression classifier (difference of ~3%), but I believe the difference can be accounted for better cross-validation for optimal regularization parameters from the Logistic Regression classifier.

### 4: sklearn.LinearSVC (one-vs-rest)

In a one-vs-rest fashion, for each class, train a linear SVM classifier using scikit-learn’s function LinearSVC, with the default value for λc. Compute the multi-class misclassification error obtained using these classifiers trained in a one-vs-rest fashion.

In [15]:
from sklearn.svm import LinearSVC

maxiter = 1000
target_accuracy = 1e-4
classes = 100

start = timer()
linsvc_sk_ovr = LinearSVC(fit_intercept=False, max_iter=maxiter, tol=target_accuracy, verbose=True, multi_class='ovr')
linsvc_sk_ovr.fit(X_train_unstd, y_train)
end = timer()
print(end - start)

[LibLinear]217.95099875800952




In [16]:
# Validation set predictions
# Accuracy for Validation Set (1 - Misclassification Error)
linsvc_sk_ovr.score(X_val_unstd, y_val)

0.4625

In [17]:
# Test set predictions
y_test_preds_linsvc_sk_ovr = linsvc_sk_ovr.predict(X_test_unstd)

# Write to CSV for Kaggle submission
pd.DataFrame({'Category':y_test_preds_linsvc_sk_ovr}).reset_index()\
             .rename(columns={'index':'Id'}).to_csv('./comp2-subm_linsvc_sk_ovr.csv', index=False)

Accuracy (from Kaggle): 0.45166

Misclassification Rate: 0.54834

Interestingly OvR appeared to perform worse than OvO linear SVM classification. I initially had my doubts about OvO as a multi-class classification problem initially, but these results are proving my doubts wrong. I reflect more about OvO vs OvR in the conclusion below.

Another interesting finding was that OvR linear SVM classification appears to converge much faster on unstandardized data. When I tried to train on the standardized data, it was taking an extraordinary amount of time, but unstandardized appeared to converge fairly quickly.

I'm unsure of the reason for this; all literature I've found online seems to suggest the opposite: if convergence is taking too long for SVM, it is suggested that you standardize the data first.

### 5: Training with my implementation of linear SVM (just picked 2 classes)

In [48]:
# Subset Data
X_train_subset, y_train_subset, X_val_subset, y_val_subset = subset_data(1, 0, X_train, y_train, X_val, y_val)

# Initialize things
n, d = X_train_subset.shape
lam = 1
beta_init = np.zeros(n)
theta_init = np.zeros(n)

K = gram_linear(X_train_subset, X_train_subset)
eta_init = initstepsize(K, lam)
maxiter = 10

# Run the algorithm
beta_list = fastgradalgo(beta_init, theta_init, K, y_train_subset, lam, eta_init, maxiter, eps=1e-3)
beta_T = beta_list[len(beta_list)-1]

Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached
Max number of iterations of backtracking line search reached


In [68]:
# Misclassification Error
misclassification_error(beta_T, X_train_subset, X_val_subset, y_val_subset.squeeze(), gram_linear)[0]

0.025

Confirmed with this test that my implementation of linear OvO SVM appears to work on a toy subset of 2 classes.

### 6: Training with my implementation of linear SVM (one-vs-one)

In [None]:
# Initialize things
n, d = X_train_subset.shape
lam = 1
beta_init = np.zeros(n)
theta_init = np.zeros(n)
maxiter = 10
pairwise_linsvc_ovo = dict()
t = 0

for i in range(classes):
    for j in range(i+1, classes):
        start = timer()
        print('Training model '+str(t)+': ('+str(i)+','+str(j)+') ...')
        beta_list = list()
        X_train_subset, y_train_subset = subset_data(i, j, X_train, y_train)
        K = gram_linear(X_train_subset, X_train_subset)
        eta_init = initstepsize(K, lam)
        
        beta_list = fastgradalgo(beta_init, theta_init, K, y_train_subset, lam, eta_init, maxiter, eps=1e-2)
        beta_T = beta_list[len(beta_list)-1]
        
        pairwise_linsvc_ovo[(i, j)] = beta_T
        
        end = timer()
        print(end - start)
        t += 1

Training model 0: (0,1) ...
2.1342854920076206
Training model 1: (0,2) ...
2.207382304011844
Training model 2: (0,3) ...
2.1286219830217306
Training model 3: (0,4) ...
2.211706027999753
Training model 4: (0,5) ...
2.1099595650157426
Training model 5: (0,6) ...
2.317591454979265
Training model 6: (0,7) ...
2.366494478977984
Training model 7: (0,8) ...
2.1107371099933516
Training model 8: (0,9) ...
2.2410722319909837
Training model 9: (0,10) ...
2.0303070319932885
Training model 10: (0,11) ...
2.332247363985516
Training model 11: (0,12) ...
2.298518066003453
Training model 12: (0,13) ...
2.3436273320112377
Training model 13: (0,14) ...
2.1609913780121133
Training model 14: (0,15) ...
2.1397291400062386
Training model 15: (0,16) ...
2.1406395949888974
Training model 16: (0,17) ...
2.1451269359968137
Training model 17: (0,18) ...
1.8254001589957625
Training model 18: (0,19) ...
2.166671246988699
Training model 19: (0,20) ...
2.144627733010566
Training model 20: (0,21) ...
1.997774625982856

2.1617739609791897
Training model 169: (1,72) ...
2.1334999880054966
Training model 170: (1,73) ...
2.1391311869956553
Training model 171: (1,74) ...
2.1544613229925744
Training model 172: (1,75) ...
2.2248445579898544
Training model 173: (1,76) ...
2.2322722929820884
Training model 174: (1,77) ...
2.186003848008113
Training model 175: (1,78) ...
1.9870705739886034
Training model 176: (1,79) ...
2.2211798619828187
Training model 177: (1,80) ...
2.1478725360066164
Training model 178: (1,81) ...
2.157945153012406
Training model 179: (1,82) ...
2.1333090759871993
Training model 180: (1,83) ...
2.1634680969873443
Training model 181: (1,84) ...
2.1392102239769883
Training model 182: (1,85) ...
2.1470186740043573
Training model 183: (1,86) ...
2.1345532510022167
Training model 184: (1,87) ...
2.1487194189976435
Training model 185: (1,88) ...
2.1330593369784765
Training model 186: (1,89) ...
1.998905818007188
Training model 187: (1,90) ...
2.1356798150227405
Training model 188: (1,91) ...
2.1

2.087732710002456
Training model 335: (3,45) ...
2.1247203910024837
Training model 336: (3,46) ...
2.1191093229863327
Training model 337: (3,47) ...
2.113000016019214
Training model 338: (3,48) ...
2.1252867500006687
Training model 339: (3,49) ...
2.1742976359964814
Training model 340: (3,50) ...
2.173365566006396
Training model 341: (3,51) ...
2.1198181599902455
Training model 342: (3,52) ...
2.1671958309889305
Training model 343: (3,53) ...
2.127542956994148
Training model 344: (3,54) ...
2.139564761018846
Training model 345: (3,55) ...
2.148718985990854
Training model 346: (3,56) ...
2.1450901970092673
Training model 347: (3,57) ...
2.1286419880052563
Training model 348: (3,58) ...
2.0960723540047184
Training model 349: (3,59) ...
2.122544429003028
Training model 350: (3,60) ...
2.1293318509997334
Training model 351: (3,61) ...
2.0884660049923696
Training model 352: (3,62) ...
2.1083399550116155
Training model 353: (3,63) ...
2.1504314439953305
Training model 354: (3,64) ...
2.01546

2.098711406986695
Training model 501: (5,22) ...
2.1745306590164546
Training model 502: (5,23) ...
2.2673940350068733
Training model 503: (5,24) ...
2.2755021439807024
Training model 504: (5,25) ...
2.1580666640074924
Training model 505: (5,26) ...
2.0260792799817864
Training model 506: (5,27) ...
2.0998754470201675
Training model 507: (5,28) ...
2.1263824609923176
Training model 508: (5,29) ...
2.1395570900058374
Training model 509: (5,30) ...
2.1251667989999987
Training model 510: (5,31) ...
2.1591971989837475
Training model 511: (5,32) ...
1.814287160988897
Training model 512: (5,33) ...
2.1266729240014683
Training model 513: (5,34) ...
2.115944723977009
Training model 514: (5,35) ...
2.1261243390035816
Training model 515: (5,36) ...
1.9617598869954236
Training model 516: (5,37) ...
2.1334336360159796
Training model 517: (5,38) ...
2.12710463500116
Training model 518: (5,39) ...
2.1216171880078036
Training model 519: (5,40) ...
2.113525378983468
Training model 520: (5,41) ...
2.1247

2.165527231001761
Training model 667: (6,95) ...
2.155392212996958
Training model 668: (6,96) ...
2.177077686996199
Training model 669: (6,97) ...
2.166996404994279
Training model 670: (6,98) ...
2.1706193390127737
Training model 671: (6,99) ...
2.156885884993244
Training model 672: (7,8) ...
2.182766589015955
Training model 673: (7,9) ...
2.213318447000347
Training model 674: (7,10) ...
2.157784854003694
Training model 675: (7,11) ...
2.2299826800008304
Training model 676: (7,12) ...
2.2185140420042444
Training model 677: (7,13) ...
2.298167522007134
Training model 678: (7,14) ...
2.036665003019152
Training model 679: (7,15) ...
2.2443395589943975
Training model 680: (7,16) ...
2.3020763949898537
Training model 681: (7,17) ...
2.106839726999169
Training model 682: (7,18) ...
2.146843901980901
Training model 683: (7,19) ...
2.301623359991936
Training model 684: (7,20) ...
2.2954816249839496
Training model 685: (7,21) ...
2.16541533000418
Training model 686: (7,22) ...
2.316104675002861

2.1262727309949696
Training model 833: (8,78) ...
2.132498671999201
Training model 834: (8,79) ...
2.135138935991563
Training model 835: (8,80) ...
2.127346794004552
Training model 836: (8,81) ...
2.138160061003873
Training model 837: (8,82) ...
2.1323034079978243
Training model 838: (8,83) ...
2.166081731003942
Training model 839: (8,84) ...
2.1206003400147893
Training model 840: (8,85) ...
2.1456715909880586
Training model 841: (8,86) ...
2.1369866220047697
Training model 842: (8,87) ...
2.137655125989113
Training model 843: (8,88) ...
2.1319473600015044
Training model 844: (8,89) ...
2.0171445720188785
Training model 845: (8,90) ...
2.1820588179980405
Training model 846: (8,91) ...
2.221115041000303
Training model 847: (8,92) ...
2.189824269997189
Training model 848: (8,93) ...
2.1775176289957017
Training model 849: (8,94) ...
2.1896705249964725
Training model 850: (8,95) ...
2.1961328590114135
Training model 851: (8,96) ...
2.1568837400118355
Training model 852: (8,97) ...
2.206652

2.319485578977037
Training model 998: (10,64) ...
2.262778962001903
Training model 999: (10,65) ...
2.1169611969962716
Training model 1000: (10,66) ...
2.2140485819836613
Training model 1001: (10,67) ...
2.311675759992795
Training model 1002: (10,68) ...
2.130570080014877
Training model 1003: (10,69) ...
2.116248655016534
Training model 1004: (10,70) ...
2.142624076019274
Training model 1005: (10,71) ...
2.170801943022525
Training model 1006: (10,72) ...
2.1251859089825302
Training model 1007: (10,73) ...
2.109263433027081
Training model 1008: (10,74) ...
2.131285351002589
Training model 1009: (10,75) ...
2.1197918789985124
Training model 1010: (10,76) ...
2.1480113190191332
Training model 1011: (10,77) ...
2.121526397007983
Training model 1012: (10,78) ...
2.1295907509920653
Training model 1013: (10,79) ...
2.1257108130084816
Training model 1014: (10,80) ...
2.12785668799188
Training model 1015: (10,81) ...
2.1190747960063163
Training model 1016: (10,82) ...
2.1263550920120906
Trainin

2.1637623909919057
Training model 1157: (12,48) ...
2.410427394002909
Training model 1158: (12,49) ...
2.911995806993218
Training model 1159: (12,50) ...
2.2635195949987974
Training model 1160: (12,51) ...
2.1943499380140565
Training model 1161: (12,52) ...
2.143553485017037
Training model 1162: (12,53) ...
2.1386664519959595
Training model 1163: (12,54) ...
2.1115075520065147
Training model 1164: (12,55) ...
2.152924524998525
Training model 1165: (12,56) ...
2.147705544019118
Training model 1166: (12,57) ...
2.0124865580000915
Training model 1167: (12,58) ...
2.1299753690254875
Training model 1168: (12,59) ...
2.1468259220127948
Training model 1169: (12,60) ...
2.0916478039871436
Training model 1170: (12,61) ...
2.155371601023944
Training model 1171: (12,62) ...
2.2805510780017357
Training model 1172: (12,63) ...
2.1500392069865484
Training model 1173: (12,64) ...
2.179674244020134
Training model 1174: (12,65) ...
2.1513820529798977
Training model 1175: (12,66) ...
2.1659795139858034


2.1463935729989316
Training model 1316: (14,36) ...
1.9812218170263804
Training model 1317: (14,37) ...
2.189056028990308
Training model 1318: (14,38) ...
2.225873255985789
Training model 1319: (14,39) ...
2.1968962989922147
Training model 1320: (14,40) ...
2.193232732999604
Training model 1321: (14,41) ...
2.0011320849880576
Training model 1322: (14,42) ...
2.1932756129826885
Training model 1323: (14,43) ...
2.55899491297896
Training model 1324: (14,44) ...
3.0340823190053925
Training model 1325: (14,45) ...
2.1379336430109106
Training model 1326: (14,46) ...
2.1401025719824247
Training model 1327: (14,47) ...
1.9888894739851821
Training model 1328: (14,48) ...
2.1449155769951176
Training model 1329: (14,49) ...
2.1394520080066286
Training model 1330: (14,50) ...
2.1392877229955047
Training model 1331: (14,51) ...
2.143240628996864
Training model 1332: (14,52) ...
2.1578737880045082
Training model 1333: (14,53) ...
2.1394130009866785
Training model 1334: (14,54) ...
2.1379487219965085

2.028747468983056
Training model 1475: (16,28) ...
2.1403604979859665
Training model 1476: (16,29) ...
2.224660687992582
Training model 1477: (16,30) ...
2.2169440599973314
Training model 1478: (16,31) ...
2.3284565710055176
Training model 1479: (16,32) ...
2.186337466002442
Training model 1480: (16,33) ...
2.194722804997582
Training model 1481: (16,34) ...
2.1691448880010284
Training model 1482: (16,35) ...
2.1665034200123046
Training model 1483: (16,36) ...
2.185879083990585
Training model 1484: (16,37) ...
2.171383462002268
Training model 1485: (16,38) ...
2.1691327349981293
Training model 1486: (16,39) ...
2.175415511999745
Training model 1487: (16,40) ...
2.176719757000683
Training model 1488: (16,41) ...
2.125096088013379
Training model 1489: (16,42) ...
2.249628933001077
Training model 1490: (16,43) ...
2.753099794994341
Training model 1491: (16,44) ...
2.384080988005735
Training model 1492: (16,45) ...
2.2194804580067284
Training model 1493: (16,46) ...
2.22923041600734
Trainin

2.20505582500482
Training model 1634: (18,24) ...
2.061851829988882
Training model 1635: (18,25) ...
2.200738282990642
Training model 1636: (18,26) ...
2.166549613000825
Training model 1637: (18,27) ...
2.188554952008417
Training model 1638: (18,28) ...
2.1751136030070484
Training model 1639: (18,29) ...
2.209686659014551
Training model 1640: (18,30) ...
2.1712563220062293
Training model 1641: (18,31) ...
2.2204448049888015
Training model 1642: (18,32) ...
2.2266822770179715
Training model 1643: (18,33) ...
2.2236240050115157
Training model 1644: (18,34) ...
2.26291871900321
Training model 1645: (18,35) ...
2.1766070480225608
Training model 1646: (18,36) ...
2.198401539993938
Training model 1647: (18,37) ...
2.1867749159864616
Training model 1648: (18,38) ...
2.316472898994107
Training model 1649: (18,39) ...
2.1785611730010714
Training model 1650: (18,40) ...
2.235601874999702
Training model 1651: (18,41) ...
2.2583734339859802
Training model 1652: (18,42) ...
2.152390211005695
Traini

2.147461567976279
Training model 1794: (20,25) ...
2.1862411060137674
Training model 1795: (20,26) ...
2.1710259079991374
Training model 1796: (20,27) ...
2.1717326329962816
Training model 1797: (20,28) ...
2.1699654550175183
Training model 1798: (20,29) ...
2.163715394010069
Training model 1799: (20,30) ...
2.17180939798709
Training model 1800: (20,31) ...
2.1799733709776774
Training model 1801: (20,32) ...
2.1828669589885976
Training model 1802: (20,33) ...
2.169737588003045
Training model 1803: (20,34) ...
2.017398937023245
Training model 1804: (20,35) ...
2.057833128987113
Training model 1805: (20,36) ...
2.1758614029968157
Training model 1806: (20,37) ...
2.1805927489767782
Training model 1807: (20,38) ...
2.180203762021847
Training model 1808: (20,39) ...
2.191424704011297
Training model 1809: (20,40) ...
2.1771993539878167
Training model 1810: (20,41) ...
2.2029927159892395
Training model 1811: (20,42) ...
2.2209898410073947
Training model 1812: (20,43) ...
2.250595510995481
Tra

In [None]:
pickle.dump( pairwise_linsvc_ovo, open( "pairwise_linsvc_ovo_betas.p", "wb" ) )

In [117]:
def pairwise_misclassification_error_predict(clfs, X_train, X_test, y_test, kernel):
    n_test = len(X_test)
    y_pred = np.zeros(n_test)
    y_vals = np.zeros(n_test)
    
    for i in range(n_test):
        y_pred[i] = majority_vote(list(map(
            lambda c: c[0] 
                if np.sign(np.dot(kernel(
                    subset_data(c[0], c[1], X_train, y_train)[0], X_test[i, :]
                ).reshape(-1), clfs[c])) == 1 
                else c[1], clfs)))
    
    if y_test is not None:
        return np.mean(y_pred != y_test)
    else:
        return y_pred

In [None]:
pairwise_misclassification_error_predict(pairwise_linsvc_ovo, X_train, X_val, y_val, gram_linear)

In [None]:
y_test_preds_linsvc_ovo = \
    pairwise_misclassification_error_predict(pairwise_linsvc_ovo, X_train, X_test, None, gram_linear)

# Write to CSV for Kaggle submission
pd.DataFrame({'Category':y_test_preds_linsvc_ovo}).reset_index()\
             .rename(columns={'index':'Id'}).to_csv('./comp2-subm_linsvc_ovo.csv', index=False)

Interestingly, I was able to train my one-vs-one SVM classifier in a reasonable amount of time (with the appropriate max_iteration parameters). However, I got stuck with making predicitions. At the point of writing, I've been running the prediction function for over 2 days and unfortunately I'm out of time to finish running it. 

Analyzing the computational complexity of my prediction function, it appears it should be O(n_test \* C) where C is the number of OvO classifiers = $\frac{k(k-1)}{2}$ where k is the number of classes. Thus this function runs at roughly O(n*$k^2$). 

To the best of my knowledge, all classifiers need to make predictions since we're taking a majority vote here for each prediction. Upon reflection perhaps my majority vote code could also be vectorized/optimized. I find the slowdown occurs when finding the random choice between tied codes. If we simply predicted the minimum mode of each prediction group, my code would run a lot faster.

### 7: Training with my implementation of linear SVM (one-vs-rest)

In [None]:
# Initialize things
classes = 100
n, d = X_train_subset.shape
lam = 1
beta_init = np.zeros(n)
theta_init = np.zeros(n)
maxiter = 10
linsvc_ovr = dict()
t = 0

for i in range(classes):
    start = timer()
    print('Training model '+str(i)+'...')
    beta_list = list()
    rest_of_classes = list(np.delete(np.arange(classes), i))
    X_train_subset, y_train_subset = subset_data(i, rest_of_classes, X_train, y_train)
    K = gram_linear(X_train_subset, X_train_subset)
    eta_init = initstepsize(K, lam)

    beta_list = fastgradalgo(beta_init, theta_init, K, y_train_subset, lam, eta_init, maxiter, eps=1e-1)
    beta_T = beta_list[len(beta_list)-1]

    linsvc_ovr[i] = beta_T

    end = timer()
    print(end - start)
    t += 1

In [None]:
def ovr_misclassification_error_predict(clfs, X_train, X_test, y_test, kernel):
    n_test = len(X_test)
    y_pred = np.zeros(n_test)
    y_vals = np.zeros(n_test)
    
    for i in range(n_test):
        y_pred[i] = majority_vote(list(map(
            lambda c: c 
                if np.dot(kernel(
                    subset_data(c, list(np.delete(np.arange(classes), c)), X_train, y_train)[0], X_test[i, :]
                ).reshape(-1), clfs[c]) > 0 
                else -1, clfs)))
    
    if y_test is not None:
        return np.mean(y_pred != y_test)
    else:
        return y_pred

In [None]:
ovr_misclassification_error_predict(linsvc_ovr, X_train, X_val, y_val, gram_linear)

In [None]:
y_test_preds_linsvc_ovr = \
    pairwise_misclassification_error_predict(pairwise_linsvc_ovr, X_train, X_test, None, gram_linear)

# Write to CSV for Kaggle submission
pd.DataFrame({'Category':y_test_preds_linsvc_ovo}).reset_index()\
             .rename(columns={'index':'Id'}).to_csv('./comp2-subm_linsvc_ovo.csv', index=False)

The above code runs my implementation of One-vs-Rest SVM classifier. As mentioned above due to time constraints I wasn't able to complete the running of this code. The classifier trains k models (where k is the number of classes) where each class is selected as the positive class once and the rest of the classes are treated as negative.

Come prediction time, each model will make a prediction: the selected prediction will be the class predicted to be the furthest away from the decision boundary (most positive value). If the prediction is negative for a particular classifier than no prediction is made for that particular classifier.

### Conclusion

In my experience doing this data competition project, it was interesting to me to see that OvO classifiers seemed to perform better than OvR classifiers. I reserve the possibility that I could have been doing something wrong, but after considering the two approaches more, it does make sense. 

One weakness of OvR classifiers is when the positive class is not predicted. Given a multi-class classification problem with many classes, the negative class predictions are essentially useless (it appears). For OvO, many negative class predictions may be wrong, but at least that specific pairwise OvO classifier is doing it's prescribed job at comparing one class vs the other. 

One tradeoff comes in terms of computational complexity. There needs to be more models trained for OvO classifiers (k*(k-1)/2, where as OvR only needs k classifiers. However, each OvO classifier only considers 2/k fraction of the dataset, while each OvR classifier trains on the whole dataset k times. Perhaps the relative values of k and n should give you clues as to which approach is best.