In [1]:
import numpy as np
import time
time_start = time.time()
X = np.genfromtxt('00_Data/train_X_dog_cat.csv', delimiter=',')
y = np.genfromtxt('00_Data/train_y_dog_cat.csv', delimiter=',')
print(X.shape)

n = X.shape[0]
d = X.shape[1]
print(y.shape)
time_end = time.time()
print("Done loading training data, using time:(min)", (time_end-time_start)/60)

(1953, 3072)
(1953,)
Done loading training data, using time:(min) 0.06249371767044067


In [2]:
X_test = np.genfromtxt('00_Data/test_X_dog_cat.csv', delimiter=',')
y_test = np.genfromtxt('00_Data/test_y_dog_cat.csv', delimiter=',')
m = X_test.shape[0]
print(m)

2000


In [6]:
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler().fit(X)
X = scaler.transform(X)
X_test = scaler.transform(X_test)


In [3]:
def linear_kernel(x1,x2):
    # x1 = (n,d) X2 = (m,d)
    return(np.dot(x1,x2.T))

def inhomogeneous_poly_kernel(x1,x2):
    n = x1.shape[0]
    m = x2.shape[0]
    return(np.power(np.ones(shape=(n,m)) + np.dot(x1,x2.T), 5))

def gaussian_kernel(x1,x2):
    from sklearn.gaussian_process.kernels import RBF
    return(RBF()(x1,x2))

In [13]:
def evaluation(ans, y):
    right = 0
    num = ans.shape[0]
    for i in range(ans.shape[0]):
        if (ans[i] - 0.5)*y[i] > 0:
            right += 1
    return(right/float(num))

In [54]:
def tolerable(delta,tol):
    from numpy import linalg as LA
    if LA.norm(delta,1) > tol:
        return False
    else:
        return True

In [51]:
def probability(alpha, Ki):
    s = np.dot(alpha, Ki)
    return(1/(1+np.exp(-s)))

def train(K,y,max_iter,regu_para, eta,tol):
    time_start = time.time()
    #alpha = np.random.uniform(size=(n))
    alpha = np.zeros(n)
    for t in range(max_iter):
        samples = np.random.permutation(n)[:batch_size]
        g = regu_para * alpha
        #g = np.zeros(n)
        for i in samples:
            pi = probability(alpha, K[i])
            g[i] = g[i] + (pi - (y[i]+1)/2.0)
        alpha = alpha - eta*g
        if tolerable(g, tol):
            break
        if t % 100 == 0:
            print("============== %d =================" % t)
            print("alpha:", alpha)
            print("train_accuracy:",evaluation(probability(alpha,K), y))
    time_end = time.time()
    print("Done training, using time:(min)", 
          (time_end-time_start)/60)
    return(alpha)

In [52]:
def experiment(kernel, X,y,X_test,y_test,regu_para, eta, max_iter=50000, batch_size=640):
    K = kernel(X,X_scaled)
    K_test = kernel(X,X_test)
    regu_para = 2.0
    eta = 0.001
    tol = 0.01
    alpha = train(K,y, max_iter,regu_para, eta,tol)
    print("Done training")
    print(alpha)
    print("training result")
    ans = probability(alpha,K)
    print(ans)
    print(evaluation(ans, y))
    print("test result")
    ans = probability(alpha,K_test)
    print(ans)
    print(evaluation(ans, y_test))

In [None]:
experiment(linear_kernel, X, y, X_test, y_test, regu_para=0, eta=0.01)

alpha: [-0.0005  0.      0.     ...  0.0005  0.      0.    ]
train_accuracy: 0.5202252944188428
alpha: [-0.00293835 -0.01973765 -0.02821809 ...  0.02570185  0.02077341
  0.02998518]
train_accuracy: 0.5192012288786483
alpha: [-0.00241005 -0.04428172 -0.04264285 ...  0.03898995  0.047769
  0.04913684]
train_accuracy: 0.5263696876600102
alpha: [-0.00433871 -0.06089966 -0.05332393 ...  0.05579073  0.06266677
  0.06474206]
train_accuracy: 0.5335381464413722
alpha: [-0.00961388 -0.07067441 -0.06669776 ...  0.06919193  0.07117133
  0.07665978]
train_accuracy: 0.5340501792114696
alpha: [-0.0184963  -0.07843941 -0.06418814 ...  0.0781083   0.08345807
  0.08536764]
train_accuracy: 0.5325140809011777
alpha: [-0.03844859 -0.08768363 -0.05362499 ...  0.08865056  0.08902669
  0.092448  ]
train_accuracy: 0.5376344086021505
alpha: [-0.04278443 -0.0959061  -0.04731228 ...  0.09623844  0.0925329
  0.09551301]
train_accuracy: 0.5340501792114696
alpha: [-0.04596004 -0.09415752 -0.04026316 ...  0.10261544 

alpha: [-0.10253423 -0.12938155 -0.0138437  ...  0.12609943  0.12652153
  0.12727076]
train_accuracy: 0.5325140809011777
alpha: [-0.10089706 -0.12493997 -0.01307299 ...  0.12583842  0.12372472
  0.124781  ]
train_accuracy: 0.5391705069124424
alpha: [-0.10782701 -0.12424716 -0.01406561 ...  0.12300266  0.12632963
  0.12295534]
train_accuracy: 0.5350742447516641
alpha: [-0.11048977 -0.12545961 -0.01327533 ...  0.12139624  0.12711176
  0.12308882]
train_accuracy: 0.5391705069124424
alpha: [-0.11418101 -0.12462369 -0.01394955 ...  0.11857926  0.13120349
  0.13151393]
train_accuracy: 0.5386584741423451
alpha: [-0.11082922 -0.12184925 -0.01448748 ...  0.12795911  0.12924247
  0.12841681]
train_accuracy: 0.540194572452637
alpha: [-0.10322647 -0.12695306 -0.01530629 ...  0.13481438  0.13017189
  0.13134367]
train_accuracy: 0.5345622119815668
alpha: [-0.1028038  -0.13241424 -0.01716648 ...  0.12837321  0.13288658
  0.13221835]
train_accuracy: 0.5320020481310804
alpha: [-0.10575285 -0.12861506 -

alpha: [-0.09955536 -0.12695932 -0.01129969 ...  0.12229618  0.13019712
  0.12967542]
train_accuracy: 0.5335381464413722
alpha: [-0.09939014 -0.13008089 -0.01058151 ...  0.12575908  0.13295767
  0.12427578]
train_accuracy: 0.5376344086021505
alpha: [-0.09734709 -0.12561864 -0.01189215 ...  0.1264432   0.13170669
  0.12715984]
train_accuracy: 0.5325140809011777
alpha: [-0.09862133 -0.12374726 -0.01012151 ...  0.12541717  0.12475082
  0.12320029]
train_accuracy: 0.5325140809011777
alpha: [-0.09391478 -0.12105324 -0.01104982 ...  0.12886057  0.12469652
  0.12831274]
train_accuracy: 0.5309779825908858
alpha: [-0.09141415 -0.11913174 -0.01067706 ...  0.13007841  0.12560884
  0.12660903]
train_accuracy: 0.5355862775217614
alpha: [-0.08957041 -0.12124046 -0.00874189 ...  0.12711286  0.1235955
  0.12644173]
train_accuracy: 0.5320020481310804
alpha: [-0.0867209  -0.12333795 -0.00797058 ...  0.12866282  0.12810272
  0.12988625]
train_accuracy: 0.5314900153609831
alpha: [-0.08557327 -0.1253814  -

alpha: [-0.10898789 -0.12042671 -0.01908792 ...  0.12343904  0.12850635
  0.12163205]
train_accuracy: 0.5386584741423451
alpha: [-0.10456394 -0.1211214  -0.01643863 ...  0.12181669  0.12921355
  0.11655786]
train_accuracy: 0.5314900153609831
alpha: [-0.10626083 -0.12182112 -0.01530653 ...  0.12044623  0.13199186
  0.11907207]
train_accuracy: 0.5442908346134152
alpha: [-0.10927629 -0.1272854  -0.01333344 ...  0.12561222  0.12744388
  0.12023341]
train_accuracy: 0.5325140809011777
alpha: [-0.10360264 -0.1298948  -0.01753026 ...  0.12267718  0.12642235
  0.12330488]
train_accuracy: 0.5391705069124424
alpha: [-0.10555836 -0.12935124 -0.01705761 ...  0.12430114  0.12883138
  0.1209591 ]
train_accuracy: 0.5407066052227343
alpha: [-0.10359589 -0.12844891 -0.01599506 ...  0.12276366  0.12988732
  0.12059924]
train_accuracy: 0.5391705069124424
alpha: [-0.10871727 -0.12912343 -0.0140841  ...  0.12245966  0.13199165
  0.12117141]
train_accuracy: 0.5325140809011777
alpha: [-0.1066361  -0.12965014 

alpha: [-0.09026024 -0.12964952 -0.02258646 ...  0.13703475  0.13315284
  0.1338966 ]
train_accuracy: 0.5320020481310804
alpha: [-0.09124538 -0.12881176 -0.02034472 ...  0.13471277  0.12603086
  0.12959058]
train_accuracy: 0.5381464413722479
alpha: [-0.09330101 -0.1301107  -0.01762704 ...  0.13106964  0.12452202
  0.12837664]
train_accuracy: 0.5386584741423451
alpha: [-0.09156049 -0.13371182 -0.01605198 ...  0.13112297  0.12454831
  0.12700951]
train_accuracy: 0.5320020481310804
alpha: [-0.09393745 -0.129206   -0.01604924 ...  0.13064973  0.1267329
  0.12568493]
train_accuracy: 0.5386584741423451
alpha: [-0.09303855 -0.13405934 -0.01423931 ...  0.12694736  0.12155197
  0.12484883]
train_accuracy: 0.5391705069124424
alpha: [-0.0929881  -0.14169017 -0.01334946 ...  0.12752421  0.11927139
  0.12686666]
train_accuracy: 0.5320020481310804
alpha: [-0.09578618 -0.13862856 -0.01366847 ...  0.13005241  0.11675105
  0.12341326]
train_accuracy: 0.536610343061956
alpha: [-0.0943848  -0.13809793 -0

alpha: [-0.10838104 -0.11958886 -0.0098364  ...  0.11877331  0.12587034
  0.12730034]
train_accuracy: 0.5309779825908858
alpha: [-0.10350444 -0.1223344  -0.01151331 ...  0.11815452  0.12670544
  0.13150842]
train_accuracy: 0.5355862775217614
alpha: [-0.10646023 -0.12734068 -0.0145004  ...  0.12484535  0.12615213
  0.12831649]
train_accuracy: 0.5386584741423451
alpha: [-0.10541697 -0.12787981 -0.01315401 ...  0.12719413  0.12203206
  0.12686725]
train_accuracy: 0.5396825396825397
alpha: [-0.10445145 -0.12390456 -0.0116827  ...  0.12777312  0.12448453
  0.1271876 ]
train_accuracy: 0.5355862775217614
alpha: [-0.10497155 -0.12566253 -0.01125467 ...  0.12762636  0.12189525
  0.12676199]
train_accuracy: 0.5320020481310804
alpha: [-0.10053743 -0.12321555 -0.009447   ...  0.12479359  0.12055487
  0.12476112]
train_accuracy: 0.5350742447516641
alpha: [-0.09252947 -0.12086278 -0.00824595 ...  0.13294953  0.12429705
  0.12818016]
train_accuracy: 0.5386584741423451
alpha: [-0.09110038 -0.12462629 

alpha: [-0.09302129 -0.13583436 -0.01119076 ...  0.12526169  0.12444984
  0.12477511]
train_accuracy: 0.5442908346134152
alpha: [-0.09886917 -0.13369374 -0.01055298 ...  0.12232334  0.12784821
  0.12385331]
train_accuracy: 0.5314900153609831
alpha: [-0.09672409 -0.13390055 -0.00949427 ...  0.12561464  0.12916486
  0.12423278]
train_accuracy: 0.5427547363031234
alpha: [-0.0987142  -0.1333417  -0.00779848 ...  0.12070196  0.1332375
  0.12178588]
train_accuracy: 0.5386584741423451
alpha: [-0.09661892 -0.12809832 -0.01123013 ...  0.11884619  0.12830845
  0.12420751]
train_accuracy: 0.5386584741423451
alpha: [-0.10008463 -0.12819002 -0.01662073 ...  0.1180798   0.13043777
  0.12513898]
train_accuracy: 0.5350742447516641
alpha: [-0.09943041 -0.1284141  -0.01863891 ...  0.11728864  0.13397903
  0.12590714]
train_accuracy: 0.5335381464413722
alpha: [-0.10286494 -0.1276896  -0.01913764 ...  0.11749114  0.13286621
  0.12116415]
train_accuracy: 0.5371223758320532
alpha: [-0.09794263 -0.12513718 -