In [1]:
import scipy.io as sio
import numpy as np
import numpy.random as nr
from sklearn import preprocessing

In [2]:
train_data = sio.loadmat('./dataset/train.mat')
test_data = sio.loadmat('./dataset/test.mat')
tr_img = train_data['train_images']
tr_lb_num = train_data['train_labels'][:,0]
tr_img = np.reshape(tr_img, (784, 60000), order = 'F')
ts_img = np.reshape(np.transpose(test['test_images']), (784, 10000), order = 'F')
all_img = np.append(tr_img, ts_img, axis = 1)

In [50]:
tr_lb_num = train_data['train_labels'][:,0]

In [3]:
temp_lb = np.zeros((len(tr_lb),10))

for i in np.arange(len(tr_lb)):
    j = tr_lb[i]
    temp_lb[i,j] = 1

tr_lb = temp_lb

In [4]:
all_img = all_img.astype(float) # change data type from int to float, in order to 
                                # facilitate calculations for standardization
all_img = preprocessing.scale(all_img, axis = 1)
ts_img = all_img[:, 60000:70000]
tr_img = all_img[:, 0:60000]
valid_ind = nr.choice(60000, 10000, replace = False)
vd_img = tr_img[: , valid_ind]
tr_img = tr_img[:, np.setdiff1d(np.arange(60000), valid_ind)]
vd_lb = tr_lb[valid_ind, :]
tr_lb = tr_lb[np.setdiff1d(np.arange(60000), valid_ind), :]

In [61]:
vd_lb_num = tr_lb_num[valid_ind]
tr_lb_num = tr_lb_num[np.setdiff1d(np.arange(60000), valid_ind)]

In [5]:
nr.seed(0)
n_in = 784
n_hid = 200
n_out = 10
epsilon = 0.01
V0 = nr.normal(scale = epsilon, size = (n_hid, n_in + 1))
W0 = nr.normal(scale = epsilon, size = (n_out, n_hid + 1))

In [6]:
def add_bias(mat):
    if len(mat.shape) > 1:
        ncol = mat.shape[1]
        temp = np.array([[1.0 for j in range(ncol)]])
        mat = np.append(mat, temp, axis = 0)
    else:
        mat = np.append(mat, [1.0], axis = 0)
    return mat

tr_img = add_bias(tr_img) # add one column of 1's to all data
tr_img.shape




(785, 50000)

In [57]:
def mean_squared(true, pred):
    temp = np.square(true - pred)
    err = np.sum(temp)
    return err/2

def cross_entropy(true, pred):
    n,k = true.shape
    ind = (true == 1)
    temp = np.sum(true[ind] * np.log(pred[ind])) + np.sum((1-true[~ind]) * np.log(1-pred[~ind]))
    err = - temp
    return err


   
def sigmoid_stb(mat):
    # Numerically-stable sigmoid function
    
    ind = (mat >= 0)
    temp = np.zeros(mat.shape)
    temp[ind] = 1/(1+np.exp(-mat[ind]))
    z =np.exp(mat[~ind])
    temp[~ind] = z /(1+z)
    
    return temp


def sigmoid(mat):
    temp = 1/(1+ np.exp(- mat))
    return temp

def predict( V,W, img):
    temp = np.dot(V, img)
    hidden = np.tanh(temp)
    hidden = add_bias(hidden)
    temp = np.dot(W, hidden)
    return np.transpose(sigmoid_stb(temp))

def calculate_loss(V,W, img, true_label, loss_fun):
    pred_label = predict(V, W, img)
    loss = loss_fun(true_label, pred_label)
    return loss

def misclassification(V,W,img, true_num):
    pred = predict(V,W,img)
    pred_num = np.argmax(pred, axis = 1)
    return np.sum(pred_num != true_num)/len(true_num)
    


In [40]:
def find_gradient(V,W, i, loss_fun_name):
    
    x = tr_img[:,i:i+1]
    y = np.transpose(tr_lb[i:i+1,:])
    A = np.dot(V, x)
    B = np.tanh(A)
    B_bias = add_bias(B)
    C = np.dot(W, B_bias)
    D = sigmoid_stb(C)

    I = np.identity(n_out)
    if loss_fun_name == 'mean_squared':
        dEdD = - y + D
    elif loss_fun_name == 'cross_entropy':
        ind = (y == 1)
        temp = np.zeros(y.shape)
        temp[ind] = - y[ind]/D[ind]
        temp[~ind] = (1-y[~ind])/(1-D[~ind])
        dEdD = temp
        

    dDdC = np.multiply(D, 1-D)
    dCdB = np.transpose(W[:, :n_hid])
    dBdA = 1- np.square(B)
    
    dEdC = np.multiply(dDdC, dEdD)
    dEdB = np.dot(dCdB, dEdC)
    dEdA = np.multiply(dBdA, dEdB)
    dEdV = np.array([])
    dEdW = np.array([])
 
    
    dAdV = x[:,0]
    dEdV = np.outer(dEdA[:,0], dAdV)

    dCdW = B_bias[:,0]
    dEdW = np.outer(dEdC[:,0], dCdW)

        
    return np.concatenate((dEdV.ravel('F'), dEdW.ravel('F')))


def column_to_matrix(vec, r, c):
    return vec.reshape((r,c), order = 'F')
    
def matrix_to_column(mat):
    return mat.ravel(order = 'F')
    

In [9]:
b = find_gradient(V0, W0, 2, 'cross_entropy')
def numerical_gradient(V, W, i, eps = 1e-8):
    grad = np.concatenate((matrix_to_column(V), matrix_to_column(W)))

    num_grad = np.zeros(grad.shape)
    temp = grad
    for j in range(len(grad)):
        
        temp[j] = grad[j] + eps
        V_temp = column_to_matrix(temp[0:n_hid*(n_in + 1)], n_hid, n_in + 1)
        W_temp = column_to_matrix(temp[n_hid*(n_in + 1):], n_out, n_hid + 1)
        loss1 = calculate_loss(V_temp,W_temp, tr_img[:, i:i+1], tr_lb[i:i+1,:], mean_squared)
        

        temp[j] = grad[j] - 2 * eps
        V_temp = column_to_matrix(temp[0:n_hid*(n_in + 1)], n_hid, n_in + 1)
        W_temp = column_to_matrix(temp[n_hid*(n_in + 1):], n_out, n_hid + 1)
        loss2 = calculate_loss(V_temp,W_temp, tr_img[:, i:i+1],tr_lb[i:i+1,:], mean_squared)
        
        num_grad[j] = (loss1-loss2)/(2*eps)
        temp = grad
        
    return num_grad
        
    
def numerical_gradient_cross_entropy(V, W, i, eps = 1e-8):
    grad = np.concatenate((matrix_to_column(V), matrix_to_column(W)))

    num_grad = np.zeros(grad.shape)
    temp = grad
    for j in range(len(grad)):
        
        temp[j] = grad[j] + eps
        V_temp = column_to_matrix(temp[0:n_hid*(n_in + 1)], n_hid, n_in + 1)
        W_temp = column_to_matrix(temp[n_hid*(n_in + 1):], n_out, n_hid + 1)
        loss1 = calculate_loss(V_temp,W_temp, tr_img[:, i:i+1], tr_lb[i:i+1,:], cross_entropy)
        

        temp[j] = grad[j] - 2 * eps
        V_temp = column_to_matrix(temp[0:n_hid*(n_in + 1)], n_hid, n_in + 1)
        W_temp = column_to_matrix(temp[n_hid*(n_in + 1):], n_out, n_hid + 1)
        loss2 = calculate_loss(V_temp,W_temp, tr_img[:, i:i+1],tr_lb[i:i+1,:], cross_entropy)
        
        num_grad[j] = (loss1-loss2)/(2*eps)
        temp = grad
        
    return num_grad
        
num_grad_cross = numerical_gradient_cross_entropy(V0, W0, 2)

In [41]:
def train(V, W, img, true, index, start, end, step):
    n = len(true)
    epoch = 0
    step = 0.01
    V_temp = V
    W_temp = W
    
    VW_temp = np.concatenate((matrix_to_column(V), matrix_to_column(W)))
    
    for i in index[start * 1000: end * 1000]:
        grad = find_gradient(V_temp, W_temp, i, 'mean_squared')
        VW_temp = VW_temp - step * grad
        V_temp = column_to_matrix(VW_temp[0:n_hid*(n_in + 1)], n_hid, n_in + 1)
        W_temp = column_to_matrix(VW_temp[n_hid*(n_in + 1):], n_out, n_hid + 1)


    return (V_temp, W_temp)
            

In [25]:
V_temp = V0
W_temp = W0
ind = nr.choice(50000,50000, replace = False)
loss = []
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    print('j:', loss_temp)
    loss = np.append(loss, [loss_temp])

j: 24966.316093
j: 15453.2102276
j: 11989.0813818
j: 10185.7701805
j: 8978.12291982
j: 8245.09613124
j: 7885.60807255
j: 7342.83743417
j: 7037.11591919
j: 6755.192499
j: 6467.3166437
j: 6382.57642781
j: 6144.14366289
j: 5946.6077592
j: 5819.33277965
j: 5718.52110709
j: 5615.30886097
j: 5503.18867482
j: 5371.45580887
j: 5284.95304697
j: 5114.5221942
j: 5080.11327407
j: 4941.17846776
j: 4860.36049486
j: 4819.44926474
j: 4752.25213291
j: 4658.20909661
j: 4542.1796784
j: 4535.81037602
j: 4486.79010302
j: 4474.27184489
j: 4325.49886134
j: 4259.41465115
j: 4227.78780027
j: 4221.59135082
j: 4132.10437112
j: 4090.04372243
j: 4054.66798276
j: 3977.11561183
j: 3941.15649738
j: 3883.10343628
j: 3818.72103135
j: 3808.81525277
j: 3739.33910793
j: 3699.68808487
j: 3690.53221229
j: 3653.44797654
j: 3631.96808956
j: 3590.59149197
j: 3565.3734661


In [33]:
V1, W1 = (V_temp, W_temp)
loss1 = loss

In [42]:
V_temp = V1
W_temp = W1
loss = loss1
ind = nr.choice(50000,50000, replace = False)
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    print(j, ':', loss_temp)
    loss = np.append(loss, [loss_temp])

0 : 3482.50340147
1 : 3495.15003139
2 : 3450.82598257
3 : 3392.19072646
4 : 3364.5324147
5 : 3403.47574984
6 : 3329.01380836
7 : 3364.22500559
8 : 3266.16708432
9 : 3220.09844226
10 : 3263.36108398
11 : 3210.88174484
12 : 3198.97333064
13 : 3199.22419166
14 : 3112.79356454
15 : 3090.55871914
16 : 3062.12539353
17 : 3056.5541814
18 : 3074.4045549
19 : 2995.7324759
20 : 2966.40387636
21 : 2964.04605217
22 : 2916.13944754
23 : 2914.08895153
24 : 2903.18423858
25 : 2909.64219573
26 : 2870.92678926
27 : 2848.57650209
28 : 2832.22020295
29 : 2785.78191992
30 : 2746.77456287
31 : 2737.6700142
32 : 2730.8132854
33 : 2672.57316946
34 : 2684.19814136
35 : 2687.88173154
36 : 2672.41222466
37 : 2686.08716164
38 : 2630.49499685
39 : 2595.81484022
40 : 2567.37241406
41 : 2578.9926902
42 : 2629.8276593
43 : 2513.95858318
44 : 2501.51088436
45 : 2474.27796892
46 : 2464.89356759
47 : 2450.13990008
48 : 2457.70647508
49 : 2445.78847474


In [43]:
V2, W2, loss2 = (V_temp, W_temp, loss)

In [48]:
V_temp = V2
W_temp = W2
loss = loss2
ind = nr.choice(50000,50000, replace = False)
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    print(j, ':', loss_temp)
    loss = np.append(loss, [loss_temp])

0 : 2404.68453593
1 : 2414.10472024
2 : 2395.40974939
3 : 2388.63919324
4 : 2381.40142433
5 : 2361.33871954
6 : 2344.60153391
7 : 2314.58416826
8 : 2307.69837703
9 : 2287.17792633
10 : 2315.22488132
11 : 2283.19774372
12 : 2276.61358051
13 : 2296.60223352
14 : 2243.68188967
15 : 2228.16535221
16 : 2211.29847239
17 : 2240.84071695
18 : 2195.9516599
19 : 2158.88352495
20 : 2182.40723677
21 : 2203.87549129
22 : 2160.41003934
23 : 2141.86605859
24 : 2128.29953105
25 : 2115.19394689
26 : 2099.34571958
27 : 2071.27256968
28 : 2053.42078225
29 : 2097.48912955
30 : 2058.00264998
31 : 2090.28104336
32 : 2020.69997339
33 : 2026.06937488
34 : 1997.25095899
35 : 1998.12162228
36 : 2001.55962089
37 : 2004.22421538
38 : 1996.72308005
39 : 1973.16810813
40 : 1922.56268248
41 : 1947.71804508
42 : 1922.12840947
43 : 1910.47591072
44 : 1914.37749385
45 : 1897.908399
46 : 1898.1954439
47 : 1885.7634308
48 : 1865.0280181
49 : 1866.76751866


In [49]:
V3, W3, loss3 = V_temp, W_temp, loss

In [63]:
V_temp = V3
W_temp = W3
loss = loss3
ind = nr.choice(50000,50000, replace = False)
misclass = []
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    mis_temp = misclassification(V_temp, W_temp, tr_img, tr_lb_num)
    print(j, ':', loss_temp, mis_temp)
    loss = np.append(loss, [loss_temp])
    misclass = np.append(misclass, [mis_temp])

0 : 1865.27365071 0.04034
1 : 1843.56006603 0.04004
2 : 1816.34007496 0.03916
3 : 1816.64658353 0.03948
4 : 1800.45610563 0.0393
5 : 1817.89330066 0.03966
6 : 1789.39547419 0.0395
7 : 1782.92041035 0.03822
8 : 1787.09112761 0.03874
9 : 1790.65311702 0.03912
10 : 1740.8299757 0.0378
11 : 1760.57873694 0.03822
12 : 1729.74045768 0.03818
13 : 1737.67724543 0.0382
14 : 1745.30903002 0.03826
15 : 1718.49068021 0.03778
16 : 1711.06000332 0.0378
17 : 1702.82843488 0.03694
18 : 1689.91238345 0.03614
19 : 1705.23463846 0.0366
20 : 1686.29455482 0.036
21 : 1702.21048772 0.03664
22 : 1668.15356592 0.03556
23 : 1646.85848715 0.0354
24 : 1671.92604151 0.03544
25 : 1671.77222462 0.03592
26 : 1635.68155738 0.03548
27 : 1625.46773148 0.03478
28 : 1648.25935393 0.035
29 : 1620.99953138 0.03452
30 : 1617.45774936 0.03466
31 : 1629.58265688 0.03468
32 : 1622.77337998 0.03468
33 : 1590.57466702 0.03408
34 : 1576.77201905 0.03368
35 : 1581.47831439 0.03402
36 : 1564.88633807 0.03306
37 : 1565.84968458 0.03

In [64]:
V4, W4, loss4, misclass4 = V_temp, W_temp, loss, misclass

In [65]:
V_temp = V4
W_temp = W4
loss = loss4
ind = nr.choice(50000,50000, replace = False)
misclass = misclass4
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5*0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    mis_temp = misclassification(V_temp, W_temp, tr_img, tr_lb_num)
    print(j, ':', loss_temp, mis_temp)
    loss = np.append(loss, [loss_temp])
    misclass = np.append(misclass, [mis_temp])

0 : 1477.22684578 0.03158
1 : 1454.66709448 0.03116
2 : 1451.02337926 0.03118
3 : 1451.00399606 0.03076
4 : 1440.45450582 0.03088
5 : 1435.68963056 0.03084
6 : 1440.24534517 0.0307
7 : 1436.49018962 0.03048
8 : 1421.22736444 0.03032
9 : 1430.96078307 0.0308
10 : 1417.23381028 0.02946
11 : 1409.52268003 0.02984
12 : 1437.69329207 0.02996
13 : 1415.17779053 0.02976
14 : 1393.95140293 0.02938
15 : 1378.73705951 0.02914
16 : 1395.36520458 0.02976
17 : 1376.31973209 0.02836
18 : 1371.07703121 0.02884
19 : 1369.127901 0.02872
20 : 1370.91032191 0.02922
21 : 1376.36072916 0.02882
22 : 1365.92313465 0.02848
23 : 1371.90336452 0.02832
24 : 1362.08731051 0.02842
25 : 1329.62833917 0.02792
26 : 1320.92783885 0.02768
27 : 1310.70938353 0.02722
28 : 1331.00898196 0.02766
29 : 1340.20347457 0.02746
30 : 1301.19162495 0.02688
31 : 1304.64394016 0.02696
32 : 1289.15489999 0.02676
33 : 1334.47254894 0.02784
34 : 1310.35759923 0.0273
35 : 1288.02637252 0.0267
36 : 1289.56030362 0.02674
37 : 1271.9387721

In [66]:
V5, W5, loss5, misclass5 = V_temp, W_temp, loss, misclass

In [69]:
V_temp = V5
W_temp = W5
loss = loss5
ind = nr.choice(50000,50000, replace = False)
misclass = misclass5
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5*0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    mis_temp = misclassification(V_temp, W_temp, tr_img, tr_lb_num)
    print(j, ':', loss_temp, mis_temp)
    loss = np.append(loss, [loss_temp])
    misclass = np.append(misclass, [mis_temp])

0 : 1243.0284382 0.02576
1 : 1203.58381855 0.02518
2 : 1215.24498653 0.02486
3 : 1205.40067591 0.02484
4 : 1183.22876294 0.02444
5 : 1176.26337262 0.02452
6 : 1197.06687547 0.0245
7 : 1195.29209815 0.02476
8 : 1187.76525418 0.02492
9 : 1172.20755547 0.02458
10 : 1172.53979677 0.02462
11 : 1155.57179811 0.02412
12 : 1170.04381481 0.02372
13 : 1161.86660723 0.02394
14 : 1158.03869369 0.024
15 : 1154.37993893 0.02338
16 : 1134.58420251 0.02346
17 : 1145.14415663 0.02364
18 : 1128.02793427 0.02362
19 : 1128.01957347 0.02348
20 : 1121.01261717 0.02328
21 : 1115.74243688 0.02304
22 : 1122.77434552 0.02326
23 : 1115.68870863 0.02324
24 : 1109.45480949 0.02306
25 : 1107.87784734 0.02346
26 : 1112.19245051 0.02318
27 : 1107.82309779 0.023
28 : 1095.66580897 0.0226
29 : 1092.69637245 0.02282
30 : 1081.06774744 0.02286
31 : 1091.68630652 0.02254
32 : 1102.08105543 0.02224
33 : 1097.55898248 0.02268
34 : 1093.59314716 0.02252
35 : 1075.9226693 0.02226
36 : 1061.85545373 0.02206
37 : 1052.58526442 

In [70]:
V6, W6, loss6, misclass6 = V_temp, W_temp, loss, misclass

In [73]:
misclassification(V6, W6, add_bias(vd_img), vd_lb_num)

0.040300000000000002

In [74]:
V_temp = V6
W_temp = W6
loss = loss6
ind = nr.choice(50000,50000, replace = False)
misclass = misclass6
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5*0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    mis_temp = misclassification(V_temp, W_temp, tr_img, tr_lb_num)
    print(j, ':', loss_temp, mis_temp)
    loss = np.append(loss, [loss_temp])
    misclass = np.append(misclass, [mis_temp])

0 : 998.434868544 0.02026
1 : 994.262534929 0.02006
2 : 1002.88342086 0.02028
3 : 985.433737393 0.02016
4 : 981.876944178 0.02028
5 : 995.855837003 0.02034
6 : 974.542001838 0.01968
7 : 987.075411887 0.01996
8 : 984.616922567 0.0199
9 : 968.352027718 0.02004
10 : 963.23529442 0.01978
11 : 961.787101963 0.01952
12 : 976.467221593 0.0197
13 : 953.813932033 0.01984
14 : 964.269509145 0.0197
15 : 992.95056636 0.01966
16 : 962.080268932 0.01906
17 : 955.231303195 0.01886
18 : 964.713303674 0.0187
19 : 938.862395781 0.01864
20 : 952.771245927 0.01904
21 : 933.686806574 0.01868
22 : 935.587335813 0.01894
23 : 930.545600756 0.01894
24 : 925.794532894 0.01874
25 : 922.874716852 0.0188
26 : 917.370090158 0.01894
27 : 923.845295558 0.01862
28 : 910.835211634 0.01852
29 : 917.16171097 0.01866
30 : 921.057398135 0.01848
31 : 915.483573023 0.01854
32 : 935.824487634 0.01834
33 : 922.451679205 0.01802
34 : 903.924515604 0.01806
35 : 903.845394504 0.01808
36 : 908.537024622 0.01828
37 : 905.490493712 

In [76]:
V7, W7, loss7, misclass7 = V_temp, W_temp, loss, misclass
misclassification(V_temp, W_temp, add_bias(vd_img), vd_lb_num)

0.039

In [78]:
V_temp = V7
W_temp = W7
loss = loss7
ind = nr.choice(50000,50000, replace = False)
misclass = misclass7
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5*0.5 *0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    mis_temp = misclassification(V_temp, W_temp, tr_img, tr_lb_num)
    print(j, ':', loss_temp, mis_temp)
    loss = np.append(loss, [loss_temp])
    misclass = np.append(misclass, [mis_temp])

0 : 847.215530716 0.01692
1 : 839.146997375 0.0168
2 : 846.988258702 0.01682
3 : 832.769556529 0.01682
4 : 841.992159293 0.01694
5 : 839.582969205 0.01684
6 : 822.394229446 0.01664
7 : 838.234716159 0.0169
8 : 836.895659572 0.01678
9 : 838.521696632 0.01646
10 : 824.860341883 0.01626
11 : 816.62941189 0.01622
12 : 817.682819339 0.01624
13 : 805.832326129 0.01626
14 : 817.299092731 0.01636
15 : 804.646347213 0.01604
16 : 822.843158651 0.0162
17 : 811.367043203 0.01578
18 : 803.324828736 0.01536
19 : 800.483285225 0.01546
20 : 794.270384442 0.01556
21 : 799.040451902 0.01572
22 : 800.934879015 0.01606
23 : 797.020291097 0.0157
24 : 797.288924395 0.01548
25 : 804.166960008 0.01552
26 : 801.843551893 0.01572
27 : 784.319148149 0.01532
28 : 789.558400388 0.01558
29 : 783.945380732 0.0157
30 : 773.450826019 0.01526
31 : 778.106064586 0.0152
32 : 778.931520926 0.0155
33 : 797.947671444 0.01594
34 : 774.685290229 0.01534
35 : 773.021199086 0.01546
36 : 768.0574536 0.0152
37 : 755.045997466 0.0

In [79]:
V8, W8, loss8, misclass8 = V_temp, W_temp, loss, misclass
misclassification(V_temp, W_temp, add_bias(vd_img), vd_lb_num)

0.036600000000000001

In [80]:
V_temp = V8
W_temp = W8
loss = loss8
ind = nr.choice(50000,50000, replace = False)
misclass = misclass8
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5*0.5 *0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    mis_temp = misclassification(V_temp, W_temp, tr_img, tr_lb_num)
    print(j, ':', loss_temp, mis_temp)
    loss = np.append(loss, [loss_temp])
    misclass = np.append(misclass, [mis_temp])

0 : 729.631872863 0.01424
1 : 719.916506409 0.01418
2 : 725.120290997 0.01428
3 : 722.291491224 0.01418
4 : 722.727115197 0.01414
5 : 715.351669317 0.01402
6 : 713.990598253 0.01416
7 : 715.493517151 0.01418
8 : 717.989115607 0.0142
9 : 707.718708336 0.01412
10 : 714.856515265 0.01422
11 : 711.438226028 0.01408
12 : 726.29492754 0.01398
13 : 700.861442876 0.0137
14 : 701.829066375 0.01388
15 : 692.996719646 0.01404
16 : 688.875128338 0.01386
17 : 695.135125089 0.01404
18 : 694.408669686 0.01406
19 : 686.606665808 0.01396
20 : 687.617062474 0.01372
21 : 684.320146857 0.01378
22 : 694.050170133 0.01374
23 : 683.443603855 0.0138
24 : 682.152887531 0.01354
25 : 695.8247924 0.01346
26 : 681.585712384 0.01374
27 : 687.237295695 0.01352
28 : 676.906751377 0.01354
29 : 676.27588549 0.01352
30 : 675.782475059 0.01352
31 : 666.461211353 0.0134
32 : 664.702493221 0.0131
33 : 670.79317903 0.0133
34 : 656.363575999 0.01314
35 : 658.990088196 0.01346
36 : 658.190551632 0.01336
37 : 650.380531942 0.0

In [81]:
V9, W9, loss9, misclass9 = V_temp, W_temp, loss, misclass
misclassification(V_temp, W_temp, add_bias(vd_img), vd_lb_num)

0.035700000000000003

In [82]:
V_temp = V9
W_temp = W9
loss = loss9
ind = nr.choice(50000,50000, replace = False)
misclass = misclass9
for j in range(50):
    V_temp, W_temp = train(V_temp, W_temp, tr_img, tr_lb, ind, j, j+1, 0.01* 0.5*0.5 *0.5)
    loss_temp = calculate_loss(V_temp, W_temp, tr_img, tr_lb, mean_squared)
    mis_temp = misclassification(V_temp, W_temp, tr_img, tr_lb_num)
    print(j, ':', loss_temp, mis_temp)
    loss = np.append(loss, [loss_temp])
    misclass = np.append(misclass, [mis_temp])

0 : 624.726967436 0.01238
1 : 629.005044544 0.01238
2 : 621.892850067 0.01252
3 : 630.02009211 0.01262
4 : 617.743183879 0.01232
5 : 616.05327215 0.01234
6 : 622.846626076 0.01252
7 : 620.30638301 0.01248
8 : 618.109371878 0.01232
9 : 637.156903517 0.01278
10 : 616.500104895 0.01248
11 : 612.56655295 0.01248
12 : 609.143875895 0.01212
13 : 602.273399415 0.01238
14 : 604.608142234 0.01222
15 : 611.325902423 0.01216
16 : 616.869307553 0.01208
17 : 612.667158228 0.01228
18 : 600.008407117 0.01194
19 : 612.747398252 0.01216
20 : 610.053085239 0.01232
21 : 605.586891342 0.01206
22 : 598.198921246 0.01206
23 : 592.931195085 0.01204
24 : 590.792489665 0.01192
25 : 591.347145886 0.01192
26 : 600.220127153 0.0119
27 : 584.731333481 0.01166
28 : 582.25101036 0.01188
29 : 586.782273436 0.01166
30 : 589.279258442 0.01162
31 : 587.228460735 0.0118
32 : 579.143599102 0.01158
33 : 584.241134122 0.01178
34 : 576.884310138 0.0116
35 : 576.783914238 0.01148
36 : 575.563799549 0.01138
37 : 570.40563597 0

In [83]:
V10, W10, loss10, misclass10 = V_temp, W_temp, loss, misclass
misclassification(V_temp, W_temp, add_bias(vd_img), vd_lb_num)

0.036299999999999999

In [84]:
pred_kag = predict(V9, W9, add_bias(ts_img))
pred_num = np.argmax(pred_kag,axis=1)

In [97]:
pred_num.shape

(10000,)

In [96]:
pred_txt = np.asarray([[i+1, pred_num[i]] for i in np.arange(10000)])
np.savetxt('pred_mean.csv',pred_txt ,fmt = '%1.u' , delimiter = ',', header = 'Id,Category',comments='')