In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
output_notebook()

In [2]:
mnist = pd.read_csv('data/MNIST/train.csv')

In [3]:
labels = mnist.iloc[:,0]
images = mnist.iloc[:,1:].values/255

In [198]:
def DRelu(x):
    y=x.copy()
    y[y<=0]=0
    y[y>0]=1
    return y

In [208]:
DRelu(np.array([-1,2,1]))
np.maximum([-1,2,1],0)

array([0, 2, 1])

In [305]:
layer1 = {'weights': np.zeros(shape=(784,200)),
          'inputs':np.zeros(shape=(1,200)),
          'bias':np.zeros(shape=(1,200)),
          'activation':np.zeros(shape=(1,200)),
          'delta':np.zeros(shape=(1,200)),
          'sigma': lambda x: np.maximum(x,0),
          'sigmaprime': DRelu,
         }

In [306]:
layer2 = {'weights': np.zeros(shape=(200,10)),
          'inputs': np.zeros(shape=(1,10)),
          'bias': np.zeros(shape=(1,10)),
          'activation': np.zeros(shape=(1,10)),
          'delta':np.zeros(shape=(1,10)),  
          'sigma': lambda x: x,
          'sigmaprime': lambda x: 1,
         }

In [307]:
def softmax(v):
    if np.max(v)>50:
        print('uh oh')
    H=np.exp(v)
    return H/H.sum()

In [308]:
def initialize(layer):
    layer['weights']=np.random.normal(0,.2,size=layer['weights'].shape)
    layer['bias'] = np.random.normal(0,.2,size=layer['bias'].shape)
    

In [309]:
def forward(layer,x):
    layer['inputs'] = np.dot(x,layer['weights'])+layer['bias']
    layer['activations'] = layer['sigma'](layer['inputs'])

In [310]:
def nn_forward(Layers,x):
    forward(Layers[0],x)
    for i,layer in enumerate(Layers[1:]):
        forward(layer,Layers[i]['activations'])
    return softmax(Layers[-1]['activations'])

In [311]:
def fb_pass(Layers,x,label,epsilon):
    P = nn_forward(Layers, x)
    loss = -np.log(P[0,label])
    P[0,label] = P[0,label]-1
    Layers[-1]['delta'] = P
    for i, layer in enumerate(Layers[-2::-1]):
        d = np.dot(Layers[i+1]['delta'],Layers[i+1]['weights'].transpose())
        s = layer['sigmaprime'](layer['inputs'])
        layer['delta'] = s*d
    Layers[0]['gradient'] = np.outer(Layers[0]['delta'],x)
    for i, layer in enumerate(Layers[1:]):
        layer['gradient'] = np.outer(Layers[i]['activations'],layer['delta'])
        layer['weights'] = layer['weights']-epsilon*layer['gradient']
    return loss
        

In [312]:
epsilon = .001
loss = []
N = 20000
E = 1000
Layers = [layer1, layer2]
for layer in Layers:
    initialize(layer)

In [313]:
Lsave = 100
for epoch in range(E):
    L = 0
    for j in range(N):
        L += fb_pass(Layers,images[j,:],labels[j],epsilon)/N
    loss.append(L)
    if np.abs(Lsave-L)<.0001:
        break
    Lsave = L
    print('{} - {:.5f}'.format(epoch, L))

0 - 0.90279
1 - 0.48054
2 - 0.41414
3 - 0.37866
4 - 0.35573
5 - 0.33942
6 - 0.32711
7 - 0.31742
8 - 0.30956
9 - 0.30305
10 - 0.29754
11 - 0.29282
12 - 0.28873
13 - 0.28513
14 - 0.28195
15 - 0.27911
16 - 0.27655
17 - 0.27424
18 - 0.27214
19 - 0.27022
20 - 0.26846
21 - 0.26683
22 - 0.26533
23 - 0.26394
24 - 0.26264
25 - 0.26143
26 - 0.26030
27 - 0.25923
28 - 0.25824
29 - 0.25729
30 - 0.25641
31 - 0.25557
32 - 0.25478
33 - 0.25402
34 - 0.25331
35 - 0.25263
36 - 0.25198
37 - 0.25136
38 - 0.25078
39 - 0.25021
40 - 0.24967
41 - 0.24916
42 - 0.24867
43 - 0.24819
44 - 0.24774
45 - 0.24730
46 - 0.24688
47 - 0.24647
48 - 0.24608
49 - 0.24571
50 - 0.24535
51 - 0.24500
52 - 0.24466
53 - 0.24433
54 - 0.24402
55 - 0.24371
56 - 0.24341
57 - 0.24313
58 - 0.24285
59 - 0.24258
60 - 0.24232
61 - 0.24207
62 - 0.24182
63 - 0.24158
64 - 0.24135
65 - 0.24112
66 - 0.24091
67 - 0.24069
68 - 0.24048
69 - 0.24028
70 - 0.24008
71 - 0.23989
72 - 0.23971
73 - 0.23952
74 - 0.23935
75 - 0.23917
76 - 0.23900
77 - 0.23

In [314]:
count = 0
for j in range(20000,30000):
    if nn_forward(Layers,images[j]).argmax() == labels[j]:
        count+=1
print(count/(30000-20000))

0.906


In [315]:
F = figure()
F.line(x=range(len(loss)),y=loss)
show(F)