In [1]:
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

np.random.seed(42)

In [2]:
iris_data = load_iris() 
x = iris_data.data
y_ = iris_data.target.reshape(-1, 1)

encoder = OneHotEncoder(sparse=False)
train_x, test_x, train_y, test_y = train_test_split(x, y_, test_size=0.20)
train_y = encoder.fit_transform(train_y)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [3]:
def relu(x):
    x[x<0]=0.0
    return x

def softmax(arr):
    arr = arr/np.max(arr)
    return np.exp(arr)/(np.sum(np.exp(arr),axis=0))

def diff_relu(x):
    x[x>0]=1.0
    x[x<=0]=0.0
    return x

def glorot_initializer(out,inp):
    limit = np.sqrt(6*1.0/(inp+out))
    return np.random.uniform(-limit,limit,(out,inp))

In [4]:
## Architecture and weights initializations
inp = 4
hidden1 = 10
hidden2 = 10
output = 3

W1 = glorot_initializer(hidden1,inp)
Bi1 = glorot_initializer(hidden1,1)
W2 = glorot_initializer(hidden2,hidden1)
Bi2 = glorot_initializer(hidden2,1)
W3 = glorot_initializer(output,hidden2)
Bi3 = glorot_initializer(output,1)

In [5]:
epochs = 1000
lr = 5*1e-5
alpha = 1e-6

f = open("Momen.txt", "w")
for i in range(epochs):
    loss = 0
#     train_x, train_y = shuffle(train_x, train_y)
    grads = [0,0,0,0,0,0]
    for j in range(len(train_x)):
        #Forward pass
        t = train_x[j]
        t1 = train_y[j]
        h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
        h1_r = (relu(h1)).reshape(-1,1)
        h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
        h2_r = (relu(h2)).reshape(-1,1)
        out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
        y = (softmax(out)).reshape(-1,1)
        
        #Backprop
        d3 = y - (t1).reshape(-1,1)
        d2 = np.matmul(W3.T,d3) * diff_relu(h2)
        d1 = np.matmul(W2.T,d2) * diff_relu(h1)
        
        grads[0] = alpha*grads[0] - lr*d3
        grads[1] = alpha*grads[1] - lr*d2
        grads[2] = alpha*grads[2] - lr*d1
        grads[3] = alpha*grads[3] - lr*np.matmul(d3,h2_r.T)
        grads[4] = alpha*grads[4] - lr*np.matmul(d2,h1_r.T)
        grads[5] = alpha*grads[5] - lr*np.matmul(d1,t.reshape(-1,1).T)
        
        Bi3 += grads[0]
        Bi2 += grads[1]
        Bi1 += grads[2]
        W3  += grads[3]
        W2  += grads[4]
        W1  += grads[5]
        
        loss -= np.sum(t1*np.log(y))
    
    print(i,loss/len(train_x))
    f.write("%s" %(loss/len(train_x)))
    f.write("\n")
    
f.close()

0 4.7619118276859504
1 4.755682937067992
2 4.74920315361916
3 4.74243106007783
4 4.7353414782340915
5 4.728048395091203
6 4.720448906807673
7 4.7125446721245465
8 4.704289402579158
9 4.695799207996378
10 4.687096552127363
11 4.678233422079379
12 4.669226247626704
13 4.6601868031109
14 4.650773528969428
15 4.640900911206656
16 4.630574563793159
17 4.619588504710651
18 4.6079933249446725
19 4.5951244169092345
20 4.580339453500158
21 4.564119011364539
22 4.545744172103116
23 4.5257879696103585
24 4.501039000230379
25 4.474492365288792
26 4.445926846371579
27 4.415522312942653
28 4.380846003325356
29 4.339899016486767
30 4.297114556897809
31 4.239587941837957
32 4.181218985638656
33 4.129987033995392
34 4.084750688596033
35 4.042658371203997
36 4.004992509604951
37 3.9704374170978327
38 3.9394589317148863
39 3.911427194277334
40 3.8866857438724027
41 3.865460972114216
42 3.8479056715088924
43 3.833662263010648
44 3.8211550335639837
45 3.810141550268596
46 3.8000670090454554
47 3.7910819559

390 3.676185316297455
391 3.6763311553788554
392 3.6764798163430745
393 3.676636495545003
394 3.6768329652854765
395 3.677023491784692
396 3.6772083668362456
397 3.677387868959128
398 3.6775622639566894
399 3.677732370357286
400 3.6778981506775636
401 3.6780599129331923
402 3.6782178648445742
403 3.6783722044761378
404 3.6785264721143367
405 3.678677427080905
406 3.6788253899351364
407 3.6789705199601515
408 3.679031769942884
409 3.679078900116191
410 3.679138014928482
411 3.6792415663126596
412 3.6792962557790543
413 3.6793527042133443
414 3.679447632687952
415 3.6795886690590063
416 3.679741436534355
417 3.679853144473991
418 3.6799840757450832
419 3.6800900623823
420 3.680213261247408
421 3.680311243643071
422 3.680421472360155
423 3.6805403474168936
424 3.6805750258205383
425 3.6806293821527025
426 3.6807345903731328
427 3.680852766844725
428 3.680964557372037
429 3.681055455691762
430 3.6811952463243007
431 3.6813469207468428
432 3.681495990656053
433 3.681614005399581
434 3.68172

791 3.7266614799594686
792 3.726874430593841
793 3.7270330050134226
794 3.7271994733628255
795 3.7273605404803267
796 3.7275138473598157
797 3.727713497118214
798 3.7278696556940236
799 3.7280628564028837
800 3.728222275375828
801 3.7284144740569696
802 3.7285704432720683
803 3.7287593826438825
804 3.7289075215164575
805 3.7290804243246876
806 3.729230483083207
807 3.7294090808423968
808 3.72954352364996
809 3.7296809194629135
810 3.7298521069241284
811 3.7299799014919546
812 3.7301034849094163
813 3.730284945772858
814 3.7304111803201168
815 3.7305444386272484
816 3.7307141267734414
817 3.730845235151421
818 3.7310100084118636
819 3.731132224727429
820 3.7312496937443416
821 3.7314176374895
822 3.7315506004947165
823 3.7317160880780236
824 3.7318438907768305
825 3.7320044015279783
826 3.73212661332731
827 3.7322523418604123
828 3.732404933158152
829 3.7324978406428326
830 3.7326492701855907
831 3.73280433238751
832 3.732905023234067
833 3.7330429376700582
834 3.7331974338346607
835 3.

In [6]:
y_pred = []
for j in range(len(test_x)):
    t = test_x[j]
    h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
    h1_r = (relu(h1)).reshape(-1,1)
    h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
    h2_r = (relu(h2)).reshape(-1,1)
    out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
    y = (softmax(out)).reshape(-1,1)
    y_pred.append(y)

In [7]:
a = np.array(y_pred)
a = np.squeeze(a)
eww = []
for i in a:
    eww.append(np.argmax(i))
print(np.array(eww))
print(np.squeeze(test_y))
print(accuracy_score(np.squeeze(test_y), np.array(eww)))

[2 0 2 1 1 0 0 2 2 1 2 0 0 0 0 1 2 1 2 2 0 2 0 2 2 2 2 2 0 0]
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
0.8666666666666667
