In [1]:
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

np.random.seed(42)

In [2]:
iris_data = load_iris() 
x = iris_data.data
y_ = iris_data.target.reshape(-1, 1)

encoder = OneHotEncoder(sparse=False)
train_x, test_x, train_y, test_y = train_test_split(x, y_, test_size=0.20)
train_y = encoder.fit_transform(train_y)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [3]:
def relu(x):
    x[x<0]=0.0
    return x

def softmax(arr):
    arr = arr/np.max(arr)
    return np.exp(arr)/(np.sum(np.exp(arr),axis=0))

def diff_relu(x):
    x[x>0]=1.0
    x[x<=0]=0.0
    return x

def glorot_initializer(out,inp):
    limit = np.sqrt(6*1.0/(inp+out))
    return np.random.uniform(-limit,limit,(out,inp))

def glorot_normal(out,inp):
    limit = np.sqrt(2*1.0/(inp+out))
    return np.random.normal(0,limit,(out,inp))

In [4]:
## Architecture and weights initializations
inp = 4
hidden1 = 10
hidden2 = 10
output = 3

W1 = glorot_normal(hidden1,inp)
Bi1 = glorot_normal(hidden1,1)
W2 = glorot_normal(hidden2,hidden1)
Bi2 = glorot_normal(hidden2,1)
W3 = glorot_normal(output,hidden2)
Bi3 = glorot_normal(output,1)

In [5]:
epochs = 1000
delta = 1e-10
eps = 3*1e-5

f = open("AdaG.txt","w")

for i in range(epochs):
    loss = 0
    train_x, train_y = shuffle(train_x, train_y)
    grads = [0,0,0,0,0,0]
    for j in range(len(train_x)):
        #Forward pass
        t = train_x[j]
        t1 = train_y[j]
        h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
        h1_r = (relu(h1)).reshape(-1,1)
        h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
        h2_r = (relu(h2)).reshape(-1,1)
        out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
        y = (softmax(out)).reshape(-1,1)
        
        #Backprop
        d3 = y - (t1).reshape(-1,1)
        d2 = np.matmul(W3.T,d3) * diff_relu(h2)
        d1 = np.matmul(W2.T,d2) * diff_relu(h1)
        
        grads[0] += (d3)*(d3)
        grads[1] += (d2)*(d2)
        grads[2] += (d1)*(d1)
        grads[3] += (np.matmul(d3,h2_r.T))*(np.matmul(d3,h2_r.T))
        grads[4] += (np.matmul(d2,h1_r.T))*(np.matmul(d2,h1_r.T))
        grads[5] += (np.matmul(d1,t.reshape(-1,1).T))*(np.matmul(d1,t.reshape(-1,1).T))
        
        Bi3 -= (eps*d3)/(delta+np.sqrt(grads[0]))
        Bi2 -= (eps*d2)/(delta+np.sqrt(grads[1]))
        Bi1 -= (eps*d1)/(delta+np.sqrt(grads[2]))
        W3  -= (eps*np.matmul(d3,h2_r.T))/(delta+np.sqrt(grads[3]))
        W2  -= (eps*np.matmul(d2,h1_r.T))/(delta+np.sqrt(grads[4]))
        W1  -= (eps*np.matmul(d1,t.reshape(-1,1).T))/(delta+np.sqrt(grads[5]))
        
        loss -= np.sum(t1*np.log(y))
    
    print(i,loss/len(train_x))
    f.write("%s" %(loss/len(train_x)))
    f.write("\n")
    
f.close()

0 4.38099006593371
1 4.379606149444338
2 4.377834312688666
3 4.377034407351376
4 4.37562014408943
5 4.374870909941955
6 4.373567000609516
7 4.372645989885033
8 4.3714468174738705
9 4.3703478527989
10 4.369185833632502
11 4.368367625611853
12 4.366838932141482
13 4.365908135393455
14 4.364585742393687
15 4.363390444066045
16 4.362215506571774
17 4.36072463935444
18 4.359720785244306
19 4.3589940181360705
20 4.357902100886702
21 4.356586500824599
22 4.355225478510252
23 4.353978590593676
24 4.353289641322612
25 4.35242805864224
26 4.350855049798346
27 4.350006686315445
28 4.348800289102898
29 4.347908739392213
30 4.346854311109708
31 4.345690851158113
32 4.3441867842776425
33 4.343136190990537
34 4.341731491713901
35 4.340638995340734
36 4.33961885504469
37 4.338146741467068
38 4.336395171316676
39 4.335329829729953
40 4.333922853105768
41 4.332599397363089
42 4.331664510587729
43 4.33007013609923
44 4.329186260329093
45 4.328576148694626
46 4.327059746184109
47 4.325122861823042
48 4.32

404 3.887535061230558
405 3.8872846421163816
406 3.8863308641600067
407 3.8853412616648066
408 3.8845687295248412
409 3.883757030148111
410 3.8838215469516943
411 3.882911774564161
412 3.882566732612579
413 3.882436345398776
414 3.8818441515116513
415 3.881730590383202
416 3.8819977585174184
417 3.8813126325343545
418 3.881059646798455
419 3.8805615860710856
420 3.880549125271455
421 3.879634931346605
422 3.8793888266030256
423 3.879446240245813
424 3.8791518843985004
425 3.8785296289382862
426 3.878333790824099
427 3.8774745430212194
428 3.8774802960963393
429 3.87720219355217
430 3.876328514036155
431 3.8765213223811106
432 3.8765913778475536
433 3.876250970140219
434 3.87651991379329
435 3.8758138371496633
436 3.8755996175128016
437 3.8760612942087644
438 3.876349536113116
439 3.8757050831388615
440 3.875374755265453
441 3.8751976814901847
442 3.874843745446365
443 3.8748269252518726
444 3.8742236842838427
445 3.8745828361793015
446 3.8744302654703073
447 3.8742751524376895
448 3.87

793 4.051722303714124
794 4.051350190798029
795 4.051819468616236
796 4.051911193126892
797 4.051791595954092
798 4.052228138362634
799 4.052677527333243
800 4.0520937147574365
801 4.051684135439083
802 4.0528506403763345
803 4.053309778172126
804 4.052877068679071
805 4.053082030013066
806 4.053156300121234
807 4.053672238396002
808 4.054058925148974
809 4.054425559280493
810 4.0545039738479245
811 4.055294922579488
812 4.056215287159458
813 4.057367745154055
814 4.0567909598895335
815 4.057522862735857
816 4.057817410313541
817 4.058080643505706
818 4.058838104930276
819 4.059155905925436
820 4.059333994406478
821 4.05902056577313
822 4.059659298957331
823 4.059707650711726
824 4.060542824910032
825 4.060249743440857
826 4.0607375617124335
827 4.061767797205167
828 4.061647109031527
829 4.062330801811435
830 4.062937028755206
831 4.063366562520927
832 4.06423585073977
833 4.064567115690899
834 4.0657626617138485
835 4.064903359499966
836 4.0640798251670285
837 4.064519890721957
838 4

In [6]:
y_pred = []
for j in range(len(test_x)):
    t = test_x[j]
    h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
    h1_r = (relu(h1)).reshape(-1,1)
    h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
    h2_r = (relu(h2)).reshape(-1,1)
    out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
    y = (softmax(out)).reshape(-1,1)
    y_pred.append(y)

In [7]:
a = np.array(y_pred)
a = np.squeeze(a)
eww = []
for i in a:
    eww.append(np.argmax(i))
print(np.array(eww))
print(np.squeeze(test_y))
print(accuracy_score(np.squeeze(test_y), np.array(eww)))

[2 0 2 2 1 0 1 2 1 1 2 0 0 0 0 2 2 1 2 2 0 2 0 2 2 2 2 2 0 0]
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
0.8666666666666667
