In [1]:
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

np.random.seed(42)

In [2]:
iris_data = load_iris() 
x = iris_data.data
y_ = iris_data.target.reshape(-1, 1)

encoder = OneHotEncoder(sparse=False)
train_x, test_x, train_y, test_y = train_test_split(x, y_, test_size=0.20)
train_y = encoder.fit_transform(train_y)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [3]:
def relu(x):
    x[x<0]=0.0
    return x

def softmax(arr):
    arr = arr/np.max(arr)
    return np.exp(arr)/(np.sum(np.exp(arr),axis=0))

def diff_relu(x):
    x[x>0]=1.0
    x[x<=0]=0.0
    return x

def glorot_initializer(out,inp):
    limit = np.sqrt(6*1.0/(inp+out))
    return np.random.uniform(-limit,limit,(out,inp))

In [4]:
## Architecture and weights initializations
inp = 4
hidden1 = 10
hidden2 = 10
output = 3

W1 = glorot_initializer(hidden1,inp)
Bi1 = glorot_initializer(hidden1,1)
W2 = glorot_initializer(hidden2,hidden1)
Bi2 = glorot_initializer(hidden2,1)
W3 = glorot_initializer(output,hidden2)
Bi3 = glorot_initializer(output,1)

In [5]:
epochs = 1000
lr = 1e-4
alpha = 1e-6
f = open("Nes.txt", "w")

for i in range(epochs):
    loss = 0
#     train_x, train_y = shuffle(train_x, train_y)
    grads = [0,0,0,0,0,0]
    for j in range(len(train_x)):
        #Forward pass
        
        Bi3 += alpha*grads[0]
        Bi2 += alpha*grads[1]
        Bi1 += alpha*grads[2]
        W3  += alpha*grads[3]
        W2  += alpha*grads[4]
        W1  += alpha*grads[5]
        
        t = train_x[j]
        t1 = train_y[j]
        h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
        h1_r = (relu(h1)).reshape(-1,1)
        h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
        h2_r = (relu(h2)).reshape(-1,1)
        out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
        y = (softmax(out)).reshape(-1,1)
        
        #Backprop
        d3 = y - (t1).reshape(-1,1)
        d2 = np.matmul(W3.T,d3) * diff_relu(h2)
        d1 = np.matmul(W2.T,d2) * diff_relu(h1)
        
        grads[0] = alpha*grads[0] - lr*d3
        grads[1] = alpha*grads[1] - lr*d2
        grads[2] = alpha*grads[2] - lr*d1
        grads[3] = alpha*grads[3] - lr*np.matmul(d3,h2_r.T)
        grads[4] = alpha*grads[4] - lr*np.matmul(d2,h1_r.T)
        grads[5] = alpha*grads[5] - lr*np.matmul(d1,t.reshape(-1,1).T)
        
        Bi3 += grads[0]
        Bi2 += grads[1]
        Bi1 += grads[2]
        W3  += grads[3]
        W2  += grads[4]
        W1  += grads[5]
        
        loss -= np.sum(t1*np.log(y))
    
    print(i,loss/len(train_x))
    f.write("%s" %(loss/len(train_x)))
    f.write("\n")
    
    
f.close()

0 4.761387573336614
1 4.74832937437309
2 4.734194739899456
3 4.7189398180413935
4 4.702415911245396
5 4.684975035651811
6 4.666924882076943
7 4.6481391015485976
8 4.627585358994122
9 4.604380237629339
10 4.575842403164914
11 4.539592097020679
12 4.494300141650255
13 4.43787568580804
14 4.369225123660359
15 4.282728454887475
16 4.169856778462066
17 4.07416583125978
18 3.9965681394120014
19 3.932145757825913
20 3.881266055861475
21 3.8443813951001102
22 3.818164766070549
23 3.7977263795207525
24 3.7809011978210663
25 3.7662665577396086
26 3.753880326341505
27 3.7434830074555494
28 3.7338434780501024
29 3.7246075418115354
30 3.716157978536392
31 3.7084368120406896
32 3.7012545085323287
33 3.694558541762052
34 3.688473994072713
35 3.682965464202655
36 3.6779185724423553
37 3.6732231695781583
38 3.668824363821781
39 3.664536675652864
40 3.660645272345523
41 3.6570482631551027
42 3.653752084519877
43 3.6507118278653863
44 3.6479548259980734
45 3.645354515875679
46 3.643148268227303
47 3.6411

415 3.7328814715561474
416 3.7331922634021004
417 3.7334030034755608
418 3.733695524846617
419 3.7339777992917162
420 3.734283659524262
421 3.734464529810049
422 3.7346955107145434
423 3.7349611773817104
424 3.7352179349550614
425 3.7354587264432157
426 3.7356299509865
427 3.7358837576128887
428 3.7360762108666568
429 3.73628049549663
430 3.736481279676822
431 3.7366707438275664
432 3.7367963144177248
433 3.73701662865147
434 3.737266885735373
435 3.7375513643852796
436 3.7379191120897826
437 3.738279185557263
438 3.738615064302332
439 3.739013618044418
440 3.7393862021688102
441 3.739752274122262
442 3.7401287910027814
443 3.7404849207787767
444 3.7407642312443556
445 3.7411106027944148
446 3.7415495940857557
447 3.7418839268562656
448 3.742206480601699
449 3.742517448398373
450 3.742903164910871
451 3.7431865110835707
452 3.743453665415835
453 3.743775181367835
454 3.7440296161545246
455 3.7443547219794184
456 3.744615629241592
457 3.7448790359961737
458 3.7451980922721377
459 3.7454

797 3.8435667328088816
798 3.843846150441714
799 3.844070967437477
800 3.8443744039706362
801 3.8446246645192566
802 3.844905864648223
803 3.8451838006572694
804 3.8454763564755945
805 3.845737895095085
806 3.846020719357827
807 3.8462126214337777
808 3.846505770402682
809 3.846782737431714
810 3.847073983752291
811 3.847334379910308
812 3.8475873559927867
813 3.847861508859683
814 3.8481156177808398
815 3.8483650721718674
816 3.848651072690169
817 3.8488715976834995
818 3.8491371088062465
819 3.8493924124920182
820 3.849642959850742
821 3.849914567961403
822 3.8501761830365955
823 3.8504354077324154
824 3.850679699671743
825 3.850938692900295
826 3.8512042479664528
827 3.851414788640525
828 3.8516526104296456
829 3.851922914125991
830 3.8521630627470524
831 3.8524206736081834
832 3.852692715685865
833 3.85293134391077
834 3.8531893977693468
835 3.8534618930458993
836 3.853700980600017
837 3.853959449451205
838 3.8541958821301647
839 3.854430855982852
840 3.854684777007374
841 3.854950

In [6]:
y_pred = []
for j in range(len(test_x)):
    t = test_x[j]
    h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
    h1_r = (relu(h1)).reshape(-1,1)
    h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
    h2_r = (relu(h2)).reshape(-1,1)
    out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
    y = (softmax(out)).reshape(-1,1)
    y_pred.append(y)

In [7]:
a = np.array(y_pred)
a = np.squeeze(a)
eww = []
for i in a:
    eww.append(np.argmax(i))
print(np.array(eww))
print(np.squeeze(test_y))
print(accuracy_score(np.squeeze(test_y), np.array(eww)))

[2 0 2 2 1 0 1 2 2 1 2 0 0 0 0 1 2 1 2 2 0 2 0 2 2 2 2 2 0 0]
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
0.8666666666666667
