In [1]:
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

np.random.seed(42)

In [2]:
iris_data = load_iris() 
x = iris_data.data
y_ = iris_data.target.reshape(-1, 1)

encoder = OneHotEncoder(sparse=False)
train_x, test_x, train_y, test_y = train_test_split(x, y_, test_size=0.20)
train_y = encoder.fit_transform(train_y)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [3]:
def relu(x):
    x[x<0]=0.0
    return x

def softmax(arr):
    arr = arr/np.max(arr)
    return np.exp(arr)/(np.sum(np.exp(arr),axis=0))

def diff_relu(x):
    x[x>0]=1.0
    x[x<=0]=0.0
    return x

def glorot_initializer(out,inp):
    limit = np.sqrt(6*1.0/(inp+out))
    return np.random.uniform(-limit,limit,(out,inp))

def glorot_normal(out,inp):
    limit = np.sqrt(2*1.0/(inp+out))
    return np.random.normal(0,limit,(out,inp))

In [4]:
## Architecture and weights initializations
inp = 4
hidden1 = 10
hidden2 = 10
output = 3

W1 = glorot_normal(hidden1,inp)
Bi1 = glorot_normal(hidden1,1)
W2 = glorot_normal(hidden2,hidden1)
Bi2 = glorot_normal(hidden2,1)
W3 = glorot_normal(output,hidden2)
Bi3 = glorot_normal(output,1)

In [5]:
epochs = 125
beta1 = 0.9
beta2 = 0.999
eps = 1e-8
eta = 1e-4

f = open("Adam.txt","w")

for i in range(epochs):
    loss = 0
    train_x, train_y = shuffle(train_x, train_y)
    m = [0,0,0,0,0,0]
    v = [0,0,0,0,0,0]
    m1 = [0,0,0,0,0,0]
    v1 = [0,0,0,0,0,0]
    for j in range(len(train_x)):
        #Forward pass
        t = train_x[j]
        t1 = train_y[j]
        h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
        h1_r = (relu(h1)).reshape(-1,1)
        h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
        h2_r = (relu(h2)).reshape(-1,1)
        out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
        y = (softmax(out)).reshape(-1,1)
        
        #Backprop
        d3 = y - (t1).reshape(-1,1)
        d2 = np.matmul(W3.T,d3) * diff_relu(h2)
        d1 = np.matmul(W2.T,d2) * diff_relu(h1)
                
        m[0] = beta1*m[0] + (1-beta1)*d3
        m[1] = beta1*m[1] + (1-beta1)*d2
        m[2] = beta1*m[2] + (1-beta1)*d1
        m[3] = beta1*m[3] + (1-beta1)*np.matmul(d3,h2_r.T)
        m[4] = beta1*m[4] + (1-beta1)*np.matmul(d2,h1_r.T)
        m[5] = beta1*m[5] + (1-beta1)*np.matmul(d1,t.reshape(-1,1).T)
        
        v[0] = beta1*v[0] + (1-beta1)*d3**2
        v[1] = beta1*v[1] + (1-beta1)*d2**2
        v[2] = beta1*v[2] + (1-beta1)*d1**2
        v[3] = beta1*v[3] + (1-beta1)*np.matmul(d3,h2_r.T)**2
        v[4] = beta1*v[4] + (1-beta1)*np.matmul(d2,h1_r.T)**2
        v[5] = beta1*v[5] + (1-beta1)*np.matmul(d1,t.reshape(-1,1).T)**2
        
        for k in range(6):
            m1[k] = m[k]/(1-beta1)
            v1[k] = v[k]/(1-beta2)
        
        Bi3 -= (eta*m1[0])/(np.sqrt(v1[0]) + eps)
        Bi2 -= (eta*m1[1])/(np.sqrt(v1[1]) + eps)
        Bi1 -= (eta*m1[2])/(np.sqrt(v1[2]) + eps)
        W3  -= (eta*m1[3])/(np.sqrt(v1[3]) + eps)
        W2  -= (eta*m1[4])/(np.sqrt(v1[4]) + eps)
        W1  -= (eta*m1[5])/(np.sqrt(v1[5]) + eps)
        
        loss -= np.sum(t1*np.log(y))
    
    print(i,loss/len(train_x))
    f.write("%s" %(loss/len(train_x)))
    f.write("\n")
    
f.close()

0 4.378104127729373
1 4.36965679280707
2 4.360862274407947
3 4.354183847268706
4 4.346034680015597
5 4.339035492374108
6 4.33082606702232
7 4.323367834787538
8 4.31470595054574
9 4.306881023036941
10 4.298829424957227
11 4.292041819769149
12 4.283744227699581
13 4.276391211393271
14 4.267972617899551
15 4.2599310628102005
16 4.2523252230809465
17 4.243374609350607
18 4.2356679218467
19 4.227495167207048
20 4.218942351180894
21 4.2113325506662855
22 4.200879788914637
23 4.1918097170771285
24 4.1814341996877475
25 4.170422330909153
26 4.160861101370539
27 4.150629842456359
28 4.140370422070384
29 4.130539114572956
30 4.120612832797992
31 4.111252381426879
32 4.102671904484993
33 4.0938503781837
34 4.083144226486038
35 4.074537612696919
36 4.064495284212141
37 4.0553819376168265
38 4.045305866078055
39 4.036623309840601
40 4.02725191941563
41 4.020459678712711
42 4.0139780100029325
43 4.004538884681258
44 3.99713931615052
45 3.9888786019681857
46 3.981078813244728
47 3.974122573568251
48 

In [6]:
y_pred = []
for j in range(len(test_x)):
    t = test_x[j]
    h1 = (np.matmul(W1,t.reshape(-1,1)).reshape(-1,1)+Bi1).reshape(-1,1)
    h1_r = (relu(h1)).reshape(-1,1)
    h2 = (np.matmul(W2,h1_r).reshape(-1,1)+Bi2).reshape(-1,1)
    h2_r = (relu(h2)).reshape(-1,1)
    out = (np.matmul(W3,h2_r.reshape(-1,1))+Bi3).reshape(-1,1)
    y = (softmax(out)).reshape(-1,1)
    y_pred.append(y)

In [7]:
a = np.array(y_pred)
a = np.squeeze(a)
eww = []
for i in a:
    eww.append(np.argmax(i))
print(np.array(eww))
print(np.squeeze(test_y))
print(accuracy_score(np.squeeze(test_y), np.array(eww)))

[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
1.0
