In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

# Load the Iris dataset that ships with scikit-learn, then unpack the returned
# container into the feature matrix `X` and the class labels `y`.
iris = datasets.load_iris()
X, y = iris.data, iris.target

def one_hot(a, num_classes):
    """Return the one-hot encoding of the integer class labels in `a`.

    Parameters
    ----------
    a : array-like of int
        Class indices in ``[0, num_classes)``. Any shape is accepted; the
        labels are flattened before encoding.
    num_classes : int
        Number of classes, i.e. the width of each one-hot row.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(a.size, num_classes)``. A 0-d (scalar) label
        yields a single 1-D row of length ``num_classes``.
    """
    labels = np.asarray(a)
    encoded = np.eye(num_classes)[labels.reshape(-1)]
    # No blanket np.squeeze here: squeezing would collapse a one-sample batch
    # to 1-D (and drop the class axis when num_classes == 1), which breaks
    # callers that index the result row by row.
    if labels.ndim == 0:
        return encoded[0]
    return encoded


# Hold out 20% of the samples for testing. A fixed random_state makes the
# split -- and therefore every result computed from it -- reproducible across
# kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Only the training labels are one-hot encoded (3 classes); y_test keeps the
# class labels as returned by the split.
y_train = one_hot(y_train, 3)

In [3]:
# Network dimensions: 4 inputs -> 10 hidden units -> 10 hidden units -> 3 outputs.
input_size, output_size = 4, 3
hidden_1 = hidden_2 = 10

# Alias for the training features.
x = x_train

# Seed NumPy's global random number generator.
np.random.seed(40)
def layer(x,W,b):
    """Apply the affine map ``W @ x + b``.

    `W` is multiplied onto `x` with ``np.matmul`` and the bias `b` is then
    added using normal NumPy broadcasting.
    """
    projected = np.matmul(W, x)
    return projected + b

def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Returns a new array and leaves `x` untouched, so the caller can still use
    the pre-activation values afterwards (e.g. during backpropagation).
    """
    # np.maximum allocates its result; assigning through a boolean mask would
    # instead overwrite the caller's array and alias input and output.
    return np.maximum(x, 0)

def diff_relu(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere.

    Returns a new float array of the same shape as `x`; the input is not
    modified.
    """
    # Build the mask in a fresh array rather than overwriting `x`, so the
    # caller's activations survive the call.
    return np.where(np.asarray(x) > 0, 1.0, 0.0)

def categ_cross_ent(y, y1, eps=1e-12):
    """Categorical cross-entropy ``-sum(y * log(y1))``.

    Parameters
    ----------
    y : array-like
        Target distribution (e.g. a one-hot vector).
    y1 : array-like
        Predicted probabilities.
    eps : float, optional
        Lower bound applied to `y1` before taking the logarithm.

    Returns
    -------
    float
        The summed cross-entropy over all elements.
    """
    target = np.asarray(y, dtype=float)
    predicted = np.asarray(y1, dtype=float)
    # A (k,) target against a (k, 1) prediction would broadcast to a k x k
    # matrix and sum every log-probability; align the shapes instead.
    if target.shape != predicted.shape and target.size == predicted.size:
        target = target.reshape(predicted.shape)
    # Clip away exact zeros so log() never produces -inf (and 0 * -inf = nan).
    return -np.sum(target * np.log(np.clip(predicted, eps, None)))

def softmax(x, axis=None):
    """Numerically stable softmax.

    Parameters
    ----------
    x : array-like
        Logits.
    axis : int or None, optional
        Axis along which to normalise. The default ``None`` normalises over
        all elements, which is what a single column vector of logits needs.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as `x` whose entries are positive and sum to 1
        along `axis`.
    """
    logits = np.asarray(x, dtype=float)
    # Softmax is invariant to *subtracting* a constant, not to dividing by
    # one. Subtracting the max keeps exp() from overflowing without changing
    # the result, and works for zero or negative maxima.
    shifted = logits - np.max(logits, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

# Glorot initialization
# Each limit is sqrt(6 / (fan_in + fan_out)) for one layer. The fan sizes are
# taken from the layer-size variables so the limits stay correct if the
# architecture is changed.

lim1 = (6.0 / (input_size + hidden_1))**0.5
lim2 = (6.0 / (hidden_1 + hidden_2))**0.5
lim3 = (6.0 / (hidden_2 + output_size))**0.5

In [None]:
# Re-seed here so re-running this cell on its own reproduces the same initial
# weights. Nothing in the code visible above draws random numbers between the
# earlier np.random.seed(40) call and this point, so a top-to-bottom run
# initialises the weights exactly as before.
np.random.seed(40)

# Weights and biases of the three layers, drawn uniformly within the Glorot
# limits. Biases are column vectors so they add onto column-vector activations.
W1 = np.random.uniform(-lim1, lim1, (hidden_1, input_size))
Bi1 = np.random.uniform(-lim1, lim1, (hidden_1, 1))
W2 = np.random.uniform(-lim2, lim2, (hidden_2, hidden_1))
Bi2 = np.random.uniform(-lim2, lim2, (hidden_2, 1))
W3 = np.random.uniform(-lim3, lim3, (output_size, hidden_2))
Bi3 = np.random.uniform(-lim3, lim3, (output_size, 1))

epochs = 2000
lr = 1e-6      # learning rate
alpha = 1e-7   # momentum coefficient

# Parameters and their momentum buffers, in matching order. The update below
# is done in place, so W1..Bi3 are modified through this list.
params = [W1, W2, W3, Bi1, Bi2, Bi3]
velocity = [np.zeros_like(p) for p in params]

for i in range(epochs):
    # shuffle() returns reordered copies; features and targets stay aligned.
    x, y_train = shuffle(x, y_train, random_state=42)
    loss = 0.0  # summed over the epoch, averaged when printed
    for j in range(len(x)):
        # Sample and target as column vectors.
        t = x[j][..., np.newaxis]
        target = y_train[j].reshape(-1, 1)

        # forward pass
        rel_hid1 = relu(layer(t, W1, Bi1))
        rel_hid2 = relu(layer(rel_hid1, W2, Bi2))
        # Named `probs`, not `y`, so the dataset labels in `y` are not clobbered.
        probs = softmax(layer(rel_hid2, W3, Bi3))

        # Target and prediction share a shape, so the product is element-wise.
        loss += categ_cross_ent(target, probs)

        # BACKPROPAGATION
        # Softmax + cross-entropy gives the output error directly.
        del3 = probs - target
        w3 = np.matmul(del3, rel_hid2.T)

        # The ReLU derivative is 1 exactly where the ReLU output is positive,
        # so the mask is taken from the activations without modifying them.
        del2 = np.matmul(W3.T, del3) * (rel_hid2 > 0)
        # Outer product (hidden_2 x hidden_1), matching the shape of W2.
        w2 = np.matmul(del2, rel_hid1.T)

        del1 = np.matmul(W2.T, del2) * (rel_hid1 > 0)
        w1 = np.matmul(del1, t.T)

        # SGD with classical momentum: v <- alpha * v + lr * grad; p <- p - v.
        # The bias gradients are the layer errors themselves.
        step_grads = [w1, w2, w3, del1, del2, del3]
        for p, g, v in zip(params, step_grads, velocity):
            v *= alpha
            v += lr * g
            p -= v
    if i % 50 == 0:
        print(i, loss / len(x))