In [21]:
#
# David Laziuk
# Basic Neural Network from scratch
#

In [22]:
#Importing Tools
import math
import numpy as np
import pandas as pd
import random
from math import log

In [23]:
#Importing Data
data=pd.read_csv('iris.data',names=['sl','sw','pl','pw','class'])
#Formatting Class to numerical
#One map() call swaps every label string for its integer code, so integers are
#never written into the string-typed column one mask at a time
class_codes={'Iris-setosa':0,'Iris-versicolor':1,'Iris-virginica':2}
encoded=data['class'].map(class_codes)
#map() yields NaN for any label missing from class_codes; stop here with a clear message
if encoded.isna().any():
    raise ValueError('Unexpected class label(s): '+str(sorted(data.loc[encoded.isna(),'class'].astype(str).unique())))
data['class']=encoded.astype('int64')
del encoded
data.head()

Unnamed: 0,sl,sw,pl,pw,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [24]:
#Shuffling Entries
#Fixed seed so Restart & Run All always produces the same train/test partition
SPLIT_SEED=42
data=data.sample(frac=1,random_state=SPLIT_SEED).reset_index(drop=True)
#70/30 split into train/test data
train_size=int(np.floor(data.shape[0]*.7))
#Every column except the last is a feature; the last column holds the class code
features=(data.shape[1]-1)
#Slicing and converting to ndarray in one step (row labels are irrelevant once converted)
X_train=data.iloc[:train_size,:features].to_numpy()
X_test=data.iloc[train_size:,:features].to_numpy()
y_train=data.iloc[:train_size,features].to_numpy()
y_test=data.iloc[train_size:,features].to_numpy()
#Sanity checks: the split neither loses nor duplicates rows, and labels line up with features
assert len(X_train)+len(X_test)==len(data)
assert len(X_train)==len(y_train) and len(X_test)==len(y_test)
del features,train_size

In [25]:
#Defining sigmoid 
def sig(x):
    """Logistic sigmoid 1/(1+e^-x), applied element-wise when x is an ndarray."""
    denominator=1+np.exp(-x)
    return 1/denominator

In [26]:
#Forward Pass Func.
def forward(X):
    """Push X through both layers of the network.

    The weights W1, W2 and biases b1, b2 are not parameters: they are read
    from the notebook's global namespace at call time. The biases are
    transposed before being added to the layer's pre-activation.

    Returns the tuple (Z1, H1, Z2, H2): the pre-activation and the sigmoid
    activation of the hidden layer, followed by the same pair for the
    output layer.
    """
    #Hidden layer
    hidden_pre=np.dot(X,W1)+b1.T
    hidden_act=sig(hidden_pre)
    #Output layer
    out_pre=np.dot(hidden_act,W2)+b2.T
    out_act=sig(out_pre)
    return hidden_pre,hidden_act,out_pre,out_act

In [27]:
#Cross entropy loss function for non-binary classification
def crossloss(pred,true,eps=1e-12):
    """Summed per-class binary cross-entropy for a single sample.

    pred -- predicted probabilities, indexable as pred[0,i] (one row, one column per class)
    true -- one-hot target laid out the same way as pred
    eps  -- probabilities are clipped to [eps, 1-eps] before taking logs

    Returns the loss as a Python float.

    Clipping replaces the previous try/except around log(0), which added 0 to
    the loss whenever a probability was exactly 0 or 1. That made a confidently
    wrong prediction look free; it now receives a large finite penalty. The
    number of classes is taken from the width of the row instead of being
    fixed at 3.
    """
    #Only the first row is scored, matching the pred[0,i] / true[0,i] indexing used before
    p=np.clip(np.asarray(pred,dtype=float)[0],eps,1-eps)
    t=np.asarray(true,dtype=float)[0]
    return float(-np.sum(t*np.log(p)+(1-t)*np.log(1-p)))

In [28]:
#Backprop func
def backward(W1,b1,W2,b2,H1,H2,X,Y,lr):
    """Run one gradient-descent step for a single sample and return the new parameters.

    W1, b1 -- hidden-layer weights and bias column
    W2, b2 -- output-layer weights and bias column
    H1, H2 -- hidden and output activations from the forward pass for this sample
    X, Y   -- the input sample and its one-hot target
    lr     -- learning rate

    Returns (W1, b1, W2, b2) after the update; the arguments are not modified in place.

    All sizes are taken from the arguments themselves, so the function no
    longer depends on the global Hsize or on literal 3/4 dimensions.
    """
    #Flatten the per-sample quantities into column vectors
    x_col=np.asarray(X,dtype=float).reshape((-1,1))
    h1_col=np.asarray(H1,dtype=float).reshape((-1,1))
    #Calc gradient
    #Output error term is (prediction - target)
    dz2=(np.asarray(H2,dtype=float)-np.asarray(Y,dtype=float)).reshape((-1,1))
    dW2=np.dot(h1_col,dz2.T)
    db2=dz2
    #Error pushed back through W2, scaled by the sigmoid derivative H1*(1-H1)
    dz1=np.multiply(np.dot(W2,dz2),h1_col*(1-h1_col))
    dW1=np.dot(x_col,dz1.T)
    db1=dz1
    #Update weights
    W1=W1-(lr*dW1)
    b1=b1-(lr*db1)
    W2=W2-(lr*dW2)
    b2=b2-(lr*db2)

    return W1,b1,W2,b2

In [29]:
#Main Function
print("Training:")
Hsize=8  #Size of hidden layer
OutSize=3#Must be 3 for this data
Learning_Rate=.001
EPOCHS=1000
#Seeding both generators used below (np.random for the initial weights, random for
#sample selection) so a re-run from the same data split repeats the same training
TRAIN_SEED=42
random.seed(TRAIN_SEED)
np.random.seed(TRAIN_SEED)
Xrows=X_train.shape[0]#105
Xcols=X_train.shape[1]#4
#Randomly initializing weights close to 0
W1=np.random.normal(0, 0.01, (Xcols,Hsize))
W2=np.random.normal(0, 0.01, (Hsize,OutSize))
#Initialize bias to 0 (column vectors; forward() transposes them before adding)
b1=np.zeros((Hsize,1))
b2=np.zeros((OutSize,1))

#Training
for e in range(EPOCHS):
    meanLoss=0
    meanAcc=0
    #SGD (one random sample at a time)
    for s in range(Xrows):
        #Extracting random sample with replacement
        p=random.randint(0,Xrows-1)
        #Since selection is with replacement not all samples will be considered in each epoch
        X=X_train[p,:].reshape((1,Xcols))
        #One-Hot encoding individual sample class instead of whole data
        #The class code doubles as the column index of the single 1
        Y=np.zeros((1,OutSize))
        Y[0,y_train[p]]=1
        #Forward Pass
        z1,H1,z2,H2=forward(X)
        #Loss calc
        meanLoss+=crossloss(H2,Y)
        #Extracting prediction & updating accuracy
        pred=np.argmax(H2)
        if(pred==y_train[p]):
            meanAcc+=1
        #Backward Pass
        W1,b1,W2,b2=backward(W1,b1,W2,b2,H1,H2,X,Y,Learning_Rate)

    #Display loss & acc every 50 epochs
    meanLoss/=Xrows
    meanAcc/=Xrows
    if(e%50==0 or e==(EPOCHS-1)):
        print(f'Epoch {e:05}: | Loss: {meanLoss:.4f} | Acc: {meanAcc:.4f}')
#Xcols, W1, b1, W2 and b2 are deliberately kept: the evaluation cell and forward() read them
del Hsize,OutSize,Learning_Rate,EPOCHS,TRAIN_SEED,Xrows,meanLoss,meanAcc,e,s,p,X,Y,z1,H1,z2,H2,pred

Training:
Epoch 00000: | Loss: 2.0617 | Acc: 0.3524
Epoch 00050: | Loss: 1.8993 | Acc: 0.3905
Epoch 00100: | Loss: 1.8948 | Acc: 0.3333
Epoch 00150: | Loss: 1.7888 | Acc: 0.4000
Epoch 00200: | Loss: 1.5336 | Acc: 0.6952
Epoch 00250: | Loss: 1.2745 | Acc: 0.6762
Epoch 00300: | Loss: 1.1261 | Acc: 0.6952
Epoch 00350: | Loss: 1.0516 | Acc: 0.7143
Epoch 00400: | Loss: 1.0774 | Acc: 0.6381
Epoch 00450: | Loss: 0.9408 | Acc: 0.7810
Epoch 00500: | Loss: 0.9995 | Acc: 0.7143
Epoch 00550: | Loss: 0.9330 | Acc: 0.8381
Epoch 00600: | Loss: 0.8019 | Acc: 0.9333
Epoch 00650: | Loss: 0.8092 | Acc: 1.0000
Epoch 00700: | Loss: 0.7373 | Acc: 0.9333
Epoch 00750: | Loss: 0.6267 | Acc: 1.0000
Epoch 00800: | Loss: 0.6904 | Acc: 0.9238
Epoch 00850: | Loss: 0.5952 | Acc: 0.9810
Epoch 00900: | Loss: 0.5306 | Acc: 0.9714
Epoch 00950: | Loss: 0.5927 | Acc: 0.9524
Epoch 00999: | Loss: 0.4411 | Acc: 0.9905


In [30]:
#Count of correctly classified test samples, turned into a fraction after the loop
acc=0
#Iterating through all testing samples
for i in range(X_test.shape[0]):
    #Shape the sample as a single row; Xcols is still defined from the training cell
    X=X_test[i,:].reshape((1,Xcols))
    #Pass sample through network
    z1,H1,z2,H2=forward(X)
    #Extract prediction (index of the largest output activation)
    pred=np.argmax(H2)
    #Update accuracy: a match adds 1, a miss adds 0
    acc+=int(pred==y_test[i])
acc/=X_test.shape[0]
print("Test accuracy: "+str(round(acc,4)))

Test accuracy: 0.9778
