In [1]:
#ANN on Iris Dataset with adam optimizer from numpy and pandas

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [15]:
#dataset fetching
dataset = pd.read_csv("Iris.csv")
X = dataset.iloc[:,:-1].values
y = dataset.iloc[:,4:5].values

In [16]:
dataset.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [17]:
X[0:5,:]

array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2]])

In [18]:
y[0:5,:]

array([['Setosa'],
       ['Setosa'],
       ['Setosa'],
       ['Setosa'],
       ['Setosa']], dtype=object)

In [19]:
#sklearn is used only for data preprocessing
#Changing to onehotencoder
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
label_enc_obj = LabelEncoder()
y = label_enc_obj.fit_transform(y)
y = y.reshape(150,1)
onehot_enc_obj = OneHotEncoder(categorical_features=[0])
y = onehot_enc_obj.fit_transform(y).toarray()

  y = column_or_1d(y, warn=True)


In [20]:
y[0:5,:]

array([[ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.]])

In [21]:
#Train-Test Split
#sklearn is used only for data preprocessing
from sklearn.cross_validation import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)



In [22]:
print("X-train",X_train[:5,:])

X-train [[ 5.9  3.2  4.8  1.8]
 [ 4.6  3.1  1.5  0.2]
 [ 4.4  3.2  1.3  0.2]
 [ 5.6  3.   4.1  1.3]
 [ 5.2  4.1  1.5  0.1]]


In [23]:
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)


In [24]:
X_test[:5,]

array([[ 1.17532237,  0.02980188,  0.60885268,  0.35610778],
       [-0.11003432, -0.92385828,  0.72005865,  0.86941629],
       [ 0.94162116, -1.40068836,  1.10927953,  0.74108916],
       [ 0.35736811, -0.68544324,  0.55324969,  0.74108916],
       [-1.04483919,  1.22187708, -1.33725177, -1.31214488]])

In [42]:
#Parameters Initialization
class Layer_Utils:
    
    def __init__(self,input_shape,n_layers,layers_dims,output_shape):
        self.input_shape = input_shape
        self.n_layers = n_layers
        self.layers_dims = layers_dims
        self.output_shape = output_shape
        
    
    #Create layer_dims array
    def input_layers(self):
        
        self.layers_dims = [self.input_shape[1]]+self.layers_dims
        self.layers_dims.append(self.output_shape[1])
            
        return self.layers_dims
    
    #Parameter Initialization
    def initialization(self):
        parameters = {}
        v ={}
        s = {}
        for i in range(1,self.n_layers+2):
            parameters["W"+str(i)] = np.random.randn(self.layers_dims[i-1],self.layers_dims[i])*np.sqrt(1/(self.layers_dims[i-1]+self.layers_dims[i]))
            parameters["b"+str(i)] = np.zeros((1,self.layers_dims[i]))
            
            v["dw"+str(i)] = np.zeros((self.layers_dims[i-1],self.layers_dims[i]))
            v["db"+str(i)] = np.zeros((1,self.layers_dims[i]))
            
            s["dw"+str(i)] = np.zeros((self.layers_dims[i-1],self.layers_dims[i]))
            s["db"+str(i)] = np.zeros((1,self.layers_dims[i]))
            
        return v,s,parameters
    

In [48]:
#ANN Model 

class ANN:
    
    def __init__(self,n_layers):
        self.n_layers = n_layers
        
    
    def relu(self,z):
        return np.maximum(0,z)
    
    def softmax(self,z):
        return np.exp(z)/np.sum(np.exp(z),axis=1)[:,np.newaxis]
    
    def relu_derivative(self,z):
        return z > 0  
    
    
    def fp_one_layer(self,X,W,b,activation=True):
        
        z = np.dot(X,W)+b
        a = self.relu(z)
        
        if activation==False:
            return z,z
        
        return a,z
    
    def forward_prop(self,X_input,parameters,n_layers):
        
        a = X_input
        activations = {}
        forward_z = {}
        activations["a0"] = a
        forward_z["z0"] = a
        
        for i in range(1,n_layers+1):
            a,z = self.fp_one_layer(a,parameters["W"+str(i)],parameters["b"+str(i)],activation=True)
            activations["a"+str(i)] = a
            forward_z["z"+str(i)] = z
        
        z,a = self.fp_one_layer(a,parameters["W"+str(n_layers+1)],parameters["b"+str(n_layers+1)],activation=False)
        y_pred = self.softmax(z)      
        
        return forward_z,activations,z,y_pred

    
    
    def loss(self,y,y_pred):
        
        m = X_train.shape[0]
        categorical_entropy_loss = (1/m)*np.sum(np.multiply(-y,np.log(y_pred)))
        
        return categorical_entropy_loss
            
    
    #Calculating dl/dz 
    def loss_der_wrt_z(self,y,y_pred):
        return y_pred-y

    
    def backprop_one_step(self,dz,z,a,w,b):
        
        m = a.shape[0]
        da = np.dot(dz,w.T)
        dw = np.dot(a.T,dz)
        db = (1/m)*np.sum(dz,axis=0)
        z_der = np.multiply(da,self.relu_derivative(z))
        
        return z_der,dw,db
    
    
    def update_parameters_adam(self,dz,parameters,forward_z,activations,t,v,s,beta1=0.9,beta2=0.99,learning_rate=0.01,epsilon=1e-8):
        
        for i in range(n_layers+1,0,-1):
            dz,dw,db = self.backprop_one_step(dz,forward_z["z"+str(i-1)],activations["a"+str(i-1)],parameters["W"+str(i)],parameters["b"+str(i)])
            
            v["dw"+str(i)] = (beta1*v["dw"+str(i)])+(1-beta1)*dw
            v["db"+str(i)] = (beta1*v["db"+str(i)])+(1-beta1)*db
            v_corrected_w = v["dw"+str(i)]/(1-np.power(beta1,t))
            v_corrected_b = v["db"+str(i)]/(1-np.power(beta1,t))
            
            s["dw"+str(i)] = (beta2*s["dw"+str(i)])+(1-beta2)*np.square(dw)
            s["db"+str(i)] = (beta2*s["db"+str(i)])+(1-beta2)*np.square(db)
            s_corrected_w = s["dw"+str(i)]/(1-np.power(beta2,t))
            s_corrected_b = s["db"+str(i)]/(1-np.power(beta2,t))
            
            parameters["W"+str(i)] = parameters["W"+str(i)]-( learning_rate* ( v_corrected_w/np.sqrt(s_corrected_w+epsilon) ) )
            parameters["b"+str(i)] = parameters["b"+str(i)]-( learning_rate* ( v_corrected_b/np.sqrt(s_corrected_b+epsilon) ) )
            
        
        return parameters
        
        
        
    def model(self,X_train,y_train,parameters,v,s,n_epochs = 1000):
        
        for i in range(1,n_epochs+1):
            forward_z,activations,z,y_pred = self.forward_prop(X_train,parameters,self.n_layers)
            total_loss = self.loss(y_train,y_pred)
            print("Loss in epoch " + str(i) + "=" +str(total_loss))
            dz = self.loss_der_wrt_z(y_train,y_pred)
            parameters = self.update_parameters_adam(dz,parameters,forward_z,activations,i,v,s)
            
        return parameters
    
    
    def prediction(self,parameters,X_test):
        _,_,_,a = self.forward_prop(X_test,parameters,self.n_layers)
        b = np.zeros_like(a)
        b[np.arange(len(a)), a.argmax(1)] = 1
        
        return b
    
    def check_accuracy(self,y,y_pred):
        m = y.shape[0]
        temp = np.equal(y,y_pred)
        temp = np.sum(temp,axis=1)
        sum = np.count_nonzero(temp==3)
        
        probab = sum/m
        
        return probab*100

In [49]:
#Taking Input from users regarding number of layers and layer_dims

n_layers = int(input("Enter Number of Layers:->"))
layer_dims = []
for i in range(0,n_layers):
    layer_dims.append(int(input("Enter number of neurons in " + str(i+1) + ":->")))

Enter Number of Layers:->3
Enter number of neurons in 1:->10
Enter number of neurons in 2:->12
Enter number of neurons in 3:->10


In [50]:
#Initializing parameters by making object of Layer_util_Class

layer_util_obj = Layer_Utils(X_train.shape,n_layers,layer_dims,y_train.shape)
layer_dims = layer_util_obj.input_layers()
v,s,parameters = layer_util_obj.initialization()

In [51]:
#Making object of ANN model

ann = ANN(n_layers)
parameters = ann.model(X_train,y_train,parameters,v,s,100)
y_pred = ann.prediction(parameters,X_test)


Loss in epoch 1=1.0836151613
Loss in epoch 2=1.06170508091
Loss in epoch 3=1.04048750273
Loss in epoch 4=1.01801473018
Loss in epoch 5=0.993269370213
Loss in epoch 6=0.964894893719
Loss in epoch 7=0.932700765753
Loss in epoch 8=0.897008549699
Loss in epoch 9=0.858096347009
Loss in epoch 10=0.816791125014
Loss in epoch 11=0.773404338563
Loss in epoch 12=0.728918979994
Loss in epoch 13=0.684087468799
Loss in epoch 14=0.640429018097
Loss in epoch 15=0.598976590997
Loss in epoch 16=0.560547820046
Loss in epoch 17=0.525851164909
Loss in epoch 18=0.495145179488
Loss in epoch 19=0.468759682646
Loss in epoch 20=0.44694477637
Loss in epoch 21=0.429322443393
Loss in epoch 22=0.413956621363
Loss in epoch 23=0.39930676441
Loss in epoch 24=0.384257292662
Loss in epoch 25=0.369249713977
Loss in epoch 26=0.355513029467
Loss in epoch 27=0.34390256035
Loss in epoch 28=0.333910158688
Loss in epoch 29=0.324692640644
Loss in epoch 30=0.315752962172
Loss in epoch 31=0.30818547573
Loss in epoch 32=0.3021901

In [52]:
print("Accuracy for test set is:-> ",ann.check_accuracy(y_test,y_pred))

Accuracy for test set is:->  93.33333333333333
