In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
iris = load_iris()
print(iris.DESCR)

Iris Plants Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :Date: July, 1988

This is a copy of UCI ML iris d

In [3]:
def to_oneHotLabels(targets,nClasses):
    oneHotLabels = np.zeros((len(targets),nClasses))
    for i in range(len(targets)):
        oneHotLabels[i][targets[i]] = 1.0
    return oneHotLabels

In [4]:
nClasses = len(iris.target_names)
nFeatures = len(iris.feature_names)
data,targets = MinMaxScaler().fit_transform(iris.data),to_oneHotLabels(iris.target,nClasses)
print("Total Samples: %d"%(len(data)))

Total Samples: 150


In [5]:
xtrain,xtest,ytrain,ytest = train_test_split(data,targets,test_size=0.3,shuffle=True)
print("Training Samples: {} | Testing Samples: {}".format(len(data),xtrain.shape[0],xtest.shape[0]))

Training Samples: 150 | Testing Samples: 105


In [28]:
class Neural_Network:
    def __init__(self,params,inputSize,learning_rate=1.0,learning_rate_decay=0.99,relu_alpha=0.0):
        self.model = self.create_neuralNetwork(params,inputSize)
    
        self.learning_rate = learning_rate
        self.learning_rate_decay = learning_rate_decay
        self.relu_alpha = relu_alpha
        
    def create_neuralNetwork(self,params,inputSize):
        layers = []
        nNeurons = 0
        for param in params:
            layers.append([np.random.normal(size=(inputSize,param)), np.zeros((1,param))])
            inputSize = param
        print("Created a neural network with %d hidden layers and %d neurons "%(len(layers)-1,sum(params)))
        return layers
    
    def predict(self,xInput):
        xInput = np.expand_dims(xInput,axis=0)
        for weights,bias in self.model[:-1]:
            xInput = self.relu(np.dot(xInput,weights) + bias)
        return self.sigmoid(np.dot(xInput,self.model[-1][0])+ self.model[-1][1])
    
    def compute(self,xInput):
        # z = weights * xInput + bias or z = w*x + b
        zValues = []
        
        # a is either sigmoid(z) or # relu(z)
        aValues = [] # holds activated z values by sigmoid or relu functions
        
        xInput = np.expand_dims(xInput,axis=0)
        for weights,bias in self.model[:-1]:
            z = np.dot(xInput,weights) + bias
            zValues.append(z)
            
            a = self.relu(z)
            aValues.append(a)
            xInput = a
            
        z = np.dot(xInput,self.model[-1][0])+self.model[-1][1]
        zValues.append(z)
        aValues.append(self.sigmoid(z))

        return aValues,zValues
    
    def predict_batch(self,batch,final_prediction=False):
        
        if final_prediction:
            predictions = []
            for x in batch:
                predictions.append(self.predict(x))
            return predictions
        
        else:
            batchValues = {"a":[],"z":[]}
            for x in batch:
                aValues,zValues = self.compute(x)
                batchValues["a"].append(aValues)
                batchValues["z"].append(zValues)
                
            return batchValues
            
            
        
    def relu(self,z,deriv=False):
        if deriv:
            z[z<0] = 0 
            z[z>0] = 1
            return z
        return np.maximum(z,self.relu_alpha)

    def sigmoid(self,z,deriv=False):
        out = 1/(1+np.exp(-z))
        if deriv:
            return out*(1-out)
        return out
    
    def backprop(self,batchValues,xtrain,ytrain):
         # delta  = dCost / dWeights = dC/dW = dC/dA * dA/dZ * dZ/dW
        delta = [[np.zeros(weights.shape),np.zeros(bias.shape)] for weights,bias in self.model]
        
        cost = 0
        for i in range(len(xtrain)):
            iw = len(self.model)
            err = 0
            for _ in range(iw):
                iw -= 1
                
                if iw != 0:
                    layer_xInput = batchValues["a"][i][iw-1]
                else:
                    layer_xInput = xtrain[i].reshape(1,4)
                    
                layer_zValue = batchValues["z"][i][iw]
                layer_aValue = batchValues["a"][i][iw]
                
                if iw == len(self.model)-1:
                    cost += self.mean_squared_error(layer_aValue,ytrain[i])
                    err = self.mean_squared_error(layer_aValue,ytrain[i],True)*self.sigmoid(layer_zValue,True)
                else:
                    err = np.dot(err,self.model[iw+1][0].T) * self.relu(layer_zValue,True)

                delta[iw][0] += np.dot(layer_xInput.T,err)
                delta[iw][1] += err
                
        return np.sum(cost)*(1/(2*len(xtrain))),[[d[0]/len(xtrain),d[1]/len(xtrain)] for d in delta]
    
    def train(self,xtrain,ytrain):
        batchValues = self.predict_batch(xtrain)
        
        cost,delta = self.backprop(batchValues,xtrain,ytrain)
        
        # updating the weights and bias
        for i in range(len(self.model)):
            self.model[i][0] += -self.learning_rate * delta[i][0]
            self.model[i][1] += -self.learning_rate * delta[i][1]
            
        self.learning_rate *= self.learning_rate_decay   
        
        return cost
    
    def accuracy(self,predictions,targets):
        acc = 0
        for pred,ans in zip(predictions,targets):
            if np.argmax(ans) == np.argmax(pred[0]):
                acc += 1
                
        return round((100*acc)/len(predictions),3)
    
    def mean_squared_error(self,prediction,answer,deriv=False):   
        if deriv:
            return 2*(prediction-answer)
        return (answer-prediction)**2

In [37]:
model = Neural_Network([4,6,4,nClasses],xtrain.shape[-1],0.041,0.999,0.04)

Created a neural network with 3 hidden layers and 17 neurons 


In [38]:
cost = 0
interval = 100
epochs = 10000
for e in range(epochs):
    cost += model.train(xtrain,ytrain)
    if e%interval == 0:
        trainPreds = model.predict_batch(xtrain,True)
        testPreds = model.predict_batch(xtest,True)
        if e != 0:
            cost /= interval
        print("Epoch: {} | Cost: {} | Train acc: {}% | Test acc: {}% | LR: {}".format(e,cost,model.accuracy(trainPreds,ytrain),model.accuracy(testPreds,ytest),round(model.learning_rate,5)))
        cost = 0

Epoch: 0 | Cost: 0.510827191197938 | Train acc: 33.333% | Test acc: 8.889% | LR: 0.04096
Epoch: 100 | Cost: 0.30219308554549085 | Train acc: 40.952% | Test acc: 35.556% | LR: 0.03706
Epoch: 200 | Cost: 0.24954179876244229 | Train acc: 75.238% | Test acc: 84.444% | LR: 0.03353
Epoch: 300 | Cost: 0.2140304841622157 | Train acc: 87.619% | Test acc: 97.778% | LR: 0.03034
Epoch: 400 | Cost: 0.18567458822794117 | Train acc: 93.333% | Test acc: 97.778% | LR: 0.02745
Epoch: 500 | Cost: 0.16319973196656917 | Train acc: 94.286% | Test acc: 100.0% | LR: 0.02484
Epoch: 600 | Cost: 0.14529499311218522 | Train acc: 95.238% | Test acc: 100.0% | LR: 0.02247
Epoch: 700 | Cost: 0.12875198864190668 | Train acc: 96.19% | Test acc: 100.0% | LR: 0.02033
Epoch: 800 | Cost: 0.10845225789628588 | Train acc: 96.19% | Test acc: 100.0% | LR: 0.0184
Epoch: 900 | Cost: 0.09823752596959354 | Train acc: 95.238% | Test acc: 100.0% | LR: 0.01665
Epoch: 1000 | Cost: 0.09014068164033309 | Train acc: 95.238% | Test acc: 1

Epoch: 8900 | Cost: 0.046562906752613265 | Train acc: 97.143% | Test acc: 100.0% | LR: 1e-05
Epoch: 9000 | Cost: 0.046562060930892143 | Train acc: 97.143% | Test acc: 100.0% | LR: 1e-05
Epoch: 9100 | Cost: 0.046561295665256124 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9200 | Cost: 0.04656060327702059 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9300 | Cost: 0.04655997683409129 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9400 | Cost: 0.046559410046297725 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9500 | Cost: 0.04655889723157744 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9600 | Cost: 0.04655843324956015 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9700 | Cost: 0.04655801345207315 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9800 | Cost: 0.04655763363046174 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
Epoch: 9900 | Cost: 0.04655728997423556 | Train acc: 97.143% | Test acc: 100.0% | LR: 0.0
