In [1]:
'''Simple project with the aim of creating a basic multi-layer neural net using only numpy.'''

import numpy as np
import math

class NeuralNet():
    '''Fully connected feed-forward neural network built on numpy only.

    The net is trained by gradient descent on a squared-error cost, with an
    optional L2 weight penalty. train() receives X with one row per example
    and Y with one column per example; internally every matrix is laid out
    with one column per example (train() transposes X).

    Supported activation names are "sigmoid", "relu" and "tanh".
    '''
    
    def __init__(self,neurons_per_layer = [1,4,4,1], eta = 0.2, epochs = 2000000,activations = ["sigmoid","sigmoid","sigmoid"]):
        '''Store the hyper-parameters; weights are created later by setup_params().

        neurons_per_layer -- layer widths, input layer first.
        eta               -- learning rate used by gradient_descent().
        epochs            -- number of passes over the data made by train().
        activations       -- one activation name per non-input layer.
        '''
        # Copy the sequences so the shared default lists can never be mutated through an instance.
        self.neurons_per_layer = list(neurons_per_layer)
        self.size = len(self.neurons_per_layer)-1 #size of A.N.N. (1st layer is input layer so ignore)
        self.eta = eta 
        self.epochs = epochs
        self.activations = list(activations)
        self.W = [] #to store layerwise weights 
        self.b = [] #to store layerwise biases
        self.A = [] #to cache layerwise activation values
        self.Z = [] #to cache layerwise Z values
        self.dW = [] #store layerwise gradients for W 
        self.db = [] #store layerwise gradients for b
        self.lambd = 0 #L2 penalty strength; train() sets it from its `regularization` argument
        self.regularization = None #None or "L2"; set by train()
        
    def setup_params(self):
        '''(Re)initialise weights uniformly in [0, 1) and biases to zero for every layer.

        The lists are rebuilt from scratch so repeated calls do not pile up
        extra, unused entries.
        '''
        self.W = []
        self.b = []
        for l in range(1, self.size+1):
            self.W.append(np.random.rand(self.neurons_per_layer[l], self.neurons_per_layer[l-1]))
            self.b.append(np.zeros((self.neurons_per_layer[l], 1)))
        
    def train(self,X,Y,type="normal", mini_batch_size = 1, regularization=None):
        '''Initialise the parameters and fit them to (X, Y).

        X    -- array with one row per training example.
        Y    -- array with one column per training example.
        type -- "normal" (full batch), "minibatch", "stochastic" (one example
                per update) or "grad_check" (only runs grad_check()).
        mini_batch_size -- examples per batch for "minibatch"; a size of 1
                falls back to "stochastic".
        regularization  -- None or "L2" (uses lambd = 0.01).

        Raises ValueError for an unknown `type` or a mini_batch_size below 1.

        Costs and gradients are always divided by self.m = len(X), the total
        number of examples, also in the mini-batch and stochastic modes.
        '''
        if type not in ("normal", "minibatch", "stochastic", "grad_check"):
            raise ValueError("unknown training type: {0!r}".format(type))
        if type == "minibatch" and mini_batch_size < 1:
            raise ValueError("mini_batch_size must be at least 1")

        self.setup_params() 
        self.m = len(X) #no. of training examples 
        self.regularization = regularization #None,"L2"
        # Reset on every call so an earlier "L2" run cannot leak weight decay into this one.
        self.lambd = 0.01 if regularization == "L2" else 0

        if type == "minibatch" and mini_batch_size == 1:
            type = "stochastic"

        if type == "normal":
            self.X = X.T
            self.Y = Y
            for iters in range(self.epochs):
                self._train_step()
                if iters % 5000 == 0:
                    self._report(iters)
            self._report_last_epoch()

        elif type == "minibatch":
            X_batches = np.array_split(X, math.ceil(len(X)/mini_batch_size), axis=0)
            Y_batches = np.array_split(Y, math.ceil(len(Y[0])/mini_batch_size), axis=1)
            for iters in range(self.epochs):
                for X_batch, Y_batch in zip(X_batches, Y_batches):
                    if len(X_batch) > 0:
                        self.X = X_batch.T
                        self.Y = Y_batch
                        self._train_step()
                        if iters % 50000 == 0:
                            self._report(iters)
            self._report_last_epoch()

        elif type == "stochastic":
            X_cols = X.T
            for iters in range(self.epochs):
                for i in range(self.m):
                    self.X = X_cols[:, i:i+1]
                    self.Y = Y[:, i:i+1]
                    self._train_step()
                    if iters % 5000 == 0:
                        self._report(iters)
            self._report_last_epoch()

        else:  #"grad_check": compare numerical and analytical gradients to check backprop.
            self.X = X.T
            self.Y = Y
            self.grad_check()

    def _train_step(self):
        '''Run one forward pass, cost evaluation, backward pass and parameter update on self.X / self.Y.'''
        Al = self.forward_prop(self.X)
        self.J = self.compute_cost(Al)  # cost is measured before the update
        db, dW = self.back_prop()
        self.gradient_descent(db, dW)

    def _report(self, iters):
        '''Print the most recently computed cost for epoch index `iters`.'''
        print("Cost at {0} iterations is {1}".format(iters,self.J))

    def _report_last_epoch(self):
        '''Print the cost of the final epoch; does nothing when no epoch was run.'''
        if self.epochs > 0:
            self._report(self.epochs-1)

    def _activate(self, name, Z):
        '''Apply the activation called `name` to Z; raises ValueError for an unknown name.'''
        if name == "sigmoid":
            return self.sigmoid(Z)
        if name == "relu":
            return self.relu(Z)
        if name == "tanh":
            return self.tanh(Z)
        raise ValueError("unknown activation: {0!r}".format(name))

    def _activate_prime(self, name, Z):
        '''Derivative of the activation called `name` at Z; raises ValueError for an unknown name.'''
        if name == "sigmoid":
            return self.sigmoid_prime(Z)
        if name == "relu":
            return self.relu_prime(Z)
        if name == "tanh":
            return self.tanh_prime(Z)
        raise ValueError("unknown activation: {0!r}".format(name))
    
    def forward_prop(self, X):
        '''Propagate X (one column per example) through all layers.

        Refreshes the caches self.Z (pre-activations) and self.A (activations,
        with the input as first entry) and returns the output layer activation.
        '''
        A = X
        self.Z = []
        self.A = [A]
        for l in range(0, self.size):
            Z = np.dot(self.W[l], A) + self.b[l]
            A = self._activate(self.activations[l], Z)
            self.Z.append(Z)
            self.A.append(A)
        return A
    
    def back_prop(self):
        '''Back-propagate the squared-error cost through the cached forward pass.

        Uses self.A / self.Z from the latest forward_prop() call and self.Y.
        Returns (db, dW): lists of per-layer gradients, first layer first.
        '''
        dW = []
        db = []
        dA = 2*(self.A[-1]-self.Y)  # derivative of the squared error w.r.t. the output
        for l in range(self.size-1, -1, -1):
            dZ = dA * self._activate_prime(self.activations[l], self.Z[l])

            # second term is the gradient of the L2 penalty (zero when lambd == 0)
            dwL = (np.dot(dZ,self.A[l].T))/self.m + self.lambd/self.m*self.W[l]
            dbL = np.sum(dZ, axis=1, keepdims = True)/self.m
            
            dA = np.dot(self.W[l].T, dZ)
            dW.insert(0,dwL)
            db.insert(0,dbL)
        return db, dW
    
    def gradient_descent(self, db, dW):
        '''Update every layer's weights and biases in place by one step of size eta.'''
        for l in range(0, self.size):
            self.W[l] -= self.eta*(dW[l])
            self.b[l] -= self.eta*(db[l])
            
    def compute_cost(self, Al):
        '''Return sum((Al - Y)^2) / m, plus lambd/(2m) * sum(W^2) when regularization is "L2".'''
        cost = np.square(Al-self.Y).sum()/self.m
        if self.regularization == "L2":
            l2 = 0
            for l in range(0,self.size):
                l2 += np.sum(np.square(self.W[l]))
            l2 = self.lambd/(2*self.m) * l2
            cost += l2
        return cost
    
    def sigmoid(self, Z):
        '''Logistic function 1 / (1 + exp(-Z)), element-wise.'''
        return 1/(1+np.exp(-Z))
    
    def relu(self, Z):
        '''Element-wise max(Z, 0). Returns a new array; Z itself is left untouched.'''
        # No `out=` argument: back_prop() needs the original pre-activations.
        return np.maximum(Z,0)
    
    def tanh(self, Z):
        '''Element-wise hyperbolic tangent (np.tanh stays finite for large |Z|).'''
        return np.tanh(Z)
        
    def sigmoid_prime(self, Z):
        '''Derivative of the sigmoid: s * (1 - s) with s = sigmoid(Z).'''
        s = self.sigmoid(Z)
        return s * (1-s)
    
    def relu_prime(self,Z):
        '''Derivative of relu: 1 where Z > 0, 0 where Z < 0 and 0.05 exactly at Z == 0.'''
        return np.where(Z>0,1.0,np.where(Z==0,0.05,0))
        
    def tanh_prime(self, Z):
        '''Derivative of tanh: 1 - tanh(Z)^2.'''
        return 1-np.square(self.tanh(Z))
    
    def predict(self, X):
        '''Return the network output for X (one column per example).

        This is a plain forward pass, so it also overwrites the self.A / self.Z caches.
        '''
        return self.forward_prop(X)
    
    def grad_check(self):        
        '''Print a numerical and an analytical gradient figure for each layer.

        For every layer, all of its weights are shifted together by +/- epsilon
        and the central difference of the cost is printed (line "1:"). The
        back-propagated gradient matrix of that layer and the sum of its
        entries are printed next (line "2:"); the two figures should agree.
        Expects self.X, self.Y, self.m and the weights to be set, as done by
        train(type="grad_check").
        '''
        epsilon = 1e-5
        for l in range(self.size-1,-1,-1):
            theta = self.W[l]
            try:
                self.W[l] = theta + epsilon
                Jplus = self.compute_cost(self.forward_prop(self.X))

                self.W[l] = theta - epsilon
                Jminus = self.compute_cost(self.forward_prop(self.X))
            finally:
                self.W[l] = theta  # always restore the unperturbed weights

            approx_dW = (Jplus-Jminus)/(2.*epsilon)
            print("1: layer{0} : {1}".format(l+1,approx_dW))

            self.forward_prop(self.X)  # refresh the caches with the restored weights
            db, dW = self.back_prop()
            print((dW[l]))
            print("2: layer{0} : {1}\n".format(l+1,np.sum(dW[l])))

In [2]:
# Seed numpy's global RNG: NeuralNet draws its initial weights from np.random.rand,
# so without a seed every run of this cell starts from different weights.
np.random.seed(0)

net = NeuralNet() 

# Train the net on examples of the form <x/90, sin(x)>, x in degrees between 0 and 90.
# X holds one example per row (17 x 1); y holds the hand-rounded sine values as a single row (1 x 17).
X = np.array([ 
    [0], [5], [10], [14], [21], [27], [31], [39], [45], [49], [58], [60], [70], [74], [79], [82], [90]
   ]) 

y = np.array([[0, 0.087, 0.1736, 0.2419, 0.3583, 0.4539, 0.515, 0.6293, 0.7071, 0.7547, 0.848, 0.866, 0.9396, 0.9612, 0.9816, 0.9902, 1]]) 

# Inputs are scaled into [0, 1] by dividing the angle by 90.
net.train(X/90, y, regularization=None)



Cost at 0 iterations is 0.1310717389522621
Cost at 5000 iterations is 0.002296906241812371
Cost at 10000 iterations is 0.0019227436231382261
Cost at 15000 iterations is 0.0018185726334933202
Cost at 20000 iterations is 0.0017296958364130047
Cost at 25000 iterations is 0.0016508500871946041
Cost at 30000 iterations is 0.0015794963208841822
Cost at 35000 iterations is 0.0015138573742518977
Cost at 40000 iterations is 0.0014526651077714373
Cost at 45000 iterations is 0.001395014516204131
Cost at 50000 iterations is 0.0013402611940052176
Cost at 55000 iterations is 0.0012879420700006563
Cost at 60000 iterations is 0.001237713678021959
Cost at 65000 iterations is 0.00118930949537656
Cost at 70000 iterations is 0.0011425153897439303
Cost at 75000 iterations is 0.0010971573651299658
Cost at 80000 iterations is 0.0010530942652801332
Cost at 85000 iterations is 0.0010102105533777977
Cost at 90000 iterations is 0.0009684083516689675
Cost at 95000 iterations is 0.0009276006948781948
Cost at 10000

Cost at 805000 iterations is 7.534184154439487e-05
Cost at 810000 iterations is 7.515722142271164e-05
Cost at 815000 iterations is 7.4973790752915e-05
Cost at 820000 iterations is 7.479152708870009e-05
Cost at 825000 iterations is 7.461040851042712e-05
Cost at 830000 iterations is 7.443041361145622e-05
Cost at 835000 iterations is 7.425152148489494e-05
Cost at 840000 iterations is 7.40737117107381e-05
Cost at 845000 iterations is 7.389696434340598e-05
Cost at 850000 iterations is 7.372125989964252e-05
Cost at 855000 iterations is 7.354657934677433e-05
Cost at 860000 iterations is 7.337290409132785e-05
Cost at 865000 iterations is 7.32002159679687e-05
Cost at 870000 iterations is 7.302849722876674e-05
Cost at 875000 iterations is 7.285773053278701e-05
Cost at 880000 iterations is 7.268789893596247e-05
Cost at 885000 iterations is 7.251898588127602e-05
Cost at 890000 iterations is 7.235097518921625e-05
Cost at 895000 iterations is 7.21838510485032e-05
Cost at 900000 iterations is 7.20175

Cost at 1600000 iterations is 5.356370017422795e-05
Cost at 1605000 iterations is 5.345317796050221e-05
Cost at 1610000 iterations is 5.334287866500736e-05
Cost at 1615000 iterations is 5.3232801820503854e-05
Cost at 1620000 iterations is 5.3122946976396364e-05
Cost at 1625000 iterations is 5.30133136983677e-05
Cost at 1630000 iterations is 5.29039015680184e-05
Cost at 1635000 iterations is 5.27947101825049e-05
Cost at 1640000 iterations is 5.2685739154190045e-05
Cost at 1645000 iterations is 5.257698811029262e-05
Cost at 1650000 iterations is 5.246845669254057e-05
Cost at 1655000 iterations is 5.236014455682537e-05
Cost at 1660000 iterations is 5.225205137287194e-05
Cost at 1665000 iterations is 5.214417682389543e-05
Cost at 1670000 iterations is 5.203652060627612e-05
Cost at 1675000 iterations is 5.1929082429228194e-05
Cost at 1680000 iterations is 5.182186201447692e-05
Cost at 1685000 iterations is 5.1714859095936323e-05
Cost at 1690000 iterations is 5.160807341939642e-05
Cost at 16

In [103]:
# Compare the trained net with numpy's sine for every whole degree from 0 to 90.
# The angle is divided by 90 before prediction, matching the scaling used for training.

for i in range(0,91):
    exact_value = np.sin(np.deg2rad(i))
    print("sin({0}) = {1}".format(i,exact_value))
    scaled_angle = np.array(i).T/90
    print("predicted_sin({0}) = {1}".format(i,net.predict(scaled_angle)))


sin(0) = 0.0
predicted_sin(0) = [[0.03739954]]
sin(1) = 0.01745240643728351
predicted_sin(1) = [[0.0444105]]
sin(2) = 0.03489949670250097
predicted_sin(2) = [[0.05251426]]
sin(3) = 0.052335956242943835
predicted_sin(3) = [[0.06178584]]
sin(4) = 0.0697564737441253
predicted_sin(4) = [[0.07227841]]
sin(5) = 0.08715574274765817
predicted_sin(5) = [[0.08401768]]
sin(6) = 0.10452846326765347
predicted_sin(6) = [[0.09699745]]
sin(7) = 0.12186934340514748
predicted_sin(7) = [[0.11117733]]
sin(8) = 0.13917310096006544
predicted_sin(8) = [[0.12648283]]
sin(9) = 0.15643446504023087
predicted_sin(9) = [[0.14280798]]
sin(10) = 0.17364817766693033
predicted_sin(10) = [[0.16002036]]
sin(11) = 0.1908089953765448
predicted_sin(11) = [[0.17796792]]
sin(12) = 0.20791169081775934
predicted_sin(12) = [[0.19648685]]
sin(13) = 0.224951054343865
predicted_sin(13) = [[0.21540984]]
sin(14) = 0.24192189559966773
predicted_sin(14) = [[0.23457363]]
sin(15) = 0.25881904510252074
predicted_sin(15) = [[0.25382566]]
