# Implementing 3 layer neural network architecture


In [1]:
import numpy as np

In [14]:
# Model parameters
input_size = 2
layers = [4,3]
output_size = 2
def softmax(a):
    e_pa = np.exp(a)
    ans = e_pa/np.sum(e_pa, axis=1, keepdims=True)
    return ans

In [20]:
class NeuralNetwork:
    def __init__(self, input_size, layers, output_size):
        np.random.seed(0)
        
        model = {}  #dictionary
        
        model['W1'] = np.random.randn(input_size, layers[0])  # Initialises random weight matrix
        model['b1'] = np.zeros((1, layers[0]))
        
        model['W2'] = np.random.randn(layers[0], layers[1])  # Initialises random weight matrix
        model['b2'] = np.zeros((1, layers[1]))
        
        model['W3'] = np.random.randn(layers[1], output_size)  # Initialises random weight matrix
        model['b3'] = np.zeros((1, output_size))
        
        self.model = model
    
    
    
    def forward(self, X):
        W1,W2,W3 = self.model['W1'], self.model['W2'], self.model['W3']
        b1,b2,b3 = self.model['b1'], self.model['b2'], self.model['b3']
        
        z1 = np.dot(X, W1) + b1
        a1 = np.tanh(z1)
        
        z2 = np.dot(a1,W2) + b2
        a2 = np.tanh(z2)
        
        z3 = np.dot(a2,W3) + b3
        y_ = softmax(z3)
        self.activation_outputs = (a1,a2,y_)
        
    def backward(self, x,y,learning_rate=0.001):
        W1,W2,W3 = self.model['W1'], self.model['W2'], self.model['W3']
        b1,b2,b3 = self.model['b1'], self.model['b2'], self.model['b3']
        a1,a2,y_ = self.activation_outputs
        
        delta3 = y_ - y
        
        m = x.shape[0]
        dw3 = np.dot(a2.T, delta3)
        db3 = np.sum(delta3, axis=0)/float(m)
        
        delta2 = (1-np.square(a2))*np.dot(delta3, W3.T)
        dw2 = np.dot(a1.T, delta2)
        db2 = np.sum(dealta,axis=0)/float(m)
        
        delta1 = (1-np.square(a1))*np.dot(delta2,W2.T)
        dw1 = np.dot(X.T, delta1)
        db1 = np.sum(delta1, axis = 0)/(float(m))
        
        #update the model parameters using gradient descent
        self.model['W1'] -=learning_rate*dw1
        self.model['b1'] -= learning_rate*db1
        
        self.model['W2'] -=learning_rate*dw2
        self.model['b2'] -= learning_rate*db2
        
        self.model['W3'] -=learning_rate*dw3
        self.model['b3'] -= learning_rate*db3
        
    def predict(self, X):
        y_out = self.forward(X)
        return np.argmax(y_out, axis = 1)
    
    def summary(self):
        W1,W2,W3 = self.model['W1'], self.model['W2'], self.model['W3']
        a1,a2,y_ = self.activation_outputs
        
        print("W1: ", W1.shape)
        print("A1: ", a1.shape)
        
        print("W2: ", W2.shape)
        print("A2: ", a2.shape)
        
        print("W3: ", W3.shape)
        print("Y_: ", y_.shape)
        


In [21]:
def loss(y_oht,p):
    l = - np.mean(y_oht*np.log(p))
    return l

def one_hot(y, depth):
    m = y.shape[0]
    y_oht = np.zeros((m,depth))
    y_oht[np.arrange(m),y] = 1
    
    return y_oht

In [22]:
model = NeuralNetwork(input_size=2, layers = [10,5], output_size=2)

In [23]:
from sklearn.datasets import make_blobs, make_moons
X, y = make_moons(n_samples=1000, noise = 0.1,random_state = 1)
print(X.shape, y.shape)

(1000, 2) (1000,)


In [27]:
model.forward(X)

In [28]:
# Wrong neural network built

ValueError: operands could not be broadcast together with shapes (1000,2) (1000,) 