In [154]:
import numpy as np

# define the number of iterations.
num_itr = 1000

# define batch size.
batchSize = 5

# define the input data dimension.
inputSize = 2

# define the output dimension.
outputSize = 1

# define the dimension of the hidden layer.
hiddenSize = 5



class Neural_Network():
    def __init__(self):     
        #weights
        self.U = np.random.randn(inputSize, hiddenSize) 
        self.W = np.random.randn(hiddenSize, outputSize) 
        self.e = np.random.randn(hiddenSize) 
        self.f = np.random.randn(outputSize) 


    def fully_connected(self, X, U, e):
        '''
        fully connected layer.
        inputs:
            U: weight 
            e: bias
        outputs:
            X * U + e
        '''
        return np.dot(X, U) + e


    def sigmoid(self, s):
        '''
        sigmoid activation function. 
        inputs: s
        outputs: sigmoid(s)  
        '''
        return 1/(1+np.exp(-s))


    def sigmoidPrime(self, s):
        '''
        derivative of sigmoid (Written section, Part a).
        inputs: 
            s = sigmoid(x)
        outputs: 
            derivative sigmoid(x) as a function of s 
        '''
        d_sigmoid = s*(1-s)
        return d_sigmoid


    def forward(self, X):
        '''
        forward propagation through the network.
        inputs:
            X: input data (batchSize, inputSize) 
        outputs:
            c: output (batchSize, outputSize)
        '''
        self.X = X
        self.z = self.fully_connected(self.X, self.U, self.e)
        self.b = self.sigmoid(self.z)
        self.h = self.fully_connected(self.b, self.W, self.f)
        self.c = self.sigmoid(self.h)
        return self.c


    def d_loss_o(self, gt, o):
        '''
        computes the derivative of the L2 loss with respect to 
        the network's output.
        inputs:
            gt: ground-truth (batchSize, outputSize)
            o: network output (batchSize, outputSize)
        outputs:
            d_o: derivative of the L2 loss with respect to the network's 
            output o. (batchSize, outputSize)
        '''
        d_o = o-gt
        return d_o


    def error_at_layer2(self, d_o, o):
        '''
        computes the derivative of the loss with respect to layer2's output
        (Written section, Part b).
        inputs:
            d_o: derivative of the loss with respect to the network output (batchSize, outputSize)
            o: the network output (batchSize, outputSize)
        returns 
            delta_k: the derivative of the loss with respect to the output of the second
            fully connected layer (batchSize, outputSize).
        '''
        delta_k = np.dot(d_o, self.sigmoidPrime(o).T)
        return delta_k


    def error_at_layer1(self, delta_k, W, b):
        '''
        computes the derivative of the loss with respect to layer1's output (Written section, Part e).
        inputs:
            delta_k: derivative of the loss with respect to the output of the second
            fully connected layer (batchSize, outputSize). 
            W: the weights of the second fully connected layer (hiddenSize, outputSize).
            b: the input to the second fully connected layer (batchSize, hiddenSize).
        returns:
            delta_j: the derivative of the loss with respect to the output of the second
            fully connected layer (batchSize, hiddenSize).
        '''
        delta_j = np.dot(delta_k, np.dot(self.sigmoidPrime(b), self.W))
        return delta_j


    def derivative_of_w(self, b, delta_k):
        '''
        computes the derivative of the loss with respect to W (Written section, Part c).
        inputs:
            b: the input to the second fully connected layer (batchSize, hiddenSize).
            delta_k: the derivative of the loss with respect to the output of the second
            fully connected layer's output (batchSize, outputSize).
        returns:
            d_w: the derivative of loss with respect to W  (hiddenSize ,outputSize).
        '''
        d_w = np.dot(delta_k.T,b)
        d_w = np.array(np.sum(d_w, axis=0)).reshape((-1,1))
        return d_w


    def derivative_of_u(self, X, delta_j):
        '''
        computes the derivative of the loss with respect to U (Written section, Part f).
        inputs:
            X: the input to the network (batchSize, inputSize).
            delta_j: the derivative of the loss with respect to the output of the first
            fully connected layer's output (batchSize, hiddenSize).
        returns:
            d_u: the derivative of loss with respect to U (inputSize, hiddenSize).
        '''
        #print(delta_j.shape)
        d_u = np.dot(X.T,delta_j)
        #print(d_u.shape)
        return d_u


    def derivative_of_e(self, delta_j):
        '''
        computes the derivative of the loss with respect to e (Written section, Part g).
        inputs:
            delta_j: the derivative of the loss with respect to the output of the first
            fully connected layer's output (batchSize, hiddenSize).
        returns:
            d_e: the derivative of loss with respect to e (hiddenSize).
        '''
        d_e = np.sum(delta_j, axis=0)
        return d_e


    def derivative_of_f(self, delta_k):
        '''
        computes the derivative of the loss with respect to f (Written section, Part d).
        inputs:
            delta_k: the derivative of the loss with respect to the output of the second
            fully connected layer's output (batchSize, outputSize).
        returns:
            d_f: the derivative of loss with respect to f (outputSize).
        '''
        d_f = np.sum(delta_k)
        return d_f


    def backward(self, X, gt, o):
        '''
        backpropagation through the network.
        Task: perform the 8 steps required below.
        inputs: 
            X: input data (batchSize, inputSize)
            y: ground truth (batchSize, outputSize)
            o: network output (batchSize, outputSize)        
        '''

        # 1. Compute the derivative of the loss with respect to c.
        # Call: d_loss_o
        d_o = self.d_loss_o( gt, o)
        
        # 2. Compute the error at the second layer (Written section, Part b).
        # Call: error_at_layer2
        delta_k = self.error_at_layer2(d_o, o)

        # 3. Compute the derivative of W (Written section, Part c).
        # Call: derivative_of_w
        d_W = self.derivative_of_w(self.b, delta_k)

        # 4. Compute the derivative of f (Written section, Part d).
        # Call: derivative_of_f
        d_f = self.derivative_of_f(delta_k)
        

        # 5. Compute the error at the first layer (Written section, Part e).
        # Call: error_at_layer1 
        delta_j = self.error_at_layer1(delta_k, self.W, self.b)
        

        # 6. Compute the derivative of U (Written section, Part f).
        # Call: derivative_of_u
        d_U = self.derivative_of_u(self.X, delta_j)
        

        # 7. Compute the derivative of e (Written section, Part g).
        # Call: derivative_of_e
        d_e =  self.derivative_of_e(delta_j)
           

        # 8. Update the parameters
        lr = 0.2
        #print(self.W.shape, d_W.shape)
        self.W -= lr*d_W
        self.f -= lr*d_f
        self.U -= lr*d_U
        self.e -= lr*d_e
        
        

    def train (self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)
      


In [155]:

def main():
    """ Main function """
    # generate random input data of dimension (batchSize, inputSize). 
    a = np.random.randint(0, high=10, size=[3,2], dtype='l')

    # generate random ground truth.
    t = np.random.randint(0, high=100, size=[3,1], dtype='l')

    # scale the input and output data.
    a = a/np.amax(a, axis=0) 
    t = t/100 
    error = []

    # create an instance of Neural_Network.
    NN = Neural_Network()
    for i in range(num_itr): 
        print("Input: \n" + str(a)) 
        print("Actual Output: \n" + str(t))
        print("Predicted Output: \n" + str(NN.forward(a)))
        print("Loss: \n" + str(np.mean(np.square(t - NN.forward(a)))))
        print("\n")
        NN.train(a, t)
        error.append(np.mean(np.square(t - NN.forward(a))))
    return error

                     
if __name__ == "__main__":
    error = main()
    

Input: 
[[ 0.          1.        ]
 [ 1.          0.66666667]
 [ 0.          0.55555556]]
Actual Output: 
[[ 0.98]
 [ 0.2 ]
 [ 0.76]]
Predicted Output: 
[[ 0.44251348]
 [ 0.40314625]
 [ 0.39954105]]
Loss: 
0.153363602623


Input: 
[[ 0.          1.        ]
 [ 1.          0.66666667]
 [ 0.          0.55555556]]
Actual Output: 
[[ 0.98]
 [ 0.2 ]
 [ 0.76]]
Predicted Output: 
[[ 0.50642244]
 [ 0.45643582]
 [ 0.45867129]]
Loss: 
0.126944676097


Input: 
[[ 0.          1.        ]
 [ 1.          0.66666667]
 [ 0.          0.55555556]]
Actual Output: 
[[ 0.98]
 [ 0.2 ]
 [ 0.76]]
Predicted Output: 
[[ 0.55649759]
 [ 0.49879451]
 [ 0.50583493]]
Loss: 
0.111077442844


Input: 
[[ 0.          1.        ]
 [ 1.          0.66666667]
 [ 0.          0.55555556]]
Actual Output: 
[[ 0.98]
 [ 0.2 ]
 [ 0.76]]
Predicted Output: 
[[ 0.59392119]
 [ 0.53077907]
 [ 0.54163769]]
Loss: 
0.102051247207


Input: 
[[ 0.          1.        ]
 [ 1.          0.66666667]
 [ 0.          0.55555556]]
Actual Output: 
[[

In [156]:
error

[0.12694467609673438,
 0.11107744284363841,
 0.10205124720674567,
 0.096946658618795409,
 0.093983621199345724,
 0.092175859411628092,
 0.090991815954297908,
 0.090144813401710697,
 0.089478576481777441,
 0.088906851926071076,
 0.088381543343839908,
 0.087875665707969272,
 0.087374067498225974,
 0.086868293741766578,
 0.0863536991536311,
 0.085827806653500718,
 0.0852893664992697,
 0.084737814951985582,
 0.084172963183402463,
 0.083594819834359332,
 0.083003491446118918,
 0.082399128255977075,
 0.081781896287546632,
 0.081151964509418556,
 0.080509500450102287,
 0.079854670386706447,
 0.07918764184434933,
 0.078508587104470617,
 0.077817686989077386,
 0.077115134522430503,
 0.076401138266107282,
 0.075675925234354208,
 0.074939743358211894,
 0.074192863499523129,
 0.073435581031926336,
 0.072668217012640868,
 0.07189111897061766,
 0.071104661336012911,
 0.070309245534383683,
 0.069505299767306031,
 0.068693278499676214,
 0.067873661672948463,
 0.06704695366303158,
 0.066213682001468682