## XOR implemented in a single layer shallow nn

This network builds on the takeaway from the previous notebook, where we were only able to predict the XOR test cases with an accuracy of 50%. Here we will build a shallow neural network with one hidden layer and try to improve the accuracy of the prediction. 

In [1]:
import numpy as np

In [2]:
# Build the XOR dataset.
# x_orig holds the four input pairs as column vectors, shape (4, 2, 1).
# y_orig holds the matching XOR labels, shape (4, 1).

x_orig = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 2, 1)
y_orig = np.array([0, 1, 1, 0]).reshape(4, 1)

In [4]:
# Flatten each example and transpose, so that the model receives
# one example per column (features along the rows).

x = x_orig.reshape(len(x_orig), -1).transpose()

y = y_orig.reshape(len(y_orig), -1).transpose()

In [6]:
# Set up the parameters. We use a two-layer network whose hidden layer has 4 units.

def init_params(n_x, n_h, n_y):
    """
    Initialise the weights and biases of the two-layer network.

    Arguments:
    n_x : the size of the input layer
    n_h : the size of the hidden layer
    n_y : the size of the output layer

    Returns a dictionary containing the parameters:
    W1 : weight matrix of shape (n_h, n_x)
    b1 : bias vector of shape (n_h, 1)
    W2 : weight matrix of shape (n_y, n_h)
    b2 : bias vector of shape (n_y, 1)
    """

    # Fixed seed so every call produces the same initial weights.
    # Note that this reseeds NumPy's global random generator.
    np.random.seed(2)

    # Weights are small random values; biases start at zero.
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    # The original bound this dict to the misspelled name "paramters",
    # so the return statement raised NameError.
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2
                 }

    return parameters


In [7]:
# define Sigmoid function

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise via NumPy."""
    exp_neg = np.exp(-z)
    return 1 / (1 + exp_neg)

In [8]:
# forward propagation

def forward(parameters, X):
    """
    Run one forward pass through the two-layer network.

    Arguments:
    parameters : dictionary holding "W1", "b1", "W2", "b2"
    X : input data (presumably one example per column -- confirm against caller)

    Returns the output activation A2 and a cache dictionary with the
    intermediate values "Z1", "A1", "Z2", "A2".
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer: affine transform followed by tanh.
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine transform followed by sigmoid.
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return output_act, {"Z1": hidden_pre,
                        "A1": hidden_act,
                        "Z2": output_pre,
                        "A2": output_act}

In [10]:
# compute the cost

def compute_cost(A2, Y):
    """
    Compute the cross-entropy cost averaged over the examples.

    Arguments:
    A2 : output activations of the network, same shape as Y
    Y : true labels, a 2-D array with one example per column

    Returns the cost: -sum(Y*log(A2) + (1-Y)*log(1-A2)) / m
    """
    # Number of examples. The original relied on an undefined name "m",
    # so take it from the label matrix instead.
    m = Y.shape[1]

    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
    cost = - np.sum(logprobs) / m

    return cost

In [13]:
# backward propagation

def backward(X, Y, parameters, cache):
    """
    Compute the gradients of the cost with respect to the parameters.

    Arguments:
    X : input data, 2-D array with one example per column
    Y : true labels, same shape as the cached "A2"
    parameters : dictionary holding at least "W2"
    cache : dictionary holding the activations "A1" and "A2"

    Returns a dictionary with the gradients "dW1", "db1", "dW2", "db2",
    each averaged over the m examples.
    """
    m = X.shape[1]

    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    # Output layer error and gradients.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # Propagate the error through the hidden layer;
    # (1 - A1^2) is the derivative of tanh.
    dZ1 = np.multiply(np.dot(W2.T, dZ2), (1 - np.power(A1, 2)))
    dW1 = np.dot(dZ1, X.T) / m
    # Average over the examples, as for the other gradients
    # (the original omitted the division by m).
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads

In [12]:
#update parameters

def update_parameters(parameters, grads, learning_rate = 1.2):
    """
    Apply one gradient-descent step to every parameter.

    Arguments:
    parameters : dictionary holding "W1", "b1", "W2", "b2"
    grads : dictionary holding the matching gradients "dW1", "db1", "dW2", "db2"
    learning_rate : step size of the update

    Returns a new dictionary with the updated "W1", "b1", "W2", "b2";
    the input dictionaries are left untouched.
    """
    names = ("W1", "b1", "W2", "b2")

    # Each parameter moves against its gradient, which is stored under "d" + name.
    return {name: parameters[name] - (learning_rate * grads["d" + name])
            for name in names}
