In [2]:
import numpy as np
import pandas as pd
import math

### Q.1) Write python code to implement a neural network with one hidden layer forclassifying an XOR gate. Implement the backpropagation algorithm for this case from scratch. Use two neurons for the hidden layer.

In [3]:
X=np.array([[1,1],[1,0],[0,1],[0,0]])
x_train=X.T
print(x_train.shape)
print(x_train)

(2, 4)
[[1 1 0 0]
 [1 0 1 0]]


In [4]:
Y=np.array([0,1,1,0]).reshape(4,1)
y_train=Y.T
print(y_train.shape)
print(y_train)

(1, 4)
[[0 1 1 0]]


In [5]:
def relu(z):
    return np.maximum(0,z)

In [6]:
def derivative_relu(z):
    return 1*(z>0)

In [7]:
def sigmoid_function(z):
    return 1/(1+np.exp(-z))


In [8]:
def derivative_sigmoid(z):
    return sigmoid(z)*(1-sigmoid(z))

In [9]:
def binary_cross_entropy(y,ycap):
    m=y.shape[1]
    cost=-(1/m)*np.sum(y*np.log(ycap))

    return cost

In [10]:
def random_initialization(layer_attributes):
    
    L=len(layer_attributes)-1
    W=[]
    B=[]
    for i in range(1,L+1):
        weight_i = np.random.randn(layer_attributes[i],layer_attributes[i-1])
        bias_i=np.zeros((layer_attributes[i],1))
        W.append(weight_i)
        B.append(bias_i)
    return W,B
        

In [11]:
#FORWARD PROPOGATION
def forward_propagation(x,w,b):

    A=[]
    Z=[]
    length=len(w)
    A.append(x)
    #hidden layer
    for i in range(length-1):
        z_i=np.dot(w[i],A[-1])+b[i]
        Z.append(z_i)
        a_i = relu(z_i)
        A.append(a_i)
    #output layer
    z_l = np.dot(w[-1],A[-1]) + b[-1]
    a_l = sigmoid_function(z_l)
    A.append(a_l)
    Z.append(z_l)

    return Z,A

In [12]:
#BACK PROPOGATION
def back_prop(A,y,W,B,Z):
    m=y.shape[1]
    L=len(W)
    dW=[]
    dB=[]
    dZ=[]
    #Output Layer
    dZ.append((A[-1]-y))
    dB.append((1/m)*np.sum(dZ[-1],axis=1,keepdims=True))
    dW.append((1/m)*(np.dot(dZ[-1],A[-2].T)))
   
    #Hidden layers
    l=L-1
    while l >0:
        dz_l = (1/m)*np.dot(W[l].T,dZ[-1])*derivative_relu(A[l])
        db_l = (1/m)*np.sum(dz_l,axis=1,keepdims=True)
        dw_l = (1/m)*np.dot(dz_l,A[l-1].T)
        dW.append(dw_l)
        dB.append(db_l)
        dZ.append(dz_l)
        l=l-1
            
    return dZ[::-1],dW[::-1],dB[::-1]

In [13]:
def gradient_descent(W,B,dW,dB,learning_rate):
    
    alpha=learning_rate
    length=len(W)
    for i in range(length):
        W[i] = W[i] - alpha*dW[i]
        B[i] = B[i] - alpha*dB[i]
    return W,B

In [14]:
def neural_network(x_train,y_train,learning_rate = 0.1,epochs = 2000,num_hidden_layers = 1,neurons=3):

    layer=[]
    n,m=x_train.shape
    J_train=[]
    layer.append(x_train.shape[0])
    for i in range(num_hidden_layers):
        layer.append(neurons)
    layer.append(y_train.shape[0])
    print(f'neuron configuration: {layer}')
    w,b=random_initialization(layer)
    for j in range(epochs):
        z,a = forward_propagation(x_train,w,b)
        dz,dw,db=back_prop(a,y_train,w,b,z)
        w,b=gradient_descent(w,b,dw,db,learning_rate)
        cost_train=binary_cross_entropy(y_train,a[-1])
        J_train.append(cost_train)
        if j%(epochs/10)==0:
            print(f' \n epoch:{j:4d}  Train error: {J_train[-1]:8.2f} ')
            
    return w,b,a  

In [21]:
w,b,a=neural_network(x_train,y_train,neurons=2,epochs=10000)


neuron configuration: [2, 2, 1]
 
 epoch:   0  Train error:     0.06 
 
 epoch:1000  Train error:     0.23 
 
 epoch:2000  Train error:     0.03 
 
 epoch:3000  Train error:     0.01 
 
 epoch:4000  Train error:     0.01 
 
 epoch:5000  Train error:     0.00 
 
 epoch:6000  Train error:     0.00 
 
 epoch:7000  Train error:     0.00 
 
 epoch:8000  Train error:     0.00 
 
 epoch:9000  Train error:     0.00 


In [22]:
z,a=forward_propagation(x_train,w,b)
print(a[-1])
y_pred_actual=a[-1]
b=np.zeros((a[-1].shape[1]))
for i in range(b.shape[0]):
    if a[-1][:,i]>=0.5:
        b[i]=1
    elif a[-1][:,i]<0.5:
        b[i]=0
print(b)

    

[[0.00169547 0.99758522 0.99758524 0.00836327]]
[0. 1. 1. 0.]


In [23]:
y_train=np.squeeze(y_train)
y_pred_actual=np.squeeze(y_pred_actual)

In [24]:
'''TRUTH TABLE'''
p=X[:,0]
q=X[:,1]
outputdf = pd.DataFrame({'X1':p,'X2':q,'Y_TRUE':y_train,'SIGMOID OUTPUT':y_pred_actual,'Y_PRED':b})
outputdf

Unnamed: 0,X1,X2,Y_TRUE,SIGMOID OUTPUT,Y_PRED
0,1,1,0,0.001695,0.0
1,1,0,1,0.997585,1.0
2,0,1,1,0.997585,1.0
3,0,0,0,0.008363,0.0


### INFERENCES
The neural network to find the Truth table for XOR gate has been implemented from scratch without the use of any automatic-differentiation packages such as Tensorflow or PyTorch.
The activation function used in the hidden layer is ReLu function and in the output layer is Sigmoid function.
#### NOTE: Since the data points are very less(4) and the given problem is a classification problem, it was found out that the output depends heavily on the weight initializations. So, using np.random.randn() function, there are chances that in some initializations, the neural network wont converge to required results which can be seen from the cost value as it will be high and constant with increasing epochs. To rectify this, the weights should be initialized in such a way that the relu function in the 2 neurons of the input layer can form a non-linear decision boundary which seperates the given 4 datapoints.