<a href="https://colab.research.google.com/github/aasem/cvisionmcs/blob/main/mlp_XOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Solving XOR (2 Layer MLP)


**Define the inputs and expected output**

In [2]:
import numpy as np
# The four XOR input pairs, one pair per row.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Expected outputs as a column vector, aligned row by row with X.
Y = np.array([[0], [1], [1], [0]])

print('input data:\n', X)
print('output data:\n', Y)
print('input dimension: ', X.shape)
print('output dimension: ', Y.shape)

input data:
 [[0 0]
 [0 1]
 [1 0]
 [1 1]]
output data:
 [[0]
 [1]
 [1]
 [0]]
input dimension:  (4, 2)
output dimension:  (4, 1)


**Definition of Network and Backpropagation Algorithm**

In [45]:
def cost_fn(y, t):
  """Return the raw error between target and prediction.

  Args:
    y: value produced by the network.
    t: target value.

  Returns:
    t - y, i.e. positive where the prediction is below the target.
  """
  residual = t - y
  return residual

def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), evaluated with np.exp."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def fwdpass(X, W1, W2, b1, b2):
  """Push the inputs through both sigmoid layers.

  Args:
    X: input to the first layer.
    W1, b1: weights and bias of the first layer.
    W2, b2: weights and bias of the second layer.

  Returns:
    (L1, L2): the activations of the first and second layer.
  """
  hidden_pre = np.dot(X, W1) + b1
  hidden_out = sigmoid(hidden_pre)
  output_pre = np.dot(hidden_out, W2) + b2
  return hidden_out, sigmoid(output_pre)

def sigmoid_derivative(x):
    """Slope of the sigmoid, written in terms of the sigmoid's own output.

    x is expected to already be a sigmoid activation (backpass passes the
    layer outputs L1 and L2), not the pre-activation value.
    """
    complement = 1 - x
    return x * complement

def backpass(L1, L2, L2_error, W2):
  """Propagate the output error back through the second layer.

  Args:
    L1: activations of the first layer.
    L2: activations of the second layer.
    L2_error: error measured at the second layer's output.
    W2: second-layer weights used to project the error back to layer 1.

  Returns:
    (delta_L1, delta_L2, L1_error): the error scaled by the sigmoid slope
    at layer 1 and at layer 2, plus the unscaled error attributed to layer 1.
  """
  output_delta = L2_error * sigmoid_derivative(L2)
  hidden_error = np.dot(output_delta, W2.T)
  hidden_delta = hidden_error * sigmoid_derivative(L1)
  return hidden_delta, output_delta, hidden_error

def init(input_size, hidden_size, output_size, seed=0):
  """Create the starting parameters of the two-layer network.

  Seeds NumPy's global random generator with `seed`, so the same seed
  always yields the same weights.

  Returns:
    dict with 'W1' of shape (input_size, hidden_size), 'W2' of shape
    (hidden_size, output_size), and the one-element zero biases 'b1', 'b2'.
  """
  np.random.seed(seed)
  # Draw order is part of the reproducible result: W1 first, then W2.
  first_layer = np.random.random((input_size, hidden_size))
  second_layer = np.random.random((hidden_size, output_size))
  return {
      'W1': first_layer,
      'W2': second_layer,
      'b1': np.array([0]),
      'b2': np.array([0]),
  }

def train(X, Y, init_model, num_epochs, lr=1.0, seed=0):
  """Fit the two-layer network with full-batch updates.

  Args:
    X: input samples, handed unchanged to fwdpass on every epoch.
    Y: targets, compared with the network output through cost_fn.
    init_model: dict with keys 'W1', 'W2', 'b1', 'b2'. It is only read:
      the parameters are copied first, so the caller's initial model stays
      intact and the same init_model can be trained again from scratch.
    num_epochs: number of passes over X. With 0 the result is simply a
      copy of init_model.
    lr: step size applied to both weight updates.
    seed: unused; kept so that existing calls passing it keep working.

  Returns:
    dict with keys 'W1', 'W2', 'b1', 'b2'. Only the weights are updated;
    the biases are returned with the values they had in init_model.
  """
  # Private copies: the in-place updates below must not leak into init_model.
  # Weights are forced to float so `+=` with a float step is always valid.
  W1 = np.array(init_model['W1'], dtype=float)
  W2 = np.array(init_model['W2'], dtype=float)
  b1 = np.array(init_model['b1'])
  b2 = np.array(init_model['b2'])

  for _epoch in range(num_epochs):
    L1, L2 = fwdpass(X, W1, W2, b1, b2)
    L2_error = cost_fn(L2, Y)
    # Both deltas are computed before any weight changes, so the error is
    # propagated through the same W2 that produced this epoch's output.
    delta_L1, delta_L2, _L1_error = backpass(L1, L2, L2_error, W2)
    W2 += lr * np.dot(L1.T, delta_L2)
    W1 += lr * np.dot(X.T, delta_L1)

  return {'W1': W1, 'W2': W2, 'b1': b1, 'b2': b2}

def predict(X, Y, model):
  """Print the network output next to the ground truth for every sample.

  Args:
    X: iterable of input vectors, one per sample.
    Y: iterable of targets aligned with X; each target holds one value.
    model: dict with keys 'W1', 'W2', 'b1', 'b2'.

  Returns:
    None. One line per sample is written to standard output.
  """
  # The parameters are the same for every sample, so look them up once.
  W1, b1 = model['W1'], model['b1']
  W2, b2 = model['W2'], model['b2']
  for x, y in zip(X, Y):
    # Apply the biases so inference computes the same function as fwdpass.
    L1_prediction = sigmoid(np.dot(W1.T, x) + b1)
    prediction = sigmoid(np.dot(W2.T, L1_prediction) + b2)
    # Unwrap the one-element arrays explicitly: letting %f / %d convert a
    # 1-d array to a scalar is deprecated in recent NumPy releases.
    predicted_value = np.asarray(prediction).item()
    true_value = np.asarray(y).item()
    print('prediction = %f; ground truth = %d' % (predicted_value, true_value))

**Initialization and Training**

In [46]:
# Build a 2-2-1 network from a fixed seed, then fit it to the XOR table.
init_model = init(input_size=2, hidden_size=2, output_size=1, seed=0)
model = train(X, Y, init_model, num_epochs=10000, lr=1.0, seed=0)

# Show every parameter of the trained model together with its shape.
for name in model:
    tensor = model[name]
    print(name, ' : \n', tensor)
    print('dimension: ', np.shape(tensor))

W1  : 
 [[0.92657028 7.72488708]
 [0.92657052 7.72501832]]
dimension:  (2, 2)
W2  : 
 [[-33.17273965]
 [ 26.38316554]]
dimension:  (2, 1)
b1  : 
 [0]
dimension:  (1,)
b2  : 
 [0]
dimension:  (1,)


In [47]:
# Print the trained network's output beside the target for each row of X.
predict(X, Y, model)

prediction = 0.032459; ground truth = 0
prediction = 0.931328; ground truth = 1
prediction = 0.931328; ground truth = 1
prediction = 0.091578; ground truth = 0
