<a href="https://colab.research.google.com/github/amysen/BR_Algothon/blob/master/Simple_MLP_XOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
# Import the necessary libraries
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Seed PyTorch's global random number generator.
# Every random number PyTorch generates after this call follows the same
# sequence on each run, so results are reproducible. That matters for
# debugging, for comparing results, and for keeping experiments consistent.
torch.manual_seed(42)

<torch._C.Generator at 0x7b11b427c2b0>

Video explaining the mathematics behind linear layers: [[link]](https://www.youtube.com/watch?v=QpyXyenmtTA)

In [22]:
# Define a simple Multi-Layer Perceptron (MLP) class that inherits from nn.Module
class simpleMLP(nn.Module):
  """Three-layer fully connected network with a sigmoid output.

  Data flows as fc1 -> ReLU -> fc2 -> ReLU -> fc3 -> sigmoid, so every
  output value lies between 0 and 1.

  Args:
    in_features: number of input features per sample.
    hidden1: number of units produced by the first layer (fc1).
    hidden2: number of units produced by the second layer (fc2).
    out_features: number of output values per sample.

  Called with no arguments, the network is the 2 -> 4 -> 2 -> 1 model used
  for the XOR example.
  """

  def __init__(self, in_features: int = 2, hidden1: int = 4,
               hidden2: int = 2, out_features: int = 1):
    # Call the parent class's constructor so the layers below are registered
    super().__init__()

    # First fully connected layer: in_features -> hidden1
    self.fc1 = nn.Linear(in_features, hidden1)

    # Second fully connected layer: hidden1 -> hidden2
    self.fc2 = nn.Linear(hidden1, hidden2)

    # Third fully connected layer: hidden2 -> out_features
    self.fc3 = nn.Linear(hidden2, out_features)

  def forward(self, x):
    """Run the forward pass and return the sigmoid-activated output.

    Args:
      x: tensor whose last dimension has `in_features` entries.

    Returns:
      Tensor with `out_features` entries in its last dimension.
    """
    # Activation functions introduce non-linearity between the linear layers
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    return torch.sigmoid(self.fc3(x))

In [23]:
# Create an instance of the simpleMLP class; later cells refer to it as `model`
model = simpleMLP()
# Printing the module lists its fc1 / fc2 / fc3 layers with their sizes
print(model)

simpleMLP(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=2, bias=True)
  (fc3): Linear(in_features=2, out_features=1, bias=True)
)


In [24]:
def weights_init(model, mean=0.0, std=1.0):
  """Re-draw the weights of every nn.Linear layer from a normal distribution.

  Walks all sub-modules of `model` and, for each nn.Linear, overwrites its
  weight tensor in place with samples from N(mean, std**2). Biases and all
  other module types are left untouched.

  Args:
    model: the nn.Module whose Linear layers are re-initialised.
    mean: mean of the normal distribution (default 0.0).
    std: standard deviation of the normal distribution (default 1.0).
  """
  for m in model.modules():
    if isinstance(m, nn.Linear):
      # nn.init.normal_ fills the parameter in place without recording the
      # operation in autograd, so there is no need to reach through `.data`
      nn.init.normal_(m.weight, mean=mean, std=std)

# Replace the weights of the model's Linear layers with normally distributed
# samples (mean 0, std 1); the biases are not modified
weights_init(model)

In [7]:
# Create a sample input tensor: one row (sample) with 2 features, matching the input size of the first layer
input_tensor = torch.tensor([[1.0, 2.0]])

# Perform a forward pass through the network with the input tensor
output = model(input_tensor)

# The output is NOT purely linear: each of the first two linear layers is followed
# by a ReLU and the last one by a sigmoid, so the printed value lies between 0 and 1.
# NOTE(review): the saved output of this cell shows a negative value with
# grad_fn=<AddmmBackward0>, which looks like it came from an earlier version of
# forward() without the final sigmoid - re-run the notebook top to bottom to refresh it.
print(output)

tensor([[-0.1369]], grad_fn=<AddmmBackward0>)


# Training the network to act as an XOR gate

In [25]:
# XOR truth table
#   inputs (a, b) -> target a XOR b
#   (0, 0) -> 0,  (0, 1) -> 1,  (1, 0) -> 1,  (1, 1) -> 0
X = torch.tensor(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ],
    dtype=torch.float32,
)
# XOR is 1 exactly when the two inputs differ; keep the targets as a float column vector
y = (X[:, 0] != X[:, 1]).float().unsqueeze(1)

# Pair every input row with its target
dataset = TensorDataset(X, y)

# Serve the four samples in reshuffled mini-batches of two
dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=True)

In [14]:
# Inspect the (inputs, targets) tensors held by the dataset behind the DataLoader
dataloader.dataset.tensors

(tensor([[0., 0.],
         [0., 1.],
         [1., 0.],
         [1., 1.]]),
 tensor([[0.],
         [1.],
         [1.],
         [0.]]))

# Training the model

In [27]:
# Training hyper-parameters, kept in one place so the loop and the log message cannot drift apart
NUM_EPOCHS = 10000    # number of full passes over the dataset
LOG_EVERY = 1000      # print the mean batch loss every LOG_EVERY epochs
LEARNING_RATE = 0.01  # step size α used by SGD

loss_function = nn.MSELoss() # Mean Squared Error Loss
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE) # Stochastic Gradient Descent

# Training loop
for epoch in range(NUM_EPOCHS):

  # Sum of the batch losses seen in this epoch (reset at the start of every epoch)
  loss_ith_epoch = 0.0

  for X_batch, y_batch in dataloader:
    # Forward pass: compute predicted y
    y_pred = model(X_batch)

    # Compute loss
    loss = loss_function(y_pred, y_batch)

    # Clear gradients left over from the previous step before accumulating new ones
    optimizer.zero_grad()

    # perform a backward pass (calculate ∂L/∂w -> L: loss, w: weights)
    loss.backward()

    # update the weights: w = w - α(∂L/∂w) [α is the learning rate LEARNING_RATE]
    optimizer.step()

    loss_ith_epoch += loss.item()

  # Logging: mean loss per batch for the current epoch
  if (epoch+1) % LOG_EVERY == 0:
    print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss_ith_epoch/len(dataloader):.4f}')


Epoch [1000/10000], Loss: 0.2355
Epoch [2000/10000], Loss: 0.0932
Epoch [3000/10000], Loss: 0.0265
Epoch [4000/10000], Loss: 0.0130
Epoch [5000/10000], Loss: 0.0083
Epoch [6000/10000], Loss: 0.0059
Epoch [7000/10000], Loss: 0.0046
Epoch [8000/10000], Loss: 0.0037
Epoch [9000/10000], Loss: 0.0031
Epoch [10000/10000], Loss: 0.0026


In [28]:
# Threshold the outputs to get binary predictions
threshold = 0.5
# Inference only: no_grad() avoids building an autograd graph for this forward pass
with torch.no_grad():
  predictions = (model(X) > threshold).float()

print('*****Truth Table*****')
# Print the input-output pairs
# (the loop variable is deliberately not named `input`, which would shadow
#  Python's builtin input() for every later cell in the notebook)
for xor_input, pred in zip(X, predictions):
  print(f'XOR Input: {xor_input.numpy()}, Output: {pred.item()}')

*****Truth Table*****
XOR Input: [0. 0.], Output: 0.0
XOR Input: [0. 1.], Output: 1.0
XOR Input: [1. 0.], Output: 1.0
XOR Input: [1. 1.], Output: 0.0
