## A.I. Assignment 5

## Learning Goals

By the end of this lab, you should be able to:
* Get more familiar with tensors in pytorch 
* Create a simple multilayer perceptron model with pytorch
* Visualise the parameters


### Task

Build a fully connected feed forward network that adds two bits. Determine the a propper achitecture for this network (what database you use for this problem? how many layers? how many neurons on each layer? what is the activation function? what is the loss function? etc)

Create at least 3 such networks and compare their performance (How accurate are they? How fast are they trained to get at 1 accuracy?)

Display for the best one the weights for each layer.


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict

In [100]:
model1 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2, 4)),
    ('activation', nn.Sigmoid()),
    ('output', nn.Linear(4, 1)),
    ('sigmoid', nn.Sigmoid())
]))

model2 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2, 8)),
    ('activation', nn.ReLU()),
    ('output', nn.Linear(8, 1)),
    ('softmax', nn.Softmax(dim=1))
]))

model3 = nn.Sequential(OrderedDict([
    ('fct1', nn.Linear(2, 16)),
    ('relu1', nn.ReLU()),
    ('fct2', nn.Linear(16, 8)),
    ('relu2', nn.ReLU()),
    ('fct3', nn.Linear(8, 4)),
    ('relu3', nn.ReLU()),
    ('fct4', nn.Linear(4, 1)),
    ('softmax', nn.Softmax(dim=1))
]))

In [101]:
print(model1)
print(model2)
print(model3)

Sequential(
  (hidden): Linear(in_features=2, out_features=4, bias=True)
  (activation): Sigmoid()
  (output): Linear(in_features=4, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)
Sequential(
  (hidden): Linear(in_features=2, out_features=8, bias=True)
  (activation): ReLU()
  (output): Linear(in_features=8, out_features=1, bias=True)
  (softmax): Softmax(dim=1)
)
Sequential(
  (fct1): Linear(in_features=2, out_features=16, bias=True)
  (relu1): ReLU()
  (fct2): Linear(in_features=16, out_features=8, bias=True)
  (relu2): ReLU()
  (fct3): Linear(in_features=8, out_features=4, bias=True)
  (relu3): ReLU()
  (fct4): Linear(in_features=4, out_features=1, bias=True)
  (softmax): Softmax(dim=1)
)


In [102]:
data_in = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)

print(data_in)

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])


In [103]:
data_target = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)
    
print(data_target)

tensor([[0.],
        [1.],
        [1.],
        [0.]])


In [111]:
criterion = nn.BCEWithLogitsLoss()

optimizer1 = optim.SGD(model1.parameters(), lr=0.1)
optimizer2 = optim.SGD(model2.parameters(), lr=0.1)
optimizer3 = optim.SGD(model3.parameters(), lr=0.1)

In [112]:
# Train the model

num_epochs = 1000

# First model
print("1st model:")
for epoch in range(num_epochs):
    # Forward pass
    outputs_pred = model1(data_in)
    loss = criterion(outputs_pred, data_target)
    
    # Backward and optimize
    optimizer1.zero_grad()
    loss.backward()
    optimizer1.step()
    
    # Print training progress
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Second model
print("\n2nd model:")
for epoch in range(num_epochs):
    # Forward pass
    outputs_pred = model2(data_in)
    loss = criterion(outputs_pred, data_target)
    
    # Backward and optimize
    optimizer2.zero_grad()
    loss.backward()
    optimizer2.step()
    
    # Print training progress
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Third model
print("\n3rd model:")
for epoch in range(num_epochs):
    # Forward pass
    outputs_pred = model3(data_in)
    loss = criterion(outputs_pred, data_target)
    
    # Backward and optimize
    optimizer3.zero_grad()
    loss.backward()
    optimizer3.step()
    
    # Print training progress
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

1st model:
Epoch [100/1000], Loss: 0.6938
Epoch [200/1000], Loss: 0.6937
Epoch [300/1000], Loss: 0.6937
Epoch [400/1000], Loss: 0.6936
Epoch [500/1000], Loss: 0.6936
Epoch [600/1000], Loss: 0.6936
Epoch [700/1000], Loss: 0.6935
Epoch [800/1000], Loss: 0.6935
Epoch [900/1000], Loss: 0.6935
Epoch [1000/1000], Loss: 0.6934

2nd model:
Epoch [100/1000], Loss: 0.8133
Epoch [200/1000], Loss: 0.8133
Epoch [300/1000], Loss: 0.8133
Epoch [400/1000], Loss: 0.8133
Epoch [500/1000], Loss: 0.8133
Epoch [600/1000], Loss: 0.8133
Epoch [700/1000], Loss: 0.8133
Epoch [800/1000], Loss: 0.8133
Epoch [900/1000], Loss: 0.8133
Epoch [1000/1000], Loss: 0.8133

3rd model:
Epoch [100/1000], Loss: 0.8133
Epoch [200/1000], Loss: 0.8133
Epoch [300/1000], Loss: 0.8133
Epoch [400/1000], Loss: 0.8133
Epoch [500/1000], Loss: 0.8133
Epoch [600/1000], Loss: 0.8133
Epoch [700/1000], Loss: 0.8133
Epoch [800/1000], Loss: 0.8133
Epoch [900/1000], Loss: 0.8133
Epoch [1000/1000], Loss: 0.8133


In [113]:
# Visualize results

# First model
with torch.no_grad():
    outputs = model1(data_in)
    predicted = (outputs >= 0.5).float()
    accuracy = (predicted == data_target).float().mean()
    print(f'1st model training accuracy: {accuracy.item():.4f}')
    
# Second model
with torch.no_grad():
    outputs = model2(data_in)
    predicted = (outputs >= 0.5).float()
    accuracy = (predicted == data_target).float().mean()
    print(f'2nd model training accuracy: {accuracy.item():.4f}')
    
# Third model
with torch.no_grad():
    outputs = model3(data_in)
    predicted = (outputs >= 0.5).float()
    accuracy = (predicted == data_target).float().mean()
    print(f'3rd model training accuracy: {accuracy.item():.4f}')

1st model training accuracy: 0.5000
2nd model training accuracy: 0.5000
3rd model training accuracy: 0.5000


In [114]:
# Print model weights

# First model
print("1st model:")
for name, param in model1.named_parameters():
    if param.requires_grad:
        print(name, param.data)
        
# First model
print("\n2nd model:")
for name, param in model2.named_parameters():
    if param.requires_grad:
        print(name, param.data)
        
# First model
print("\n3rd model:")
for name, param in model3.named_parameters():
    if param.requires_grad:
        print(name, param.data)

1st model:
hidden.weight tensor([[-0.4038, -0.5261],
        [-0.3002,  0.8728],
        [-0.6567, -0.4243],
        [-0.0779, -0.2058]])
hidden.bias tensor([-0.3078,  0.0435,  0.2972,  0.1101])
output.weight tensor([[-0.6715, -1.1204, -0.2738, -1.0208]])
output.bias tensor([-1.1413])

2nd model:
hidden.weight tensor([[-0.5544,  0.6763],
        [-0.6055, -0.1584],
        [-0.5447, -0.0417],
        [ 0.2396, -0.4130],
        [ 0.0035, -0.4673],
        [-0.5746, -0.6365],
        [-0.0218,  0.2395],
        [ 0.5668,  0.0653]])
hidden.bias tensor([ 0.0156, -0.0953,  0.6489, -0.3200,  0.2998,  0.1583,  0.2450,  0.2611])
output.weight tensor([[ 0.0423,  0.0156, -0.0244,  0.3095, -0.0535,  0.1309,  0.1520, -0.3511]])
output.bias tensor([0.3312])

3rd model:
fct1.weight tensor([[-0.3742,  0.0128],
        [-0.6855, -0.4097],
        [-0.2969,  0.3090],
        [ 0.6011,  0.5011],
        [ 0.6920,  0.2614],
        [ 0.4503, -0.4065],
        [-0.2840, -0.5224],
        [ 0.3873,  0.361