In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict

# Dataset for a 1-bit half adder: the inputs are every pair of bits (a, b)
# and each target row is [carry, sum], i.e. [a AND b, a XOR b].
bit_values = (0, 1)
bits = torch.tensor(
    [[a, b] for a in bit_values for b in bit_values],
    dtype=torch.float,
)
sums = torch.tensor(
    [[a & b, a ^ b] for a in bit_values for b in bit_values],
    dtype=torch.float,
)

# Echo the dataset, then open the section that lists the models.
rule = "--------------------------------------------"
print(rule, "DATASET", rule, "DATA IN", sep="\n")
print(bits)
print(rule, "DATA TARGET", sep="\n")
print(sums)
print(rule, "MODELS", rule, sep="\n")
print("MODEL 1")
# Define the models
# Model 1: fully connected 2 -> 8 -> 4 -> 6 -> 2, with ReLU after each hidden
# layer and a sigmoid on the two outputs. Keyword order fixes the layer order.
model1 = nn.Sequential(
    OrderedDict(
        fc1=nn.Linear(2, 8),
        relu1=nn.ReLU(),
        fc2=nn.Linear(8, 4),
        relu2=nn.ReLU(),
        fc3=nn.Linear(4, 6),
        relu3=nn.ReLU(),
        fc4=nn.Linear(6, 2),
        sigmoid=nn.Sigmoid(),
    )
)
print(model1)
print("--------------------------------------------")
print("MODEL 2")
# Model 2: the smallest net, fully connected 2 -> 4 -> 2 -> 2, with ReLU after
# each hidden layer and a sigmoid (registered as 'sigmoid1') on the outputs.
model2 = nn.Sequential(
    OrderedDict(
        fc1=nn.Linear(2, 4),
        relu1=nn.ReLU(),
        fc2=nn.Linear(4, 2),
        relu2=nn.ReLU(),
        fc3=nn.Linear(2, 2),
        sigmoid1=nn.Sigmoid(),
    )
)
print(model2)
print("--------------------------------------------")
print("MODEL 3")
# Model 3: the widest net, fully connected 2 -> 16 -> 8 -> 4 -> 2, with ReLU
# after each hidden layer and a sigmoid on the two outputs.
model3 = nn.Sequential(
    OrderedDict(
        fc1=nn.Linear(2, 16),
        relu1=nn.ReLU(),
        fc2=nn.Linear(16, 8),
        relu2=nn.ReLU(),
        fc3=nn.Linear(8, 4),
        relu3=nn.ReLU(),
        fc4=nn.Linear(4, 2),
        sigmoid=nn.Sigmoid(),
    )
)
print(model3)

# One shared MSE criterion, plus an independent SGD optimizer (lr=0.1) per model
criterion = nn.MSELoss()
optimizer1, optimizer2, optimizer3 = [
    optim.SGD(net.parameters(), lr=0.1) for net in (model1, model2, model3)
]

# Train the models
num_epochs = 10000

print("--------------------------------------------")
print("EPOCHS")

trainees = ((model1, optimizer1), (model2, optimizer2), (model3, optimizer3))
for epoch in range(num_epochs):
    # Each model takes one gradient step on the full dataset, in model order.
    step_losses = []
    for net, opt in trainees:
        opt.zero_grad()
        batch_loss = criterion(net(bits), sums)
        batch_loss.backward()
        opt.step()
        step_losses.append(batch_loss)
    # The logged losses are the values computed before this epoch's update.
    loss1, loss2, loss3 = step_losses

    # Progress report on epochs 0, 1000, 2000, ...
    if epoch % 1000 == 0:
        print("--------------------------------------------")
        print(f"Epoch {epoch}, loss1: {loss1.item()}, loss2: {loss2.item()}, loss3: {loss3.item()}")

# Test the models: run each one on the full dataset without gradient tracking
# and round the sigmoid outputs to 0/1 predictions.
with torch.no_grad():
    y_pred1, y_pred2, y_pred3 = [
        torch.round(net(bits)) for net in (model1, model2, model3)
    ]

    # Accuracy is per output element: matching entries / total target entries.
    n_targets = len(sums) * len(sums[0])
    acc1, acc2, acc3 = [
        (rounded == sums).sum().item() / n_targets
        for rounded in (y_pred1, y_pred2, y_pred3)
    ]

    print("--------------------------------------------")
    print("ACCURACY")
    print("--------------------------------------------")
    for label, accuracy in (("Accuracy1", acc1), ("Accuracy2", acc2), ("Accuracy3", acc3)):
        print(f"{label}: {accuracy*100}%")
    print("--------------------------------------------")

print("WEIGHTS")
# Dump every trainable parameter tensor of each model, in model order
for net in (model1, model2, model3):
    for name, param in net.named_parameters():
        if not param.requires_grad:
            continue  # skip frozen parameters
        print("--------------------------------------------")
        print(name, param.data)

# Print the model weights for the best model.
# Pick the model with the highest accuracy. max() returns the first maximal
# entry it sees, so listing the candidates from model3 down to model1 means a
# tie goes to the later model, and a model that is strictly worse than another
# can never be selected (e.g. acc1 == acc2 > acc3 yields model2, not model3).
best_acc, best_model = max(
    ((acc3, model3), (acc2, model2), (acc1, model1)),
    key=lambda scored: scored[0],
)

print("BEST MODEL")
for name, param in best_model.named_parameters():
    if param.requires_grad:
        print("--------------------------------------------")
        print(name, param.data)

--------------------------------------------
DATASET
--------------------------------------------
DATA IN
tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])
--------------------------------------------
DATA TARGET
tensor([[0., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.]])
--------------------------------------------
MODELS
--------------------------------------------
MODEL 1
Sequential(
  (fc1): Linear(in_features=2, out_features=8, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=8, out_features=4, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=4, out_features=6, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=6, out_features=2, bias=True)
  (sigmoid): Sigmoid()
)
--------------------------------------------
MODEL 2
Sequential(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=4, out_features=2, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=2, out_features=2,