In [9]:
import torch
import torch.nn as nn
from collections import OrderedDict

def _two_layer_net(hidden_units, hidden_activation):
    """Build a 2 -> hidden_units -> 2 network ending in a sigmoid.

    ``hidden_activation`` is the activation *class* (e.g. ``nn.ReLU``); it is
    instantiated here so the layers are created in declaration order.
    """
    layers = OrderedDict()
    layers['hidden_net'] = nn.Linear(2, hidden_units)
    layers['hidden_act'] = hidden_activation()
    layers['output_net'] = nn.Linear(hidden_units, 2)
    layers['output_act'] = nn.Sigmoid()
    return nn.Sequential(layers)


# Three variants that differ only in hidden width and hidden activation.
model1 = _two_layer_net(32, nn.Sigmoid)
model2 = _two_layer_net(8, nn.ReLU)
model3 = _two_layer_net(16, nn.Sigmoid)

print(model1, model2, model3, sep='\n')

# All four combinations of two binary inputs.
data_in = torch.tensor(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ],
    dtype=torch.float,
)
print(data_in)

# Per row: column 0 is the AND of the two inputs, column 1 is their XOR.
data_target = torch.tensor(
    [
        [0, 0],
        [0, 1],
        [0, 1],
        [1, 0],
    ],
    dtype=torch.float,
)
print(data_target)

# Each model gets its own loss object and its own optimizer:
# Adam for model1, SGD with momentum for model2 and model3.
criterion1, criterion2, criterion3 = (nn.MSELoss() for _ in range(3))
optimizer1 = torch.optim.Adam(model1.parameters(), lr=0.01)
optimizer2 = torch.optim.SGD(model2.parameters(), lr=0.01, momentum=0.9)
optimizer3 = torch.optim.SGD(model3.parameters(), lr=0.01, momentum=0.9)

def train(model, inputs, outputs, criterion, optimizer, epochs=100):
    """Fit ``model`` to ``(inputs, outputs)`` with full-batch gradient steps.

    Every epoch runs one forward pass over all of ``inputs``, evaluates
    ``criterion`` against ``outputs``, back-propagates and applies a single
    ``optimizer`` step. The model's parameters are updated in place.

    Args:
        model: Callable module mapping ``inputs`` to predictions.
        inputs: Batch fed to ``model`` on every epoch.
        outputs: Targets passed to ``criterion`` alongside the predictions.
        criterion: Loss callable invoked as ``criterion(prediction, target)``.
        optimizer: Optimizer holding the parameters of ``model``.
        epochs: Number of optimisation steps to run (default 100, the value
            that used to be hard-coded).

    Returns:
        A list with the loss value (as a Python float) measured at the start
        of each epoch, i.e. before that epoch's parameter update. Empty when
        ``epochs`` is 0 or negative.
    """
    loss_history = []
    for _ in range(epochs):
        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()
        loss = criterion(model(inputs), outputs)
        loss.backward()
        optimizer.step()
        loss_history.append(loss.item())
    return loss_history

# Pair every network explicitly with its own loss and optimizer instead of
# rediscovering the pairing through module comparisons inside the loop.
training_setups = [
    (model1, criterion1, optimizer1),
    (model2, criterion2, optimizer2),
    (model3, criterion3, optimizer3),
]

for model, criterion, optimizer in training_setups:
    train(model, data_in, data_target, criterion, optimizer)

    # Evaluation only needs forward values; skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(data_in)

    # Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
    predicted = (outputs >= 0.5).float()
    print(predicted)

    # Element-wise accuracy: every output entry is scored independently,
    # so a row with one wrong entry still contributes its correct entry.
    accuracy = (predicted == data_target).float().mean()
    print(f'Training Accuracy: {accuracy.item()*100}')

# First-layer (hidden_net) weight matrices after training.
print('Weight of network1 :\n',model1[0].weight)
print('Weight of network2 :\n',model2[0].weight)
print('Weight of network3 :\n',model3[0].weight)

Sequential(
  (hidden_net): Linear(in_features=2, out_features=32, bias=True)
  (hidden_act): Sigmoid()
  (output_net): Linear(in_features=32, out_features=2, bias=True)
  (output_act): Sigmoid()
)
Sequential(
  (hidden_net): Linear(in_features=2, out_features=8, bias=True)
  (hidden_act): ReLU()
  (output_net): Linear(in_features=8, out_features=2, bias=True)
  (output_act): Sigmoid()
)
Sequential(
  (hidden_net): Linear(in_features=2, out_features=16, bias=True)
  (hidden_act): Sigmoid()
  (output_net): Linear(in_features=16, out_features=2, bias=True)
  (output_act): Sigmoid()
)
tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.]])
tensor([[0., 0.],
        [0., 1.],
        [0., 1.],
        [1., 1.]])
Training Accuracy: 87.5
tensor([[0., 0.],
        [0., 1.],
        [0., 0.],
        [0., 1.]])
Training Accuracy: 62.5
tensor([[0., 1.],
        [0., 0.],
        [0., 1.],
        [0., 0.]]