In [17]:
### Defining the model
import numpy as np
import torch
import torch.nn as nn

xor_mlp_pytorch = nn.Sequential(
    nn.Linear(2, 3),
    nn.ReLU(),
    nn.Linear(3,3),
    nn.ReLU(),
    nn.Linear(3, 3),
    nn.ReLU(),
    nn.Linear(3, 1),
    nn.Sigmoid()
)

In [4]:
### Initialize weights

def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

### Apply the initialization to the model
xor_mlp_pytorch.apply(init_weights)

Sequential(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): ReLU()
  (4): Linear(in_features=3, out_features=3, bias=True)
  (5): ReLU()
  (6): Linear(in_features=3, out_features=1, bias=True)
  (7): Sigmoid()
)

In [5]:
### Loss function
loss_fn = nn.BCELoss()

In [6]:
### Optimizer SGD
optimizer = torch.optim.SGD(xor_mlp_pytorch.parameters(), lr=0.05)

In [10]:
### Training

### Define our data
input_data_tensor = torch.tensor([[0, 0], [0, 1], [1, 1], [1, 0]], dtype=torch.float)
targets_tensor = torch.unsqueeze(
    torch.tensor([0, 1, 1, 0], dtype=torch.float), 1
)

### Training loop
epochs = 3000
for epoch in range(epochs):
    optimizer.zero_grad()
    if epoch < 2:
        print("*************** Values after zero_grad **************")
        print(xor_mlp_pytorch[0].weight)
        print(xor_mlp_pytorch[0].weight.grad)

    output = xor_mlp_pytorch(input_data_tensor)

    if epoch < 2:
        print("*************** Values after forward ***************")
        print(xor_mlp_pytorch[0].weight)
        print(xor_mlp_pytorch[0].weight.grad)
   
    loss = loss_fn(output, targets_tensor)

    if epoch < 2:
        print("*************** Values after loss calculation ***************")
        print(xor_mlp_pytorch[0].weight)
        print(xor_mlp_pytorch[0].weight.grad)

    loss.backward

    if epoch < 2:
        print("*************** Values after backward ***************")
        print(xor_mlp_pytorch[0].weight)
        print(xor_mlp_pytorch[0].weight.grad)

    optimizer.step

    if epoch < 2:
        print("*************** Values after weight update ")
        print(xor_mlp_pytorch[0].weight)
        print(xor_mlp_pytorch[0].weight.grad)
        
        

*************** Values after zero_grad **************
Parameter containing:
tensor([[-0.8245,  0.1991],
        [ 0.5883,  0.0505],
        [-0.6782,  0.3601]], requires_grad=True)
None
*************** Values after forward ***************
Parameter containing:
tensor([[-0.8245,  0.1991],
        [ 0.5883,  0.0505],
        [-0.6782,  0.3601]], requires_grad=True)
None
*************** Values after loss calculation ***************
Parameter containing:
tensor([[-0.8245,  0.1991],
        [ 0.5883,  0.0505],
        [-0.6782,  0.3601]], requires_grad=True)
None
*************** Values after backward ***************
Parameter containing:
tensor([[-0.8245,  0.1991],
        [ 0.5883,  0.0505],
        [-0.6782,  0.3601]], requires_grad=True)
None
*************** Values after weight update 
Parameter containing:
tensor([[-0.8245,  0.1991],
        [ 0.5883,  0.0505],
        [-0.6782,  0.3601]], requires_grad=True)
None
*************** Values after zero_grad **************
Parameter containin

In [14]:
### Follow the loss in the backward direction, using its .grad_fn attribute too see the computation graph:

print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<BinaryCrossEntropyBackward0 object at 0x000001A032266C50>
<SigmoidBackward0 object at 0x000001A0313B63B0>
<AddmmBackward0 object at 0x000001A032C023E0>


In [15]:
def accuracy(predictions, targets, threshold=0.5):
    predictions = (predictions >= threshold).astype(int)
    accuracy = np.mean(predictions == targets)
    return accuracy

In [18]:
### Evaluation

predictions = xor_mlp_pytorch(input_data_tensor)
accuracy_value = accuracy(
    predictions.detach().numpy(), targets_tensor.numpy(), threshold=0.5
)
print(accuracy_value)

0.5
