In [29]:
import torch
import torch.nn as nn

In [2]:
x = torch.tensor([[0,0], [0,1], [1,0], [1,1]], dtype=torch.float32)
y = torch.tensor([0,1,1,0], dtype=torch.long)

In [3]:
print("data tensor:", x)
print("data size:", x.shape)

data tensor: tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])
data size: torch.Size([4, 2])


In [4]:
print("target tensor:", y)
print("target size:", y.shape)

target tensor: tensor([0, 1, 1, 0])
target size: torch.Size([4])


In [5]:
Y = torch.tensor([[0],[1],[1],[0]], dtype=torch.long)
print("target tensor:", Y)
print("target size:", Y.shape)

target tensor: tensor([[0],
        [1],
        [1],
        [0]])
target size: torch.Size([4, 1])


In [6]:
model = nn.Sequential(nn.Linear(2, 2),
                      nn.Sigmoid(),
                      nn.Linear(2, 1))

In [7]:
print(model.parameters)
for w in model.parameters():
    print("original:", w.data)
    print("modified:", w.data.sub(0.001))

<bound method Module.parameters of Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=2, out_features=1, bias=True)
)>
original: tensor([[-0.1363,  0.3613],
        [ 0.4285,  0.4860]])
modified: tensor([[-0.1373,  0.3603],
        [ 0.4275,  0.4850]])
original: tensor([0.2516, 0.7032])
modified: tensor([0.2506, 0.7022])
original: tensor([[-0.4276, -0.3512]])
modified: tensor([[-0.4286, -0.3522]])
original: tensor([0.6786])
modified: tensor([0.6776])


In [27]:

lr = 0.01
epochs = 1000

for epoch in range(epochs):
    #forward pass
    out = model.forward(x)
#     print("the size of the model output", out.size())
    
    #calculate residuals using MSE
#     print("residuals:", out - Y)
#     print("residuals squared:", (out-Y)**2)
#     print("residuals mean error", torch.mean((out - Y)**2))
    
    loss = torch.mean((out - Y)**2)
#     print(loss)
#     print("the size of the loss", loss.size())
    
    #backpropogate error
    loss.backward()
    
    #update model parameters using gradient descent
    for w in model.parameters():
#         print ("the size of the gradient:", w.grad.data.size())
#         print ("the size of the parameters:", w.data.size())
#         print ("the size of the parameters - learning rate:", (w.data -lr).size())
        w.data = (w.data - lr) * (w.grad.data)
#         print ("the size of the parameters after update:", w.data.size())
        w.grad.data = torch.zeros_like(w.grad)
#         print ("the size of the gradient after update:", w.grad.data.size())
        
    if epoch % 100 == 0:
        print(f"epoch {epoch} has loss {loss.item()}")

epoch 0 has loss 0.432595431804657
epoch 100 has loss 0.47262778878211975
epoch 200 has loss 0.4823967218399048
epoch 300 has loss 0.48690512776374817
epoch 400 has loss 0.48921656608581543
epoch 500 has loss 0.49043384194374084
epoch 600 has loss 0.4910796284675598
epoch 700 has loss 0.49142301082611084
epoch 800 has loss 0.49160557985305786
epoch 900 has loss 0.4917028248310089


In [28]:
test_data = torch.tensor([0,1],dtype=torch.float32)
model(test_data)

tensor([0.0083], grad_fn=<AddBackward0>)