## Import libraries

In [1]:
# Import pytorch
import torch
import numpy as np

## 1. Build the model
- Input (x1,x2): 2 nodes
- First hidden layer: 10 nodes, with weights (w) and bias (b), sigmoid activation function
- Second hidden layer: 10 nodes, with weights (w) and bias (b), sigmoid activation function
- Output (predict): 1 node

In [11]:
from torch import nn

# Fully connected network assembled with nn.Sequential.
# Layer widths: 2 inputs -> 10 hidden -> 10 hidden -> 1 output.
input_size = 2
hidden_sizes = [10,10]
output_size = 1

model = nn.Sequential(
    # input -> first hidden layer, followed by its sigmoid activation
    nn.Linear(in_features=input_size, out_features=hidden_sizes[0]),
    nn.Sigmoid(),
    # first hidden -> second hidden layer, followed by its sigmoid activation
    nn.Linear(in_features=hidden_sizes[0], out_features=hidden_sizes[1]),
    nn.Sigmoid(),
    # second hidden -> output layer (no activation on the output)
    nn.Linear(in_features=hidden_sizes[1], out_features=output_size),
)

# Show the layer-by-layer structure of the assembled model.
print('Model:\n', model)

Model:
 Sequential(
  (0): Linear(in_features=2, out_features=10, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): Sigmoid()
  (4): Linear(in_features=10, out_features=1, bias=True)
)


## 2. Generate the random inputs x1, x2

In [12]:
# Seed torch's global RNG so the sampled input is the same on every run.
torch.manual_seed(100)

# One sample (batch size 1) with two features: x1 and x2.
x = torch.rand(size=(1, 2))
print("input(x1,x2):\n ", x)

input(x1,x2):
  tensor([[0.1117, 0.8158]])


## 3. Generate the label y_true

In [13]:
# Generate y-labels: y = (x1^2 + x2^2) / 2 for every row of x.
# The reduction runs over the feature axis and keeps it (keepdim=True), so
# y_true has shape (batch, 1) and lines up with the model output for any
# batch size, not only the single sample used here.
y_true = x.pow(2).sum(dim=1, keepdim=True) / 2
print('The value of y_true is:\n', y_true)

The value of y_true is:
 tensor([[0.3390]])


## 4. Build a loss function L = (y_predict - y_true)^2

In [14]:
def loss_fn(y_true, y_pred):
    """Return the sum of squared errors between prediction and target.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predicted values, broadcastable against ``y_true``.

    Returns:
        A zero-dimensional tensor holding ``sum((y_pred - y_true) ** 2)``.
    """
    residual = y_pred - y_true
    return residual.pow(2).sum()

## 5. Forward / Backward propagation

In [15]:
for i in range(1):
    # backward() accumulates into .grad, so clear anything left over from a
    # previous run of this cell; otherwise re-running it would add the new
    # gradients on top of the old ones.
    model.zero_grad()
    # Forward propagation
    y_pred = model(x)
    # Show loss at each epoch (arguments in the order loss_fn declares them)
    loss = loss_fn(y_true, y_pred)
    print(f"Epoch = {i + 1} | Loss={loss.item():f}")
    # Backward propagation: fills .grad on every parameter.
    # No optimizer step is taken, so the weights themselves are unchanged.
    loss.backward()

Epoch = 1 | Loss=0.905356


In [16]:
# Show the prediction and the target, one per line, for comparison.
print(y_pred, y_true, sep="\n")

tensor([[-0.6125]], grad_fn=<AddmmBackward>)
tensor([[0.3390]])


## 6a. Calculate the gradients of the loss wrt weights and bias

In [17]:
# Autograd gradients of the loss w.r.t. the first hidden layer's parameters.
# The weight gradient is printed transposed.
print(
    'Weights gradient of first hidden layer:\n',
    model[0].weight.grad.T,
)
print(
    'Bias gradient of first hidden layer:\n',
    model[0].bias.grad,
)

Weights gradient of first hidden layer:
 tensor([[-0.0001,  0.0012, -0.0006,  0.0019,  0.0014,  0.0005,  0.0007,  0.0010,
         -0.0016,  0.0005],
        [-0.0011,  0.0090, -0.0043,  0.0139,  0.0103,  0.0035,  0.0050,  0.0069,
         -0.0113,  0.0039]])
Bias gradient of first hidden layer:
 tensor([-0.0013,  0.0110, -0.0052,  0.0170,  0.0126,  0.0043,  0.0062,  0.0085,
        -0.0139,  0.0047])


In [18]:
# Autograd gradients of the loss w.r.t. the second hidden layer's parameters.
# The weight gradient is printed transposed.
print(
    'Weights gradient of second hidden layer:\n',
    model[2].weight.grad.T,
)
print(
    'Bias gradient of second hidden layer:\n',
    model[2].bias.grad,
)

Weights gradient of second hidden layer:
 tensor([[ 0.0584,  0.0033,  0.0085,  0.0147,  0.0071, -0.0085, -0.0718,  0.0740,
          0.0567,  0.0014],
        [ 0.0844,  0.0048,  0.0123,  0.0212,  0.0102, -0.0122, -0.1037,  0.1069,
          0.0819,  0.0020],
        [ 0.0398,  0.0022,  0.0058,  0.0100,  0.0048, -0.0058, -0.0489,  0.0504,
          0.0386,  0.0009],
        [ 0.0620,  0.0035,  0.0091,  0.0155,  0.0075, -0.0090, -0.0762,  0.0785,
          0.0601,  0.0015],
        [ 0.0735,  0.0042,  0.0108,  0.0184,  0.0089, -0.0106, -0.0903,  0.0931,
          0.0713,  0.0017],
        [ 0.0678,  0.0038,  0.0099,  0.0170,  0.0082, -0.0098, -0.0833,  0.0858,
          0.0658,  0.0016],
        [ 0.0603,  0.0034,  0.0088,  0.0151,  0.0073, -0.0087, -0.0740,  0.0763,
          0.0585,  0.0014],
        [ 0.0761,  0.0043,  0.0111,  0.0191,  0.0092, -0.0110, -0.0935,  0.0964,
          0.0738,  0.0018],
        [ 0.0511,  0.0029,  0.0075,  0.0128,  0.0062, -0.0074, -0.0628,  0.0647,
     

In [19]:
# Autograd gradients of the loss w.r.t. the output layer's parameters.
# The weight gradient is printed transposed.
print(
    'Weights gradient of output layer:\n',
    model[4].weight.grad.T,
)
print(
    'Bias gradient of output layer:\n',
    model[4].bias.grad,
)

Weights gradient of output layer:
 tensor([[-0.8713],
        [-0.9655],
        [-1.0034],
        [-1.3373],
        [-1.1203],
        [-0.8077],
        [-0.7448],
        [-1.1001],
        [-1.2675],
        [-1.0747]])
Bias gradient of output layer:
 tensor([-1.9030])


## 6b. Write to torch_autograd.dat

## 7a. Implement forward propagation and backpropagation algorithm from scratch

### Build the model

In [20]:
# Build the model

class TwoLayerNet(torch.nn.Module):
    """Fully connected net: Linear -> sigmoid -> Linear -> sigmoid -> Linear.

    Despite the name, the network holds three linear layers: two hidden
    layers with sigmoid activations and a linear output layer.
    """

    def __init__(self, D_in, H1,H2, D_out):
        """Create the three linear layers.

        Args:
            D_in: number of input features.
            H1: width of the first hidden layer.
            H2: width of the second hidden layer.
            D_out: number of output features.
        """
        super().__init__()
        self.linear1 = torch.nn.Linear(in_features=D_in, out_features=H1)
        self.linear2 = torch.nn.Linear(in_features=H1, out_features=H2)
        self.linear3 = torch.nn.Linear(in_features=H2, out_features=D_out)

    def forward(self, x):
        """Run the forward pass and return the un-activated output."""
        hidden = torch.sigmoid(self.linear1(x))
        hidden = torch.sigmoid(self.linear2(hidden))
        return self.linear3(hidden)

### Construct the model

In [21]:
# Construct our model by instantiating the class defined above
# NOTE(review): this draws fresh random initial weights. The hand-written
# gradients will only match the autograd ones if both models start from the
# same weights -- confirm this still holds under Restart Kernel -> Run All.
model = TwoLayerNet(2, 10, 10, 1)

# Capture the layer parameters (weights and biases, not their gradients) for
# the from-scratch implementation.
# - detach(): the manual forward/backward pass must not be tracked by
#   autograd; the detached tensors still share storage with the parameters.
# - t(): nn.Linear stores weights as (out_features, in_features); transposing
#   gives (in_features, out_features) so that `inputs.mm(w)` works directly.
b3 = model.linear3.bias.detach()
w3 = model.linear3.weight.detach().t()
b2 = model.linear2.bias.detach()
w2 = model.linear2.weight.detach().t()
b1 = model.linear1.bias.detach()
w1 = model.linear1.weight.detach().t()

In [22]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), evaluated with NumPy.

    Args:
        x: a scalar or array-like value accepted by ``np.exp``.

    Returns:
        The element-wise sigmoid of ``x``.
    """
    exp_neg = np.exp(-x)
    return 1.0 / (1 + exp_neg)
    
def sigmoid_derivationx(y):
    """Return the sigmoid derivative expressed through the sigmoid's output.

    Args:
        y: the already-activated value ``sigmoid(x)`` (scalar, array or tensor).

    Returns:
        ``y * (1 - y)``, computed element-wise.
    """
    complement = 1 - y
    return y * complement

In [23]:
learning_rate = 0.1

for t in range(1):
    # Forward propagation: affine -> sigmoid -> affine -> sigmoid -> affine
    h1 = x.mm(w1) + b1
    h1_sigmoid = torch.sigmoid(h1)
    h2 = h1_sigmoid.mm(w2) + b2
    h2_sigmoid = torch.sigmoid(h2)
    y_pred = h2_sigmoid.mm(w3) + b3

    # Compute and print loss (sum of squared errors).
    loss = (y_pred - y_true).pow(2).sum().item()
    # Report this loop's own counter `t`; `i` belongs to the autograd loop in
    # an earlier cell and is undefined here on a fresh kernel.
    print(f"Epoch = {t + 1} | Loss={loss:f}")

    # Backward propagation (chain rule, layer by layer from the output).
    # dL/dy_pred for L = sum((y_pred - y_true)^2)
    grad_y_pred = -2.0 * (y_true - y_pred)
    delta = grad_y_pred  # N x D_out
    grad_w3 = h2_sigmoid.T.mm(delta)
    # Bias gradients sum the per-sample deltas over the batch axis.
    # keepdim=True keeps the (1, width) shape the single-sample case produces.
    grad_b3 = delta.sum(dim=0, keepdim=True)

    # Push the error back through the output weights and the second sigmoid.
    delta = delta.mm(w3.T) * sigmoid_derivationx(h2_sigmoid)
    grad_w2 = h1_sigmoid.T.mm(delta)
    grad_b2 = delta.sum(dim=0, keepdim=True)

    # Push the error back through the second-layer weights and the first sigmoid.
    delta = delta.mm(w2.T) * sigmoid_derivationx(h1_sigmoid)
    grad_w1 = x.T.mm(delta)
    grad_b1 = delta.sum(dim=0, keepdim=True)

    print("First hidden layer weights gradient: \n",grad_w1)
    print("First hidden layer bias gradient: \n", grad_b1)
    print("Second hidden layer weight gradient:\n", grad_w2)
    print("Second hidden layer bias gradient: \n",grad_b2)
    print("Output layer bias gradient: \n",grad_b3)
    print("Output layer weight gradient: \n",grad_w3)

    # Gradient-descent update. These are out-of-place assignments, so the
    # names are rebound to new tensors and the tensors captured from the
    # model are not modified.
    w3 = w3 - learning_rate * grad_w3
    w2 = w2 - learning_rate * grad_w2
    w1 = w1 - learning_rate * grad_w1

    b3 = b3 - learning_rate * grad_b3
    b2 = b2 - learning_rate * grad_b2
    b1 = b1 - learning_rate * grad_b1

# Prediction from the last forward pass (computed before the update above).
print(y_pred)

Epoch = 1 | Loss=0.905356
First hidden layer weights gradient: 
 tensor([[-0.0001,  0.0012, -0.0006,  0.0019,  0.0014,  0.0005,  0.0007,  0.0010,
         -0.0016,  0.0005],
        [-0.0011,  0.0090, -0.0043,  0.0139,  0.0103,  0.0035,  0.0050,  0.0069,
         -0.0113,  0.0039]], grad_fn=<MmBackward>)
First hidden layer bias gradient: 
 tensor([[-0.0013,  0.0110, -0.0052,  0.0170,  0.0126,  0.0043,  0.0062,  0.0085,
         -0.0139,  0.0047]], grad_fn=<MulBackward0>)
Second hidden layer weight gradient:
 tensor([[ 0.0584,  0.0033,  0.0085,  0.0147,  0.0071, -0.0085, -0.0718,  0.0740,
          0.0567,  0.0014],
        [ 0.0844,  0.0048,  0.0123,  0.0212,  0.0102, -0.0122, -0.1037,  0.1069,
          0.0819,  0.0020],
        [ 0.0398,  0.0022,  0.0058,  0.0100,  0.0048, -0.0058, -0.0489,  0.0504,
          0.0386,  0.0009],
        [ 0.0620,  0.0035,  0.0091,  0.0155,  0.0075, -0.0090, -0.0762,  0.0785,
          0.0601,  0.0015],
        [ 0.0735,  0.0042,  0.0108,  0.0184,  0.00


## 7b. Write to my_autograd.dat