In [None]:
# https://github.com/LukeDitria/pytorch_tutorials/blob/main/section02_pytorch_basics/notebooks/Tutorial1_Pytorch_Basics.ipynb

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [None]:
class SimpleFunction(nn.Module):
    """Parameter-free module that applies the fixed map ``y = x * 2 + 4``."""

    def __init__(self):
        # Nothing to learn here; just let nn.Module set up its own bookkeeping.
        super().__init__()

    def forward(self, x):
        """Return ``x * 2 + 4`` for the given input ``x``."""
        doubled = x * 2
        return doubled + 4

In [None]:
# Instantiate the module, then run the forward pass by calling the
# instance like a function (nn.Module hands the call on to forward()).
simple_function = SimpleFunction()
output = simple_function(10)
print(output)

In [None]:
class LinearModel(nn.Module):
    """
    Hand-written linear layer: maps the input x to x @ w^T + b.

    w is created with shape (output_size, input_size) and b with shape
    (1, output_size); both are filled by torch.randn and registered as
    learnable parameters.
    """
    def __init__(self, input_size, output_size):
        super().__init__()
        # Draw the weight first and the bias second so the random number
        # generator is consumed in a fixed order.
        weight_init = torch.randn(output_size, input_size)
        bias_init = torch.randn(1, output_size)
        self.w = nn.Parameter(weight_init)
        self.b = nn.Parameter(bias_init)

    def forward(self, x):
        """Project x with the transposed weight matrix, then add the bias."""
        projected = x @ self.w.T
        return projected + self.b

In [None]:
# Random input batch of shape (10, 5)
input_data = torch.randn((10, 5))
print(input_data)

In [None]:
# Build a model with 5 inputs and 1 output, then run the random batch through it
linear_model = LinearModel(input_size=5, output_size=1)
output = linear_model(input_data)

# Show the shape of the result, then its values detached from the autograd graph
print(output.shape)
print(output.detach())

**PyTorch Built-in Neural Network Layers**

In [None]:
# Build a linear layer, aka a "fully connected" layer, aka a "Perceptron" layer
# nn.Linear(number of inputs, number of outputs)
linear = nn.Linear(3, 1)

# Let's have a look at the parameters of this layer.
# .detach() returns the values without autograd tracking; it is used here
# instead of the legacy .data attribute.
# The "weights" are what is multiplied by the input data
print('w:\n', linear.weight.detach())
# The bias is then added on!
print('b:\n', linear.bias.detach())

# The shapes can be read straight off the parameters
print('w shape:\n', linear.weight.shape)
print('b shape:\n', linear.bias.shape)

In [None]:
# Inspect the gradients currently stored on the layer's parameters
print('w:\n', linear.weight.grad)
print('b:\n', linear.bias.grad)
# Note: PyTorch initialises the grad of the tensors to "None", NOT 0!
# They only get created after the first backward pass.

In [None]:
# Create a random data input tensor of shape (100, 3)
data = torch.randn(100, 3)
# Create some noisy target data: the sum of each row plus a small random perturbation.
# keepdim=True (the documented keyword name) keeps the summed axis, so the
# result has shape (100, 1) and lines up with the noise term.
target = data.sum(dim=1, keepdim=True) + 0.01 * torch.randn(data.shape[0], 1)
print('Input data:\n', data[:10])
print('Output data:\n', target[:10])

In [None]:
# Remember! To perform a forward pass of our model, we just need to "call" our network.
# PyTorch's nn.Module class will automatically pass it to the "forward" function in the layer class
target_pred = linear(data)
# .detach() gives the values without autograd tracking (preferred over the legacy .data attribute)
print("Network output:\n", target_pred.detach()[:10])
print("Network output shape:", target_pred.shape)

**Loss Functions and Optimizers**

In [None]:
# We are doing a regression, so use a mean squared error loss
loss_function = nn.MSELoss()

# Create a stochastic gradient descent optimizer with a learning rate of 0.01
# (the way we will be using it, it is just normal GD).
# The optimizer has to be told WHAT it needs to optimize, so the first
# thing we hand it is the linear layer's "parameters"
optimizer = torch.optim.SGD(params=linear.parameters(), lr=0.01)

In [None]:
# Plot the first dimension of the input against the output

# Model outputs from the earlier forward pass (detached so they can be plotted)
plt.scatter(data[:, 0], target_pred.detach())
# Ground truth targets, drawn with a different marker
plt.scatter(data[:, 0], target, marker="x")
# Legend entries follow the order in which the scatters were drawn
plt.legend(["Predictions", "Ground Truth Data"])
plt.xlabel("Inputs")
plt.ylabel("Outputs")

In [None]:
# Score the network's predictions against the targets
loss = loss_function(input=target_pred, target=target)
print('loss:', loss.item())

In [None]:
# Backward pass: backpropagate the loss to compute the gradients.
loss.backward()
# Show the gradients now stored on the layer's parameters.
print('dL/dw: ', linear.weight.grad)
print('dL/db: ', linear.bias.grad)
# Note: every backward pass of the model must be preceded by a forward pass,
# because data from parts of the computational graph is deleted during the backward pass to save memory.
# We can tell PyTorch to hold onto this data but, in many cases, it needs to be recalculated anyway.

In [None]:
# Update the parameters once, using the gradients currently stored on them
optimizer.step()

# Run a fresh forward pass with the updated parameters and re-evaluate the loss
target_pred = linear(data)
loss = loss_function(target_pred, target)
print('loss after 1 step optimization: ', loss.item())

**The Training Loop**
For MOST tasks (but not all), a single training iteration in PyTorch can be summarised in the following 5 steps:
- Forward pass of our model with the data.
- Calculate the loss.
- Reset the currently stored gradients to 0.
- Backpropagate the loss to calculate the new gradients.
- Perform an optimization step.

In [None]:
# Loss value recorded at every iteration, for plotting afterwards
loss_logger = []

for step in range(1000):
    # Forward pass of our data, then score it against the targets
    target_pred = linear(data)
    loss = loss_function(target_pred, target)

    # Clear the stored gradients before computing new ones; otherwise the
    # new gradients would be added to those from the previous step!
    optimizer.zero_grad()

    # Backpropagate to get the new gradients, then update the parameters
    loss.backward()
    optimizer.step()

    loss_logger.append(loss.item())

# Loss from the last iteration (computed before that iteration's update)
print("loss:", loss.item())

In [None]:
# Loss recorded at each iteration of the training loop
plt.plot(loss_logger)
# Label the axes so the figure can be read on its own
plt.xlabel("Iteration")
plt.ylabel("Loss")

In [None]:
# Plot the first dimension of the input against the output.
# target_pred holds the predictions from the most recent forward pass.
plt.scatter(data[:, 0], target_pred.detach())
# Ground truth targets, drawn with a different marker
plt.scatter(data[:, 0], target, marker="x")
plt.legend(["Predictions", "Ground Truth"])
plt.xlabel("Inputs")
plt.ylabel("Outputs")