In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

### PyTorch Basics

In [None]:
# Tensors: creation and NumPy interoperability
a = torch.tensor([5.0])  # one-element float tensor built from a Python list
print(a)

# Random integers drawn from [1, 10) arranged as a 2x5 NumPy array
b = np.random.randint(low=1, high=10, size=(2, 5))
print("Numpy:\n", b)

# Build a tensor from the NumPy array
c = torch.tensor(b)
print("Tensor:\n", c)

# ...and convert the tensor back to a NumPy array
print("Numpy:\n", c.numpy())

In [None]:
# Reshaping with view
temp = torch.arange(1, 11, dtype=torch.float)  # values 1.0 ... 10.0
print(temp)
print(temp.view(2, 5))   # 2 rows x 5 columns
print(temp.view(-1, 1))  # -1 lets the row count be inferred -> 10 x 1

In [None]:
# squeeze / unsqueeze: add and remove a dimension of size 1.
# The out-of-place variants are used so that every step gets its own name and
# the tensor printed first is never modified after it has been displayed.
h = torch.randn(5, 3)
print(h.shape, "\n", h, "\n")

h_expanded = h.unsqueeze(1)  # insert a size-1 axis at position 1 -> (5, 1, 3)
print(h_expanded.shape, "\n", h_expanded, "\n")

h_restored = h_expanded.squeeze(1)  # drop that axis again -> (5, 3)
print(h_restored.shape, "\n", h_restored, "\n")

In [None]:
# Device check: use the first CUDA GPU when one is available, otherwise the CPU
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
# Call .to() with the selected device; the result is only displayed here,
# it is not assigned back to `temp`.
temp.to(device=device)

In [None]:
# Create a float tensor directly on the selected device
torch.tensor([2, 4, 6], device=device, dtype=torch.float32)

### Training a basic model

In [None]:
# Synthetic data lying exactly on the line y = 5x + 3
x = torch.rand(50)  # 50 inputs sampled uniformly from [0, 1)
y = 3. + 5. * x     # targets

In [None]:
# Visualize the synthetic data. Axis labels and a title let the figure stand
# on its own; the trailing ';' hides the text repr of the last call.
plt.plot(x, y)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Training data: y = 5x + 3");

Build a linear model: $y = wx + b$

Our aim is to find $w$ and $b$ (parameters)

`requires_grad=True` -> autograd tracks $w$ and $b$, so the derivatives of the loss with respect to them can be calculated

In [None]:
# Parameters of the linear model: two randomly initialised one-element tensors.
# requires_grad=True is set on both so that gradients are computed for them.
w, b = (torch.randn(1, requires_grad=True) for _ in range(2))

In [None]:
# Optimizer - SGD/Adam
#   params:       parameters to be updated
#   lr:           learning rate
#   weight_decay: L2 regularization
optimizer = torch.optim.SGD([w, b], lr=0.01, weight_decay=0)

In [None]:
# Loss function: squared error averaged over all elements
criterion = nn.MSELoss(reduction="mean")

In [None]:
# Training loop: stochastic gradient descent with one sample per update
num_epoch = 30
size = x.shape[0]
for epoch in range(num_epoch):
    epoch_loss = 0.0
    # Visit the samples in a fresh random order every epoch
    for sample_idx in np.random.permutation(size):
        optimizer.zero_grad()  # clear the gradients of the previous step

        x_t = x[sample_idx]
        y_t = y[sample_idx].view(-1)  # one-element tensor, like the prediction

        y_prediction = w * x_t + b
        loss = criterion(y_prediction, y_t)

        loss.backward()   # compute gradients (w.grad, b.grad)
        optimizer.step()  # update w and b from their gradients and the learning rate

        epoch_loss += loss.item()
    print("Epoch: ", epoch+1, "Loss: ", epoch_loss/size)

In [None]:
# Show the current parameter values, detached from the autograd graph
w_learned = w.detach()
b_learned = b.detach()
print("w: ", w_learned, "b: ", b_learned)

In [None]:
# Show the gradient currently stored on w ...
print("δE/δw: ", w.grad)
# ... reset the gradients held by the optimizer's parameters ...
optimizer.zero_grad()
# ... and show the same attribute again to see the effect of the reset.
print("δE/δw: ", w.grad)

In [None]:
# Prediction for one input value using the fitted line y = w*x + b
x_new = 0.54
w.item() * x_new + b.item()

1. Create model (parameters)
2. Initialize the optimizer and specify the parameters that will be updated
3. Determine loss function (criterion)
4. Training Loop
    a. reset gradients: optimizer.zero_grad()
    b. make prediction: y_prediction = w * x_t + b
    c. calculate loss: loss = criterion(y_prediction, y_t)
    d. calculate gradients: loss.backward()
    e. update parameters: optimizer.step()

### Training a Neural Network

In [None]:
# Synthetic data: 50 points on one period of the sine curve
x = torch.rand(50) * (2 * np.pi)  # inputs spread over [0, 2*pi)
y = x.sin()                       # targets

In [None]:
# Visualize the sine samples. Axis labels and a title let the figure stand on
# its own; the trailing ';' hides the text repr of the last call.
plt.scatter(x, y)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Training data: y = sin(x)");

In [None]:
# Small fully connected network: 1 input -> 7 hidden units (ReLU) -> 1 output
model = nn.Sequential(
    nn.Linear(in_features=1, out_features=7),
    nn.ReLU(),
    nn.Linear(in_features=7, out_features=1),
)

In [None]:
# Display the model's printed representation
print(model)

In [None]:
# Index into the Sequential container to get its first layer
first_layer = model[0]
print("First Layer:\n", first_layer)

In [None]:
# Weight parameter of the first layer
first_layer_weight = model[0].weight
print("Weights in the first layer: ", first_layer_weight)

In [None]:
# Adam optimizer over all parameters of the network, paired with an MSE loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
criterion = nn.MSELoss()

In [None]:
# Mini-batch training loop for the network
num_epoch = 30
size = x.shape[0]
batch_size = 1
for epoch in range(num_epoch):
    R = np.random.permutation(size)  # new sample order every epoch
    epoch_loss = 0.0
    # Walk over the shuffled indices in strides of batch_size. The final slice
    # may be shorter, so no sample is skipped when size is not a multiple of
    # batch_size.
    for start in range(0, size, batch_size):
        optimizer.zero_grad()

        batch_idx = R[start:start + batch_size]
        x_t = x[batch_idx].view(-1, 1)
        y_t = y[batch_idx].view(-1, 1)

        y_prediction = model(x_t)
        loss = criterion(y_prediction, y_t)
        loss.backward()
        optimizer.step()

        # The MSE criterion yields the mean over the batch, so weight it by
        # the batch length; dividing the total by `size` below then reports a
        # per-sample average for any batch_size.
        epoch_loss += loss.item() * len(batch_idx)
    print("Epoch: ", epoch+1, "Loss: ", epoch_loss/size)

In [None]:
# Run the network on every input (reshaped to a column) and detach the result
# from the autograd graph
x_column = x.view(-1, 1)
prediction = model(x_column).detach()

In [None]:
# Compare the targets with the network output.
# Each series carries its own label, so the legend entries cannot get out of
# step with the order of the scatter calls.
plt.scatter(x, y, label="Truth")
plt.scatter(x, prediction, label="Approximated")
plt.xlabel("x")
plt.ylabel("y")
plt.legend();

### torch.nn.Module

In [None]:
class MLP_(torch.nn.Module):
    """Multi-layer perceptron with dropout in front of every linear layer.

    For each consecutive pair of widths in ``layer_dims`` the network stacks
    ``Dropout -> Linear`` and, for every pair except the last one, a ``ReLU``.

    Args:
        layer_dims: Layer widths, input width first and output width last,
            e.g. ``[10, 10, 20, 2]``. Must contain at least two entries.
        dropout_rate: Probability ``p`` passed to every ``torch.nn.Dropout``.

    Raises:
        ValueError: If ``layer_dims`` has fewer than two entries.
    """

    def __init__(self, layer_dims: list, dropout_rate: float):
        super().__init__()

        # With fewer than two widths there is no linear layer to build; fail
        # here instead of silently creating a model without parameters.
        if len(layer_dims) < 2:
            raise ValueError(
                "layer_dims must contain at least an input and an output size, "
                f"got {len(layer_dims)} entries"
            )

        self.num_layers = len(layer_dims) - 1
        layers = []
        for i in range(self.num_layers):
            layers.append(torch.nn.Dropout(p=dropout_rate))
            layers.append(torch.nn.Linear(layer_dims[i], layer_dims[i + 1]))
            # No activation after the last linear layer
            if i != self.num_layers - 1:
                layers.append(torch.nn.ReLU())
        # ModuleList registers the sub-modules so their parameters are tracked
        self.layers = torch.nn.ModuleList(layers)

    def forward(self, x):
        """Pass ``x`` through every stored layer in order and return the result."""
        for layer in self.layers:
            x = layer(x)
        return x

In [None]:
# Instantiate the MLP with widths 10 -> 10 -> 20 -> 2 and dropout p = 0.5
model = MLP_(
    [10, 10, 20, 2],
    0.5,
)

In [None]:
# Display the model's printed representation
print(model)

In [None]:
# Random input: 20 rows with 10 values each, sampled uniformly from [0, 1)
x = torch.rand((20, 10))

In [None]:
# Forward pass of the input `x` through `model`.
# NOTE(review): no model.eval() call is visible before this line, so the Dropout
# layers are presumably still active here — confirm that this is intended.
prediction = model(x)