# 3. Modules & Losses

In [1]:
import torch
import torch.nn as nn

## Creation and Usage

Instantiate a Linear module from the `torch.nn` library

In [2]:
linear_regression_model = nn.Linear(in_features=5, out_features=2)  # Linear <=> Fully Connected

In [3]:
linear_regression_model.weight  # Parameters of a module are randomly initialised

Parameter containing:
tensor([[-0.2165,  0.2280, -0.3565,  0.4171, -0.4020],
        [ 0.0169,  0.2558, -0.0035,  0.0329, -0.3696]], requires_grad=True)

In [4]:
type(linear_regression_model.weight)

torch.nn.parameter.Parameter

A `Parameter` is a Tensor which is automatically added to the list of parameters when used within a model.

Check/Get all module parameters

In [5]:
for name, tensor in linear_regression_model.named_parameters():
    print("{:6s}  -  {}".format(name, tensor.shape))

weight  -  torch.Size([2, 5])
bias    -  torch.Size([2])


In [None]:
list(linear_regression_model.parameters())

Calling a module on an input Tensor

In [6]:
# Pytorch Modules operate on batches. It allows to process multiple datapoints in parallel
# All modules in torch.nn are written to produce outputs for a batch of multiple inputs at the same time.

batch_size = 3
feature_size = 5
x = torch.randn(batch_size, feature_size) # batch of 3 samples with 5 features each

print(x, "\n\n", x.shape)

tensor([[ 0.2010,  2.4010, -0.1684, -0.0029,  0.9251],
        [-0.4166, -0.0140, -1.0766, -0.6129,  0.9381],
        [ 1.1492,  0.4060,  1.1247,  1.1949,  0.4755]]) 

 torch.Size([3, 5])


In [7]:
predicted_y = linear_regression_model(x)  # Note, you do not call explicitely forward
print(predicted_y, "\n\n", predicted_y.shape)

tensor([[ 0.4268,  0.5173],
        [ 0.0740, -0.1325],
        [-0.0142,  0.2242]], grad_fn=<AddmmBackward>) 

 torch.Size([3, 2])


---

## Building Custom modules

In [9]:
import torch.nn.functional as F

class MyNeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(MyNeuralNetwork, self).__init__()
        
        self.linear_1 = nn.Linear(input_size, hidden_size)
        self.linear_2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out = F.relu(self.linear_1(x))
        out = self.linear_2(out)
        return out

In [10]:
model = MyNeuralNetwork(input_size=10, hidden_size=5,  num_classes=2)

In [11]:
print(model)

MyNeuralNetwork(
  (linear_1): Linear(in_features=10, out_features=5, bias=True)
  (linear_2): Linear(in_features=5, out_features=2, bias=True)
)


In [12]:
x = torch.rand(5, 10)   # the first dimension is reserved for the 'batch_size'
out = model(x)   # this calls model.forward(x)

print(out.shape)

torch.Size([5, 2])


---

## Composing modules with `torch.nn.Sequential`


In [13]:
neural_net = torch.nn.Sequential(
    torch.nn.Linear(10, 5),
    torch.nn.ReLU(),
    torch.nn.Linear(5, 2),
)

print(neural_net)

Sequential(
  (0): Linear(in_features=10, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=2, bias=True)
)


In [14]:
# Run the model:
neural_net(x)

tensor([[ 0.2000, -0.0065],
        [ 0.1680,  0.0724],
        [ 0.2319,  0.0500],
        [ 0.2671, -0.0612],
        [ 0.3069, -0.0311]], grad_fn=<AddmmBackward>)

---

## Moving your model to GPU

In [15]:
model.cuda()  # No need to assign it

AssertionError: Torch not compiled with CUDA enabled

In [17]:
device = torch.device("cuda:0")
model.to(device)  # No need to assign it

AssertionError: Torch not compiled with CUDA enabled

---

## Storing and loading models

### The easy way

In [None]:
torch.save(model, "my_model.pt")

In [None]:
my_model_loaded = torch.load("my_model.pt")

In [None]:
print(model.linear_2.bias)
print(my_model_loaded.linear_2.bias)

### The recommended way

In [18]:
torch.save(model.state_dict(), "my_model_state_dict.pt")

In [20]:
model.state_dict()

OrderedDict([('linear_1.weight',
              tensor([[ 0.2251, -0.2794,  0.0399,  0.0028, -0.1762,  0.0183,  0.1639,  0.0341,
                        0.0521,  0.1808],
                      [ 0.1393, -0.2947, -0.2521, -0.2370,  0.0228, -0.0326, -0.1141,  0.2751,
                       -0.1779,  0.1541],
                      [ 0.0779,  0.1091, -0.2656,  0.2230,  0.2212,  0.2420, -0.1818,  0.0710,
                        0.0664, -0.2766],
                      [ 0.1781, -0.1299,  0.1859,  0.2373, -0.3154, -0.2728,  0.2382, -0.3038,
                        0.2106,  0.2648],
                      [-0.1399,  0.2984, -0.0688,  0.1922,  0.2033,  0.1418,  0.1273,  0.2140,
                       -0.2535,  0.0885]])),
             ('linear_1.bias',
              tensor([ 0.0921, -0.0119, -0.1385, -0.2255, -0.2161])),
             ('linear_2.weight',
              tensor([[-0.2288, -0.1627,  0.1571,  0.3059,  0.4440],
                      [ 0.3702,  0.3429, -0.3814,  0.4163,  0.3689]])),
    

In [19]:
my_model_loaded = MyNeuralNetwork(10, 5, 2)
my_model_loaded.load_state_dict(torch.load("my_model_state_dict.pt"))

<All keys matched successfully>

In [21]:
print(model.linear_2.bias)
print(my_model_loaded.linear_2.bias)

Parameter containing:
tensor([0.2934, 0.2514], requires_grad=True)
Parameter containing:
tensor([0.2934, 0.2514], requires_grad=True)


In [26]:
for par in model.parameters():
    print(par.data)

tensor([[ 0.2251, -0.2794,  0.0399,  0.0028, -0.1762,  0.0183,  0.1639,  0.0341,
          0.0521,  0.1808],
        [ 0.1393, -0.2947, -0.2521, -0.2370,  0.0228, -0.0326, -0.1141,  0.2751,
         -0.1779,  0.1541],
        [ 0.0779,  0.1091, -0.2656,  0.2230,  0.2212,  0.2420, -0.1818,  0.0710,
          0.0664, -0.2766],
        [ 0.1781, -0.1299,  0.1859,  0.2373, -0.3154, -0.2728,  0.2382, -0.3038,
          0.2106,  0.2648],
        [-0.1399,  0.2984, -0.0688,  0.1922,  0.2033,  0.1418,  0.1273,  0.2140,
         -0.2535,  0.0885]])
tensor([ 0.0921, -0.0119, -0.1385, -0.2255, -0.2161])
tensor([[-0.2288, -0.1627,  0.1571,  0.3059,  0.4440],
        [ 0.3702,  0.3429, -0.3814,  0.4163,  0.3689]])
tensor([0.2934, 0.2514])


---
## Losses

PyTorch comes with a lot of predefined loss functions :
- `L1Loss`
- `MSELoss`
- `CrossEntropyLoss`
- `NLLLoss`
- `PoissonNLLLoss`
- `KLDivLoss`
- `BCELoss`
- `...`

In [27]:
loss_function = nn.L1Loss()

In [28]:
x = torch.Tensor([1,1,1])
y = torch.Tensor([1,2,3])

loss_function(x, y)

tensor(1.)

---
# Building our training loop (3 / 5)

In [29]:
# INITIALIZATION

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, RandomCrop
from torchvision.datasets import ImageFolder

device = torch.device("cpu")

transform = Compose((RandomCrop((50, 50)), ToTensor()))
dataset = ImageFolder(root="../alien-vs-predator/", transform=transform)
loader = DataLoader(dataset, batch_size=5, shuffle=True)

model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(7500, 100),
    torch.nn.ReLU(),
    torch.nn.Linear(100, 2),
)
model.to(device)

loss_fn = nn.CrossEntropyLoss()

In [None]:
# TRAINING LOOP

for samples, labels in loader:
    samples = samples.to(device)
    labels = labels.to(device)
    predictions = model(samples)
    loss = loss_fn(predictions, labels)
    # compute gradients
    # update model parameters