In [1]:
import pandas as pd
import numpy as np
import random as rnd

import torch
from torch import nn, optim

## Short introduction to PyTorch

You can find the PyTorch documentation here: https://pytorch.org/docs/stable/index.html

For this exercise you will need the `nn` and `optim` subpackages of `torch`, and you will also need to convert your data into tensors. Below is a short introduction to everything you will need for this exercise.

In [2]:
# Converting a numpy array into a tensor.
arr = np.array([1, 2, 3])
print(arr)
# The dtype is given explicitly: the integer array becomes a float32 tensor (a copy of the data).
arr_tensor = torch.tensor(arr, dtype=torch.float32)
print(arr_tensor)

[1 2 3]
tensor([1., 2., 3.])


In [3]:
# The element type of a tensor can be converted as well:
# .long() yields 64-bit integers, .float() yields 32-bit floats.
print(arr_tensor.long(), arr_tensor.float(), sep="\n")

tensor([1, 2, 3])
tensor([1., 2., 3.])


In [4]:
# unsqueeze(dim) inserts a new dimension of size 1 at position `dim`.
print(arr_tensor.shape)               # original tensor
print(arr_tensor.unsqueeze(0).shape)  # new dimension in front
print(arr_tensor.unsqueeze(1).shape)  # new dimension at the end

torch.Size([3])
torch.Size([1, 3])
torch.Size([3, 1])


## nn Module

In [5]:
# The nn package includes many different layers and loss functions: https://pytorch.org/docs/stable/nn.html
# For our case we will use the nn.Embedding layer. It will learn a representation of our users and items in
# the same f-dimensional space, which allows us to 'compare' these representations with each other and so
# estimate the similarity between users and items directly.

# For example, we have 3 different users.
n_ids = 3

# and want to learn a representation of size 2.
factors = 2

# We will create the following layer.
emb = nn.Embedding(n_ids, factors)

# The layer can then look up the vector (size 2) associated with a specific user_id from (0, 1, 2).
# Indices have to be integer (long) tensors. Build them directly with torch.tensor(..., dtype=torch.long)
# instead of torch.Tensor([...]).long(): the latter goes through float32 first, which cannot represent
# every integer above 2**24 exactly, so a large id could silently be rounded to a different id.
user_ids = torch.tensor([0], dtype=torch.long)
emb(user_ids)  # embedding for user with id 0

tensor([[ 1.4726, -0.0036]], grad_fn=<EmbeddingBackward0>)

In [6]:
# Subclassing nn.Module and implementing forward() gives you a callable model.

class AddModule(nn.Module):
    """Minimal module without any layers: calling it adds its two inputs."""

    def forward(self, a, b):
        """Return the sum ``a + b``."""
        total = a + b
        return total


add_module = AddModule()
add_module(1, 2)

3

In [7]:
# nn layers carry learnable parameters; a model built from them can be saved and loaded again.

class SomeModule(nn.Module):
    """A single linear layer followed by a sigmoid activation.

    Args:
        n_factors: number of input features of the linear layer.
        n_classes: number of output features of the linear layer.
    """

    def __init__(self, n_factors=10, n_classes=1):
        super().__init__()

        self.some_layer = nn.Linear(in_features=n_factors, out_features=n_classes)
        self.act = nn.Sigmoid()

        # Set every entry of the layer's weight matrix to the constant 0.5
        # (the bias is not touched here).
        nn.init.constant_(self.some_layer.weight, val=0.5)

    def forward(self, x):
        """Apply the linear layer to ``x`` and pass the result through the sigmoid."""
        pre_activation = self.some_layer(x)
        return self.act(pre_activation)
        

# Create the model with its default sizes.
some_module = SomeModule()

# Inspect the weight of the linear layer.
print("Weights of Some Layer:")
print(some_module.some_layer.weight)

# Save the model: only its parameters (the state dict) are written to disk.
checkpoint_path = "some_module.pt"
torch.save(some_module.state_dict(), checkpoint_path)

# Load the model: read the parameters back and copy them into the module.
restored_state = torch.load(checkpoint_path)
some_module.load_state_dict(restored_state)

Weights of Some Layer:
Parameter containing:
tensor([[0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
         0.5000]], requires_grad=True)


<All keys matched successfully>

In [8]:
# Before training a model, switch it into train mode.
some_module.train()

# Dummy input of ten zeros: the linear layer then contributes only its bias.
zero_input = torch.zeros(10)
print(some_module(zero_input))

# Before evaluating a model, switch it into eval mode in the same way.
some_module.eval()

# Inside torch.no_grad() the output is computed without gradient tracking (no grad_fn attached).
with torch.no_grad():
    print("Do something without gradients.")

    print(some_module(zero_input))

tensor([0.5170], grad_fn=<SigmoidBackward0>)
Do something without gradients.
tensor([0.5170])


## Optim Package

In [9]:
# Two optimizers are used most commonly; both get the model's parameters and a learning rate.
learning_rate = 0.0001

# The Adam optimizer.
adam = optim.Adam(some_module.parameters(), lr=learning_rate)

# The SGD optimizer.
sgd = optim.SGD(some_module.parameters(), lr=learning_rate)

# Printing an optimizer lists its hyper-parameters.
print(adam, sgd, sep="\n")

# How each optimizer works is not covered here. To learn more, read up on the topic
# or enroll in a course on Deep Learning or Machine Learning.

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.0001
    momentum: 0
    nesterov: False
    weight_decay: 0
)


## Loss Functions

In [10]:
# To train a model we also have to define the function that should be optimized: the loss.
# There are many different types of losses; three of them are shown here.

bce_loss = nn.BCELoss()
mse_loss = nn.MSELoss()
l1_loss = nn.L1Loss()

labels = torch.tensor([0, 1, 0, 1]).float()
output_of_model = torch.tensor([-1, 3, 5, 0]).float()

separator = "-" * 100

print("Correct labels: ", labels)
print("Output of Model: ", output_of_model)
print(separator)

# L1 and MSE are applied directly to the raw model output.
for criterion in (l1_loss, mse_loss):
    print(criterion)
    loss_output = criterion(output_of_model, labels)
    print("Loss: ", loss_output)
    print(separator)

# For BCE the raw output is first passed through a sigmoid, which maps it into (0, 1).
print(bce_loss)
sigmoid = nn.Sigmoid()
act_output = sigmoid(output_of_model)
print("Activated output: ", act_output)
loss_output = bce_loss(act_output, labels)
print("Loss: ", loss_output)
print(separator)

Correct labels:  tensor([0., 1., 0., 1.])
Output of Model:  tensor([-1.,  3.,  5.,  0.])
----------------------------------------------------------------------------------------------------
L1Loss()
Loss:  tensor(2.2500)
----------------------------------------------------------------------------------------------------
MSELoss()
Loss:  tensor(7.7500)
----------------------------------------------------------------------------------------------------
BCELoss()
Activated output:  tensor([0.2689, 0.9526, 0.9933, 0.5000])
Loss:  tensor(1.5154)
----------------------------------------------------------------------------------------------------


## One Training Episode

In [13]:
# A single training step consists of the following stages.

# Switch the model into train mode.
some_module.train()

# Reset the gradients held by the optimizer.
sgd.zero_grad()

# Input (ten zeros) and the target value for it.
x = torch.zeros(10)
y = torch.tensor([1.0])

# Forward pass: get the output of the model.
y_hat = some_module(x)

# Calculate the loss between the output and the target.
loss = l1_loss(y_hat, y)

# Backward pass: compute the gradients of the loss.
loss.backward()

# Let the optimizer update the parameters.
sgd.step()

print("Loss in this step: ", loss.item())

Loss in this step:  0.4829416871070862
