# Thinking in tensors, writing in PyTorch

Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). Version for ML in PL 2019.


## Extra: matrix factorization

See:

* [Matrix decomposition viz](http://p.migdal.pl/matrix-decomposition-viz/) for some inspiration.
* Section 4 from [From Customer Segmentation to Recommendation Systems](https://www.aitrends.com/machine-learning/ai-customer-targeting-levels/).

To do: turn it into an exercise.

In [None]:
%matplotlib inline
import pandas as pd
import seaborn as sns
import numpy as np

import torch
from torch import nn
from torch.nn import Parameter

In [None]:
# Column labels for `avg_temp`: the twelve calendar months, January first.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Row labels for `avg_temp`: one city per row, in the same order as the rows below.
cities = ["Toronto", "Warsaw", "Boston", "London", "San Francisco", "Jerusalem", "Mexico", "Cape Town", "Sydney"]
# Average temperature matrix: 9 rows (cities) x 12 columns (months).
# NOTE(review): values look like degrees Celsius -- confirm against the data source.
avg_temp = np.array([
    [-5.8, -3.1, 4.5, 6.7, 14.3, 18.2, 20.1, 20.6, 15.9, 11.2, 3.6, -7.2],
    [-2.9, 3.6, 4.2, 9.7, 16.1, 19.5, 20.0, 18.8, 16.4, 7.6, 3.2, 1.3],
    [0.3, 1.5, 5.9, 8.4, 14.8, 20.2, 24.5, 24.7, 19.7, 13.0, 7.9, 1.9],
    [2.3, 6.5, 8.7, 9.2, 12.3, 15.4, 17.3, 20.0, 14.8, 10.8, 8.7, 6.4],
    [11.5, 13.9, 14.3, 15.7, 16.3, 17.4, 17.2, 17.7, 18.2, 17.4, 14.6, 10.4],
    [9.7, 10.3, 12.7, 15.5, 21.2, 22.1, 24.1, 25.3, 23.5, 20.1, 15.7, 11.8],
    [14.0, 15.6, 17.5, 20.3, 20.6, 18.1, 17.6, 18.2, 17.8, 16.8, 14.9, 16.0],
    [23.1, 23.3, 21.4, 19.0, 17.1, 15.5, 15.4, 15.6, 15.4, 18.6, 20.9, 21.3],
    [23.8, 24.6, 23.4, 20.8, 18.1, 15.1, 14.4, 14.5, 17.3, 19.0, 21.8, 24.3]
])

In [None]:
# Label the raw array: rows are indexed by city, columns by month.
df = pd.DataFrame(avg_temp, index=cities, columns=months)
# Annotated heatmap of the data; fmt='.0f' prints each cell without decimals.
sns.heatmap(df, annot=True, fmt='.0f')

## Exercise

Using PyTorch, perform a matrix decomposition, i.e. $M = A B$.

Hints:

* NumPy to PyTorch: `torch.from_numpy(x)`
* PyTorch to NumPy: `x.numpy()` or `x.detach().numpy()`
* make sure all floats are `float32` (for Torch tensors use: `x = x.float()`)
* view results and the training curve

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Training target: the NumPy temperature matrix converted to a float32 tensor.
avg_temp_tensor = torch.from_numpy(avg_temp).float()

In [None]:
def show_loss(losses, logy=False):
    """Print the minimal recorded loss and plot the loss curve.

    Args:
        losses: Sequence of numeric loss values, one per training step,
            in step order.
        logy: If true, draw the loss axis on a logarithmic scale
            (``plt.semilogy``); otherwise use a linear axis.

    Raises:
        ValueError: If ``losses`` is empty (there is no minimum to report).
    """
    # Report the actual minimum: the last value equals the minimum only
    # when the curve happens to end at its lowest point.
    print("Minimal loss: {:.3f}".format(min(losses)))

    steps = range(len(losses))
    if logy:
        plt.semilogy(steps, losses)
    else:
        plt.plot(steps, losses)
    plt.xlabel("Step")
    plt.ylabel("Loss")

In [None]:
# %load hint_matrix_1.py
class Factorize(nn.Module):
    """Low-rank matrix factorization model: reconstructs a matrix as ``A @ B``.

    Both factors are learnable parameters initialised from a standard normal
    distribution.

    Args:
        factors: Inner (shared) dimension of the two factors.
        n_rows: Number of rows of the reconstructed matrix. Defaults to 9,
            the number of cities in this notebook's data.
        n_cols: Number of columns of the reconstructed matrix. Defaults to
            12, the number of months in this notebook's data.
    """

    def __init__(self, factors=2, n_rows=9, n_cols=12):
        super().__init__()
        self.A = Parameter(torch.randn(n_rows, factors))
        self.B = Parameter(torch.randn(factors, n_cols))

    def forward(self):
        """Return the current reconstruction, a ``(n_rows, n_cols)`` tensor."""
        return self.A.matmul(self.B)
    

In [None]:
class FactorizeBiasA(nn.Module):
    """Low-rank factorization with a per-row bias: ``A @ B + bias_A``.

    ``bias_A`` has shape ``(n_rows, 1)`` and is broadcast across columns, so
    each row of the reconstruction gets its own learnable offset. All
    parameters are initialised from a standard normal distribution.

    Args:
        factors: Inner (shared) dimension of the two factors.
        n_rows: Number of rows of the reconstructed matrix. Defaults to 9,
            the number of cities in this notebook's data.
        n_cols: Number of columns of the reconstructed matrix. Defaults to
            12, the number of months in this notebook's data.
    """

    def __init__(self, factors=2, n_rows=9, n_cols=12):
        super().__init__()
        self.A = Parameter(torch.randn(n_rows, factors))
        self.B = Parameter(torch.randn(factors, n_cols))
        self.bias_A = Parameter(torch.randn(n_rows, 1))

    def forward(self):
        """Return the current reconstruction, a ``(n_rows, n_cols)`` tensor."""
        # The (n_rows, 1) bias broadcasts over the column dimension.
        return self.A.matmul(self.B) + self.bias_A

In [None]:
# %load hint_matrix_2.py
# Rank-2 factorization model, trained with mean squared error and plain SGD.
model = Factorize(factors=2)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [None]:
# load hint 3... oh, no - actually, go to the previous notebooks :)

In [None]:
# Full-batch training: every step compares the whole reconstruction with the data.
losses = []
for i in range(10000):
    
    # Forward pass and loss; .item() stores a plain Python float for plotting.
    output = model()
    loss = criterion(output, avg_temp_tensor)
    losses.append(loss.item())
    
    # Clear stale gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()        
    optimizer.step()
    
# Log-scale y axis for the loss curve.
show_loss(losses, logy=True)

In [None]:
# Model reconstruction as a labelled table; detach() drops autograd tracking
# so the tensor can be converted to a NumPy array.
df_pred = pd.DataFrame(model().detach().numpy(), index=cities, columns=months)
sns.heatmap(df_pred, annot=True, fmt='.0f')

In [None]:
# Residuals: predicted minus actual value for every city/month cell.
sns.heatmap(df_pred - df, annot=True, fmt='.0f')

In [None]:
# Preview of a random mask of 0s and 1s with the same shape as the data
# (the same call train_cv uses to split entries into train and validation).
torch.randint_like(avg_temp_tensor, 0, 2)

In [None]:
def train_cv(model, optimizer, epochs=10000, target=None):
    """Train ``model`` on a random subset of matrix entries and validate on the rest.

    A random 0/1 mask with the shape of ``target`` is drawn once per call.
    Entries where the mask is 1 form the training set; the remaining entries
    are held out. Both losses are the mean squared error over their entries.

    Args:
        model: Module called with no arguments that returns a tensor with
            the same shape as ``target``.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of gradient steps.
        target: Tensor to reconstruct. Defaults to the notebook-level
            ``avg_temp_tensor`` when omitted.

    Returns:
        A tuple ``(losses, losses_val)`` of two lists of floats with one
        entry per step: the training loss and the held-out loss, both
        measured before that step's parameter update.

    Note:
        If the random mask happens to be all 0s or all 1s, one of the two
        losses divides by zero and is not a finite number.
    """
    if target is None:
        # Backward-compatible default: the notebook's temperature tensor.
        target = avg_temp_tensor

    # The split is fixed for the whole run, so derive everything that
    # depends only on the mask once, outside the training loop.
    train_mask = torch.randint_like(target, 0, 2)
    val_mask = 1 - train_mask
    n_train = train_mask.sum()
    n_val = val_mask.sum()

    losses = []
    losses_val = []
    for _ in range(epochs):
        residual = model() - target

        loss = residual.mul(train_mask).pow(2).sum() / n_train
        losses.append(loss.item())

        # The held-out loss is only monitored, never backpropagated, so
        # there is no need to record it in the autograd graph.
        with torch.no_grad():
            loss_val = residual.mul(val_mask).pow(2).sum() / n_val
        losses_val.append(loss_val.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return losses, losses_val

In [None]:
# Fresh rank-2 model trained on a random subset of entries; print the final
# training loss next to the final held-out loss.
model = Factorize(factors=2)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
losses, losses_val = train_cv(model, optimizer, epochs=10000)
print(losses[-1], losses_val[-1])

In [None]:
# Sweep the number of factors and compare final training vs. held-out loss.
dims = [1, 2, 3, 4]
res = []

for d in dims:
    # New model and optimizer per setting (note the larger lr=1e-2 here).
    model = Factorize(factors=d)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    losses, losses_val = train_cv(model, optimizer, epochs=10000)
    # Keep only the last value of each curve.
    res.append({
        'd': d,
        'loss': losses[-1],
        'losses_val': losses_val[-1]
    })
    
# One bar group per factor count, with a logarithmic loss axis.
pd.DataFrame(res).set_index('d').plot.bar(logy=True)

In [None]:
# Same sweep over the number of factors, using the model with a per-row bias.
dims = [1, 2, 3, 4]
res = []

for d in dims:
    # New model and optimizer per setting (lr=1e-2).
    model = FactorizeBiasA(factors=d)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    losses, losses_val = train_cv(model, optimizer, epochs=10000)
    # Keep only the last value of each curve.
    res.append({
        'd': d,
        'loss': losses[-1],
        'losses_val': losses_val[-1]
    })
    
# One bar group per factor count, with a logarithmic loss axis.
pd.DataFrame(res).set_index('d').plot.bar(logy=True)