# Thinking in tensors in PyTorch

Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). Version 0.4 for Uniwersytet Śląski.

**Work in progress**

## RNNs: LSTMs for one-hot encoded data

We use recurrent networks. For wonderful introductions:

* [Understanding LSTM Networks](http://colah.github.io/posts/2015-08-Understanding-LSTMs/) by Chris Olah
* [Exploring LSTMs](http://blog.echen.me/2017/05/30/exploring-lstms/) by Edwin Chen	

See also:

* [Simple diagrams of convoluted neural networks](https://medium.com/inbrowserai/simple-diagrams-of-convoluted-neural-networks-39c097d2925b) by Piotr Migdał
* [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) by Andrej Karpathy
* [Repository to track the progress in Natural Language Processing](https://github.com/sebastianruder/NLP-progress) by Sebastian Ruder

And a few technical remarks:

* [Inconsistent dimension ordering for 1D networks - NCL vs NLC vs LNC](https://discuss.pytorch.org/t/inconsistent-dimension-ordering-for-1d-networks-ncl-vs-nlc-vs-lnc/14807)
* [Contiguous() and permute()](https://discuss.pytorch.org/t/contiguous-and-permute/20673)

In [None]:
!pip install livelossplot --quiet

In [1]:
%matplotlib inline
import h5py
import pandas as pd
import numpy as np

import torch
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

from livelossplot import PlotLosses

In [None]:
# Read the train/test arrays and the category/character vocabularies into memory.
# `dataset[()]` reads a whole dataset as a NumPy array; the older
# `Dataset.value` property was deprecated and then removed in h5py 3.0.
with h5py.File("./data/names_dense.h5", 'r') as data:
    X_train = data['X_train'][()]
    y_train = data['y_train'][()]
    X_test = data['X_test'][()]
    y_test = data['y_test'][()]
    # The vocabulary entries are read as bytes and decoded to str.
    categories = [x.decode('utf-8') for x in data['categories'][()]]
    characters = [x.decode('utf-8') for x in data['characters'][()]]

In [None]:
#X_train = X_train.transpose((0, 2, 1)).copy()
#X_test = X_test.transpose((0, 2, 1)).copy()

In [None]:
# Print one shape per line. Expected, in order:
#   X_train: (24783, 26, 16)
#   y_train: (24783,)
#   X_test:  (8261, 26, 16)
#   y_test:  (8261,)
print(*(dataset.shape for dataset in (X_train, y_train, X_test, y_test)), sep="\n")

## Loading data

In [None]:
# Pair each feature array with its labels; labels become 64-bit integer tensors.
train_dataset = TensorDataset(torch.from_numpy(X_train), torch.LongTensor(y_train))
test_dataset = TensorDataset(torch.from_numpy(X_test), torch.LongTensor(y_test))

# Mini-batches of 32; only the training data is reshuffled.
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Keyed by the phase names that train_model iterates over.
dataloaders = dict(train=trainloader, validation=testloader)

In [None]:
# Train on CUDA if available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def train_model(model, criterion, optimizer, num_epochs=10):
    """Train `model` and plot the loss and accuracy after every epoch.

    Each epoch runs a 'train' phase and a 'validation' phase over the
    module-level `dataloaders` dict, on the module-level `device`.

    Args:
        model: network to train; it is moved to `device`.
        criterion: loss called as `criterion(outputs, labels)`.
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of passes over both phases.

    Returns:
        The model (on `device`) after the last epoch.
    """
    liveloss = PlotLosses()
    model = model.to(device)

    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'validation']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Build the autograd graph only when it is needed for
                # backpropagation; validation then runs without it.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                if is_training:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Predicted class = index of the largest score along dim 1.
                _, preds = torch.max(outputs, 1)
                # Weight the batch loss by the batch size, so that dividing
                # by the dataset size below gives a per-sample average.
                running_loss += loss.detach() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.float() / len(dataloaders[phase].dataset)

            # Validation metrics are logged under a 'val_' prefix.
            prefix = ''
            if phase == 'validation':
                prefix = 'val_'

            logs[prefix + 'log loss'] = epoch_loss.item()
            logs[prefix + 'accuracy'] = epoch_acc.item()

        liveloss.update(logs)
        liveloss.draw()
    return model

## Example of networks in PyTorch

In [None]:
class Linear(nn.Module):
    """A single fully connected layer applied to the flattened input.

    Each sample must flatten to 16 * 26 values; the output holds
    2 scores per sample.
    """
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(16 * 26, 2)

    def forward(self, x):
        # Keep the batch dimension and collapse everything else.
        batch_size = x.size(0)
        flattened = x.view(batch_size, -1)
        return self.fc(flattened)

In [None]:
class Convolutional(nn.Module):
    """One 1D convolution with max pooling, followed by a linear classifier.

    Expects input of shape (batch, 26, 16): 26 input channels over a
    length of 16. Returns scores of shape (batch, 2).
    """
    def __init__(self):
        super().__init__()
        self.convs = nn.Sequential(
            # kernel_size=3 with padding=1 keeps the length at 16.
            nn.Conv1d(26, 32, 3, padding=1),
            nn.ReLU(),
            # Halve the length (16 -> 8) so the flattened size equals the
            # 8 * 32 inputs of `fc`; a pool size of 1 would leave it at 16.
            nn.MaxPool1d(2)
        )
        self.fc = nn.Linear(8 * 32, 2)

    def forward(self, x):
        x = self.convs(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 8 * 32)
        x = self.fc(x)
        return x

In [None]:
class FullyConvolutional(nn.Module):
    """Two conv/ReLU/max-pool stages, a global max pool and a linear classifier.

    The global max pool reduces the length dimension to a single value per
    channel, so `fc` only depends on the 128 channels, not on the length.
    """
    def __init__(self):
        super().__init__()
        conv_stack = [
            nn.Conv1d(26, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        ]
        self.convs = nn.Sequential(*conv_stack)
        self.fc = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        feature_maps = self.convs(x)
        # Global max pool: largest activation per channel along dim 2.
        pooled, _ = feature_maps.max(dim=2)
        return self.fc(pooled)

In [None]:
class Recurrent(nn.Module):
    """A single LSTM followed by a linear classifier on its final cell state.

    Args:
        rnn_size: hidden size of the LSTM.
    """
    def __init__(self, rnn_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size=26, hidden_size=rnn_size)
        self.fc = nn.Linear(rnn_size, 2)

    def forward(self, x):
        # Move the last dimension to the front and the batch to the middle,
        # since the LSTM is created without batch_first.
        sequence_first = x.permute(2, 0, 1)
        _, (_, final_cell) = self.lstm(sequence_first)
        # Note: the classifier reads the final *cell* state, not the hidden state.
        scores = self.fc(final_cell)
        # Drop the leading dimension of the LSTM state.
        return scores.squeeze(0)

## Network creation and cost function

In [None]:
# The network to train: the single-layer Linear model.
model = Linear()
# Cross-entropy loss between the model's scores and the integer labels.
criterion = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all of the model's parameters.
optimizer = optim.SGD(params=model.parameters(), lr=1e-2)

In [None]:
# Show the model; the notebook renders the value of the cell's last expression.
model

In [None]:
# Sanity check: run the model on the first 4 training samples and show the output size.
model(torch.from_numpy(X_train[:4])).size()

In [None]:
# Train for 10 epochs; train_model returns the model, which the notebook shows as the cell output.
train_model(model, criterion, optimizer, num_epochs=10)