# Demo

Minimal working examples with [Catalyst](https://github.com/catalyst-team/catalyst).
- CV - mnist classification
- NLP - sentiment analysis
- RecSys - movie recommendations
- GAN - mnist again :)

Comparison with pure [PyTorch](https://github.com/pytorch/pytorch) code included.

In [None]:
! pip install -U torch==1.4.0 torchvision==0.5.0 torchtext==0.5.0 catalyst==20.02.4 pandas==1.0.1 tqdm==4.43

In [None]:
# for tensorboard integration
# !pip install tensorflow
# %load_ext tensorboard
# %tensorboard --logdir ./logs

---

# CV

In [None]:
import torch
from torch import nn, optim
import torch.nn.functional as F 

from torchvision import datasets, transforms

### Data

In [None]:
# Loader options shared by the training and validation loaders.
kwargs = dict(num_workers=1, pin_memory=True, batch_size=100)

# Training split: download is requested and the order is reshuffled.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='./data/cv',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    shuffle=True,
    **kwargs,
)

# Validation split: same preprocessing, fixed order, no download requested.
valid_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='./data/cv',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    shuffle=False,
    **kwargs,
)

### Model

In [None]:
class Net(nn.Module):
    """Small convolutional classifier for 1x28x28 MNIST digits.

    Two 3x3 convolutions, one 2x2 max-pool and two fully connected layers.
    ``forward`` returns log-probabilities over the 10 classes, so the
    matching loss is the negative log-likelihood (``nll_loss``/``NLLLoss``).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout: applied to the 4-D feature maps after pooling.
        self.dropout1 = nn.Dropout2d(0.25)
        # Element-wise dropout: this layer receives the flattened 2-D
        # activations of fc1. Dropout2d on 2-D input is deprecated in newer
        # PyTorch, which recommends plain Dropout to keep the same behaviour.
        self.dropout2 = nn.Dropout(0.5)
        # 28 -> 26 -> 24 after the two convolutions, 12 after pooling:
        # 64 channels * 12 * 12 = 9216 flattened features.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension, flatten everything else.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

### Experiment setup

In [None]:
# Number of passes over the training data.
num_epochs = 3

# Fresh model, its optimizer, and a scheduler that multiplies the learning
# rate by 0.7 on every scheduler step.
model = Net()
optimizer = optim.Adadelta(params=model.parameters(), lr=1.0)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=0.7)

---

### PyTorch

In [None]:
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model parameters to the selected device.
model = model.to(device)

In [None]:
def run_epoch(model, device, loader, optimizer, epoch, is_train=True):
    """Run one pass over ``loader``, printing progress and summary metrics.

    Args:
        model: network that returns log-probabilities for a batch of inputs.
        device: device the batches are moved to before the forward pass.
        loader: data loader yielding ``(data, target)`` pairs; its
            ``dataset`` length is used to normalise the metrics.
        optimizer: optimizer stepped after every batch; only used when
            ``is_train`` is true, so it may be ``None`` for validation.
        epoch: epoch number, used only in the printed messages.
        is_train: train (gradients + optimizer steps) or evaluate.
    """
    # train(True) == train(), train(False) == eval()
    model.train(is_train)
    prefix = 'Train epoch' if is_train else 'Valid epoch'
    num_samples = len(loader.dataset)
    loss_, accuracy_ = 0, 0

    # Gradients are tracked only when they are needed for optimisation.
    with torch.set_grad_enabled(is_train):
        for batch_idx, (data, target) in enumerate(loader):
            # dataflow
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = F.nll_loss(output, target)

            # metrics: summed (not averaged) loss so that the epoch average
            # is exact even when the last batch is smaller
            loss_ += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            accuracy_ += pred.eq(target.view_as(pred)).sum().item()

            # optimization
            if is_train:
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            # logging
            if batch_idx % 50 == 0:
                print(
                    '{} {}: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        prefix, epoch, batch_idx * len(data), num_samples,
                        100. * batch_idx / len(loader), loss.item()))

    loss_ /= num_samples
    # A leading newline separates the summary from the progress lines; the
    # previous '\{' was an invalid escape that printed a stray backslash.
    print('\n{} {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        prefix, epoch, loss_, accuracy_, num_samples,
        100. * accuracy_ / num_samples))


In [None]:
# One training pass and one validation pass per epoch, then decay the
# learning rate once.
for epoch in range(1, num_epochs + 1):
    run_epoch(
        model=model, device=device, loader=train_loader,
        optimizer=optimizer, epoch=epoch, is_train=True,
    )
    run_epoch(
        model=model, device=device, loader=valid_loader,
        optimizer=None, epoch=epoch, is_train=False,
    )
    scheduler.step()

---

### Catalyst

In [None]:
from catalyst.dl import SupervisedRunner, \
    AccuracyCallback

# Same experiment as the hand-written loop, driven by Catalyst.
runner = SupervisedRunner()
runner.train(
    loaders={'train': train_loader, 'valid': valid_loader},
    model=model,
    # the loss is computed a bit differently from the loop version
    criterion=nn.NLLLoss(),
    optimizer=optimizer,
    scheduler=scheduler,
    callbacks=[AccuracyCallback()],
    num_epochs=num_epochs,
    logdir="./logs/cv",
    verbose=True,
)

---

# NLP

In [None]:
import torch
from torch import nn, optim
import torch.nn.functional as F 

import torchtext
from torchtext.datasets import text_classification

### Data

In [None]:
import os

# Size of the n-grams requested from the dataset builder.
NGRAMS = 2

# Create ./data/nlp (and ./data) in one call; exist_ok makes re-running the
# cell a no-op and avoids the check-then-create race of isdir + mkdir.
os.makedirs('./data/nlp', exist_ok=True)

train_dataset, valid_dataset = text_classification.DATASETS['AG_NEWS'](
    root='./data/nlp', ngrams=NGRAMS, vocab=None)

In [None]:
# Size of the vocabulary reported by the training dataset; passed to the
# model as the number of rows of its embedding table.
VOCAB_SIZE = len(train_dataset.get_vocab())
# Width of each embedding vector.
EMBED_DIM = 32
# Number of distinct labels reported by the training dataset.
NUM_CLASS = len(train_dataset.get_labels())
# Samples per mini-batch, used by both the train and the valid loader.
BATCH_SIZE = 32

In [None]:
def generate_batch(batch):
    """Collate ``(label, token_tensor)`` entries into one flat batch.

    The token tensors are concatenated into a single 1-D tensor and the
    start position of every entry inside it is returned alongside.

    Returns:
        dict with keys ``"text"`` (all tokens, concatenated), ``"offsets"``
        (start index of each entry in ``"text"``) and ``"label"``.
    """
    label = torch.tensor([sample[0] for sample in batch])
    texts = [sample[1] for sample in batch]

    # An entry starts where the previous ones end: the running sum of the
    # lengths of all earlier entries, e.g. lengths [3, 2, 1] -> [0, 3, 5].
    lengths = [len(tokens) for tokens in texts]
    starts = torch.tensor([0] + lengths[:-1]).cumsum(dim=0)

    return {
        "text": torch.cat(texts),
        "offsets": starts,
        "label": label,
    }

# Both loaders share the batch size and the dict-producing collate function;
# only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    collate_fn=generate_batch,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset,
    collate_fn=generate_batch,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

### Model

In [None]:
class TextSentiment(nn.Module):
    """Bag-of-embeddings classifier: one ``EmbeddingBag`` and one linear layer."""

    def __init__(self, vocab_size, embed_dim, num_class):
        super().__init__()
        self.embedding = nn.EmbeddingBag(
            num_embeddings=vocab_size, embedding_dim=embed_dim, sparse=True)
        self.fc = nn.Linear(in_features=embed_dim, out_features=num_class)
        self.init_weights()

    def init_weights(self):
        """Draw all weights uniformly from [-0.5, 0.5] and zero the bias."""
        bound = 0.5
        nn.init.uniform_(self.embedding.weight, a=-bound, b=bound)
        nn.init.uniform_(self.fc.weight, a=-bound, b=bound)
        nn.init.zeros_(self.fc.bias)

    def forward(self, text, offsets):
        """Return class scores for the bags of ``text`` delimited by ``offsets``."""
        pooled = self.embedding(text, offsets)
        scores = self.fc(pooled)
        return scores

### Experiment setup

In [None]:
# Number of passes over the training data.
num_epochs = 3

# Model, loss, optimizer and a scheduler that multiplies the learning rate
# by 0.9 on every scheduler step.
model = TextSentiment(
    vocab_size=VOCAB_SIZE, embed_dim=EMBED_DIM, num_class=NUM_CLASS)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=4.0)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer, step_size=1, gamma=0.9)

### PyTorch

In [None]:
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Model and loss both live on the selected device.
model = model.to(device)
criterion = criterion.to(device)

In [None]:
def run_epoch(loader, model, device, criterion, optimizer, scheduler, epoch, is_train=True):
    """Run one pass over ``loader``, printing progress and summary metrics.

    Args:
        loader: data loader yielding dicts with ``"text"``, ``"offsets"``
            and ``"label"`` tensors.
        model: network called as ``model(text, offsets)``.
        device: device the batch tensors are moved to.
        criterion: loss called as ``criterion(output, label)``.
        optimizer: stepped after every batch when ``is_train`` is true;
            may be ``None`` for validation.
        scheduler: stepped once at the end of a training pass; may be
            ``None`` for validation.
        epoch: epoch number, used only in the printed messages.
        is_train: train (gradients + optimizer steps) or evaluate.
    """
    # train(True) == train(), train(False) == eval()
    model.train(is_train)
    prefix = 'Train epoch' if is_train else 'Valid epoch'
    num_samples = len(loader.dataset)
    loss_, accuracy_ = 0, 0

    # Gradients are tracked only when they are needed for optimisation.
    with torch.set_grad_enabled(is_train):
        for batch_idx, batch in enumerate(loader):
            text, offsets, cls = batch["text"], batch["offsets"], batch["label"]
            # dataflow
            text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)
            output = model(text, offsets)

            # metrics
            loss = criterion(output, cls)
            loss_ += loss.item()
            accuracy_ += (output.argmax(1) == cls).sum().item()

            # optimization
            if is_train:
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            # logging
            if batch_idx % 200 == 0:
                # len(cls) is the number of samples in the batch; `text` is
                # the concatenation of all tokens, so its length is not.
                print(
                    '{} {}: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        prefix, epoch, batch_idx * len(cls), num_samples,
                        100. * batch_idx / len(loader), loss.item()))

    if is_train:
        # Adjust the learning rate
        scheduler.step()

    # The criterion returns a per-batch value, so average over batches.
    loss_ /= len(loader)

    # accuracy_ stays a count of correct predictions: it is reported as
    # count/total and turned into a percentage by dividing by the dataset
    # size only (it used to be divided by the number of batches as well).
    print('\n{} {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        prefix, epoch, loss_, accuracy_, num_samples,
        100. * accuracy_ / num_samples))
    

In [None]:
# One training pass (which also steps the scheduler) and one validation
# pass per epoch.
for epoch in range(1, num_epochs + 1):
    run_epoch(
        loader=train_loader, model=model, device=device, criterion=criterion,
        optimizer=optimizer, scheduler=scheduler, epoch=epoch, is_train=True,
    )
    run_epoch(
        loader=valid_loader, model=model, device=device, criterion=criterion,
        optimizer=None, scheduler=None, epoch=epoch, is_train=False,
    )

### Catalyst

In [None]:
from catalyst.dl import SupervisedRunner, \
    CriterionCallback, AccuracyCallback

# input_key lists the batch keys that are passed to the model
runner = SupervisedRunner(input_key=["text", "offsets"])

runner.train(
    loaders={'train': train_loader, 'valid': valid_loader},
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    # for callbacks, input_key names the batch key used as the target label
    callbacks=[
        CriterionCallback(input_key="label"),
        AccuracyCallback(input_key="label"),
    ],
    num_epochs=num_epochs,
    logdir="./logs/nlp",
    verbose=True,
)

---

# RecSys

In [None]:
import time
import os
import requests
import tqdm

import numpy as np
import pandas as pd
import scipy.sparse as sp

import torch
import torch.nn as nn
import torch.nn.functional as F 
import torch.utils.data as td
import torch.optim as to

import matplotlib.pyplot as pl
import seaborn as sns

In [None]:
# Configuration

# The directory to store the data
data_dir = "data/recsys"

# File names (inside data_dir) of the training interactions and of the test
# file that lists one positive plus negatives per line.
train_rating = "ml-1m.train.rating"
test_negative = "ml-1m.test.negative"

# NCF config
# Random negatives sampled per positive pair when the training set is built.
train_negative_samples = 4
# Negatives expected on every line of the test file; the validation batch
# size is this value plus one.
test_negative_samples = 99
# Width of the user and item embeddings.
embedding_dim = 64
# Width of the hidden layers of the MLP.
hidden_dim = 32

# Training config
# Batch size of the training loader.
batch_size = 256
# NOTE(review): not referenced by the training code visible in this
# notebook, which uses 3 epochs - confirm whether this should be wired in.
epochs = 10  # Original implementation uses 20
# Number of top-scored items taken as recommendations by the metrics.
top_k=10

### Data


Use Movielens 1M data from the NCF paper authors' implementation https://github.com/hexiangnan/neural_collaborative_filtering

In [None]:
# Create the data directory (parents included); a no-op when it exists.
os.makedirs(data_dir, exist_ok=True)

for file_name in [train_rating, test_negative]:
    file_path = os.path.join(data_dir, file_name)
    if os.path.exists(file_path):
        print("Skip loading " + file_name)
        continue
    print("Load " + file_name)
    r = requests.get(
        "https://raw.githubusercontent.com/hexiangnan/neural_collaborative_filtering/master/Data/" + file_name,
        allow_redirects=True,
        timeout=60,  # do not hang forever on a stalled connection
    )
    # Fail loudly on HTTP errors instead of caching an error page as data.
    r.raise_for_status()
    # The file is created only after a successful download, so a failed
    # request never leaves an empty file that the next run would skip.
    with open(file_path, "wb") as tf:
        tf.write(r.content)

In [None]:
def preprocess_train():
    """Load the training interactions from ``data_dir``.

    Returns:
        A 4-tuple ``(pairs, matrix, user_num, item_num)``: the list of
        ``[user, item]`` pairs read from the first two columns of the file,
        a sparse DOK matrix holding 1.0 for every such pair, and the
        largest user / item id plus one.
    """
    ratings_path = os.path.join(data_dir, train_rating)
    frame = pd.read_csv(
        ratings_path,
        sep='\t',
        header=None,
        names=['user', 'item'],
        usecols=[0, 1],
        dtype={0: np.int32, 1: np.int32},
    )

    # Size both id spaces as "largest id seen + 1".
    user_num = frame['user'].max() + 1
    item_num = frame['item'].max() + 1

    pairs = frame.values.tolist()

    # Mark every observed (user, item) pair in a dictionary-of-keys matrix.
    interactions = sp.dok_matrix((user_num, item_num), dtype=np.float32)
    for user, item in pairs:
        interactions[user, item] = 1.0

    return pairs, interactions, user_num, item_num


train_data, train_mat, user_num, item_num = preprocess_train()

In [None]:
def preprocess_test(path=None, negative_samples=None):
    """Read the test file into a flat list of ``[user, item]`` pairs.

    Every line holds a ``(user, positive_item)`` tuple followed by
    tab-separated negative item ids. For each line the positive pair is
    appended first, followed by one pair per negative.

    Args:
        path: file to read; defaults to ``test_negative`` inside ``data_dir``.
        negative_samples: number of negatives expected on every line;
            defaults to ``test_negative_samples``.

    Returns:
        List of ``[user, item]`` pairs, in file order.

    Raises:
        AssertionError: if a line does not hold the expected number of fields.
        ValueError, SyntaxError: if the first field is not a Python literal.
    """
    # Local import: the notebook's import cell does not provide `ast`.
    import ast

    if path is None:
        path = os.path.join(data_dir, test_negative)
    if negative_samples is None:
        negative_samples = test_negative_samples

    test_data = []
    with open(path) as tnf:
        for line in tnf:
            parts = line.split('\t')
            assert len(parts) == negative_samples + 1

            # The file is downloaded from the internet: literal_eval accepts
            # only Python literals, so (unlike eval) a tampered file cannot
            # execute code.
            user, positive = ast.literal_eval(parts[0])
            test_data.append([user, positive])
            test_data.extend([user, int(negative)] for negative in parts[1:])

    return test_data


# Flat list of [user, item] evaluation pairs read from the test file.
valid_data = preprocess_test()

In [None]:
class NCFDataset(td.Dataset):
    """Dataset of ``(user, item, label)`` rows with optional random negatives.

    With ``negative_samples > 0`` every positive pair gets label 1 and that
    many randomly drawn pairs, absent from ``positive_mat``, get label 0.
    Otherwise the given pairs are used as they are, all with label 0.
    """

    def __init__(self, positive_data, item_num, positive_mat, negative_samples=0):
        super().__init__()
        self.positive_data = positive_data
        self.item_num = item_num
        self.positive_mat = positive_mat
        self.negative_samples = negative_samples

        self.reset()

    def reset(self):
        """Rebuild ``self.data``, drawing a fresh set of negatives if enabled."""
        print("Resetting dataset")
        pairs = self.positive_data
        if self.negative_samples > 0:
            negatives = self.sample_negatives()
            labels = [1] * len(pairs) + [0] * len(negatives)
            # Builds a new list; the stored positives are left untouched.
            pairs = pairs + negatives
        else:
            labels = [0] * len(pairs)

        # One row per pair: the two ids followed by the label column.
        pair_array = np.array(pairs)
        label_column = np.array(labels)[:, np.newaxis]
        self.data = np.concatenate([pair_array, label_column], axis=1)

    def sample_negatives(self):
        """Draw ``negative_samples`` unseen items for every positive pair."""
        sampled = []
        for user, _ in self.positive_data:
            for _ in range(self.negative_samples):
                # Redraw until the pair is not a known interaction.
                while True:
                    candidate = np.random.randint(self.item_num)
                    if (user, candidate) not in self.positive_mat:
                        break
                sampled.append([user, candidate])
        return sampled

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        user, item, label = self.data[idx]
        return {
            "user": user,
            "item": item,
            "label": np.float32(label),
        }

    
class SamplerWithReset(td.RandomSampler):
    """Random sampler that calls ``reset()`` on its data source before each pass."""

    def __iter__(self):
        # Let the data source rebuild itself first, then delegate the
        # shuffling to RandomSampler.
        source = self.data_source
        source.reset()
        index_iterator = super().__iter__()
        return index_iterator

In [None]:
# Training set: positives plus freshly sampled negatives; the sampler resets
# the dataset whenever it is iterated.
train_dataset = NCFDataset(
    positive_data=train_data,
    item_num=item_num,
    positive_mat=train_mat,
    negative_samples=train_negative_samples,
)
train_loader = td.DataLoader(
    dataset=train_dataset,
    sampler=SamplerWithReset(train_dataset),
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

# Validation set: no sampling; the batch size is one positive plus the
# negatives of one line of the test file.
valid_dataset = NCFDataset(
    positive_data=valid_data, item_num=item_num, positive_mat=train_mat)
valid_loader = td.DataLoader(
    dataset=valid_dataset,
    batch_size=test_negative_samples + 1,
    shuffle=False,
    num_workers=0,
)

### Model

In [None]:
class Ncf(nn.Module):
    """MLP over concatenated user and item embeddings, one score per pair."""

    def __init__(self, user_num, item_num, embedding_dim, hidden_dim):
        super().__init__()

        self.user_embeddings = nn.Embedding(user_num, embedding_dim)
        self.item_embeddings = nn.Embedding(item_num, embedding_dim)

        # Three linear layers with ReLU in between, ending in a single score.
        mlp = [
            nn.Linear(2 * embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        self.layers = nn.Sequential(*mlp)

        self.initialize()

    def initialize(self):
        """Small normal init for embeddings, Xavier for linear layers, zero biases."""
        for table in (self.user_embeddings, self.item_embeddings):
            nn.init.normal_(table.weight, std=0.01)

        for layer in self.layers:
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, user, item):
        """Return a flat tensor with one score per (user, item) pair."""
        user_vectors = self.user_embeddings(user)
        item_vectors = self.item_embeddings(item)
        joined = torch.cat([user_vectors, item_vectors], dim=-1)
        scores = self.layers(joined)
        return scores.view(-1)

    def name(self):
        """Return the model name used in the training history."""
        return "Ncf"

### Experiment setup

In [None]:
def hit_metric(recommended, actual):
    """Return 1 when ``actual`` is among ``recommended``, otherwise 0."""
    return 1 if actual in recommended else 0


def dcg_metric(recommended, actual):
    """Return ``1 / log2(rank + 2)`` for the 0-based rank of ``actual``.

    Returns 0 when ``actual`` is not in ``recommended``.
    """
    if actual not in recommended:
        return 0
    rank = recommended.index(actual)
    return np.reciprocal(np.log2(rank + 2))

In [None]:
# Model sized from the training data, a logits-based binary loss, and Adam
# with its default settings.
model = Ncf(
    user_num=user_num,
    item_num=item_num,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
)
criterion = nn.BCEWithLogitsLoss()
optimizer = to.Adam(params=model.parameters())

### PyTorch

In [None]:
def metrics(loader, model, device, top_k):
    """Compute the mean hit rate and DCG of the top-``top_k`` recommendations.

    Every batch is scored as one ranking task: the first item of the batch
    is taken as the held-out positive and counts as a hit when it is among
    the ``top_k`` highest-scored items of that batch.

    Args:
        loader: data loader yielding dicts with ``"user"`` and ``"item"``.
        model: scoring model called as ``model(user, item)``.
        device: device the batch tensors are moved to.
        top_k: number of highest-scored items taken as recommendations.

    Returns:
        Tuple ``(mean hit rate, mean DCG)`` over all batches.
    """
    hits, dcgs = [], []

    # Evaluation only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        for batch in loader:
            user, item = batch["user"], batch["item"]
            item = item.to(device)

            predictions = model(user.to(device), item)
            _, indices = torch.topk(predictions, top_k)
            # Map the positions of the best scores back to item ids.
            recommended = torch.take(item, indices).cpu().numpy().tolist()

            positive = item[0].item()
            hits.append(hit_metric(recommended, positive))
            dcgs.append(dcg_metric(recommended, positive))

    return np.mean(hits), np.mean(dcgs)

In [None]:
def train(model, criterion, optimizer, num_epochs=3):
    """Train ``model`` on ``train_loader`` and evaluate it after every epoch.

    Returns:
        A list with one dict per epoch holding the model name, epoch index,
        hit rate, DCG and elapsed seconds.
    """
    history = []
    for epoch in range(num_epochs):
        start_time = time.time()

        # Optimisation pass over the training loader.
        model.train()
        for batch in tqdm.tqdm(train_loader):
            users, items, labels = (
                batch[key] for key in ("user", "item", "label"))
            model.zero_grad()
            scores = model(users.to(device), items.to(device))
            batch_loss = criterion(scores, labels.to(device))
            batch_loss.backward()
            optimizer.step()

        # Ranking metrics on the validation loader.
        model.eval()
        hr, dcg = metrics(valid_loader, model, device, top_k)

        record = {
            "model": model.name(),
            "epoch": epoch,
            "hit_rate": hr,
            "dcg": dcg,
            "elapsed": time.time() - start_time,
        }
        history.append(record)

        print("[{model}] epoch: {epoch}, hit rate: {hit_rate}, dcg: {dcg}".format(**record))

    return history

In [None]:
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move model and loss to the device, then train with the default epochs.
model = model.to(device)
criterion = criterion.to(device)
ncf_history = train(model=model, criterion=criterion, optimizer=optimizer)

### Catalyst

In [None]:
from catalyst.dl import Callback, CallbackOrder, State

class NdcgLoaderMetricCallback(Callback):
    """Metric callback that records per-batch ``hits`` and ``dcgs`` values.

    The first item of every batch is taken as the positive one.
    NOTE(review): that holds for the validation loader built in this
    notebook; presumably the callback also runs on training batches, where
    the first item has no special meaning - confirm.
    """

    def __init__(self):
        super().__init__(CallbackOrder.Metric)

    def on_batch_end(self, state: State):
        items = state.input["item"]
        scores = state.output["logits"]

        # Positions of the top_k best scores, mapped back to item ids.
        top_positions = torch.topk(scores, top_k)[1]
        recommended = torch.take(items, top_positions).cpu().numpy().tolist()

        target = items[0].item()

        batch_values = (
            ("hits", hit_metric(recommended, target)),
            ("dcgs", dcg_metric(recommended, target)),
        )
        for metric_name, metric_value in batch_values:
            state.metric_manager.add_batch_value(
                name=metric_name, value=metric_value)

In [None]:
from catalyst.dl import SupervisedRunner, CriterionCallback

# input_key lists the batch keys that are passed to the model
runner = SupervisedRunner(input_key=["user", "item"])

runner.train(
    loaders={'train': train_loader, 'valid': valid_loader},
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    # for callbacks, input_key names the batch key used as the target label
    callbacks=[
        CriterionCallback(input_key="label"),
        NdcgLoaderMetricCallback(),
    ],
    num_epochs=3,
    logdir="./logs/recsys",
    verbose=True,
)

# GAN

In [None]:
import os
from argparse import ArgumentParser
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

In [None]:
# Samples per mini-batch for the GAN loader.
batch_size = 32

# Convert to tensors and normalise with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])
# MNIST training split stored under the current working directory.
dataset = MNIST(
    root=os.getcwd(),
    train=True,
    download=True,
    transform=transform,
)
loader = DataLoader(dataset=dataset, batch_size=batch_size)

In [None]:
class Generator(nn.Module):
    """Fully connected generator mapping latent vectors to ``img_shape`` images."""

    def __init__(self, latent_dim, img_shape):
        super().__init__()
        self.img_shape = img_shape

        # Each consecutive pair of widths becomes one block:
        # Linear -> (BatchNorm1d, except in the first block) -> LeakyReLU.
        widths = [latent_dim, 128, 256, 512, 1024]
        stack = []
        for position, (in_feat, out_feat) in enumerate(zip(widths, widths[1:])):
            stack.append(nn.Linear(in_feat, out_feat))
            if position > 0:
                # NOTE(review): the second positional argument of
                # BatchNorm1d is `eps`, so 0.8 is the epsilon here, not the
                # momentum - confirm that this is intended.
                stack.append(nn.BatchNorm1d(out_feat, eps=0.8))
            stack.append(nn.LeakyReLU(0.2, inplace=True))

        # Project to the flattened image size and squash into [-1, 1].
        stack.append(nn.Linear(1024, int(np.prod(img_shape))))
        stack.append(nn.Tanh())
        self.model = nn.Sequential(*stack)

    def forward(self, z):
        """Generate one image of shape ``img_shape`` per row of ``z``."""
        flat = self.model(z)
        return flat.view(flat.size(0), *self.img_shape)


class Discriminator(nn.Module):
    """Fully connected discriminator returning one value in [0, 1] per image."""

    def __init__(self, img_shape):
        super().__init__()

        flat_features = int(np.prod(img_shape))
        stack = [
            nn.Linear(flat_features, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, img):
        """Flatten each image and return its sigmoid validity score."""
        batch = img.size(0)
        return self.model(img.view(batch, -1))

In [None]:
# Image shape (channels, height, width) and size of the latent noise vector.
mnist_shape = (1, 28, 28)
latent_dim = 128

generator = Generator(latent_dim=latent_dim, img_shape=mnist_shape)
discriminator = Discriminator(img_shape=mnist_shape)

# Both networks are registered under string keys in a single module.
model = nn.ModuleDict({
    "generator": generator,
    "discriminator": discriminator,
})

In [None]:
# Adam hyper-parameters shared by both optimizers.
lr = 0.0001
b1 = 0.5
b2 = 0.999

# One optimizer per network, keyed like the entries of the model dict.
generator_optimizer = torch.optim.Adam(
    params=generator.parameters(), lr=lr, betas=(b1, b2))
discriminator_optimizer = torch.optim.Adam(
    params=discriminator.parameters(), lr=lr, betas=(b1, b2))

optimizer = dict(
    generator=generator_optimizer,
    discriminator=discriminator_optimizer,
)

### PyTorch

In [None]:
# I was too lazy

### Catalyst

In [None]:
from catalyst.dl import OptimizerCallback, SupervisedRunner


class GanRunner(SupervisedRunner):
    """Runner that computes the generator and discriminator losses of a GAN.

    For every batch the two losses are stored in ``state.loss`` under the
    keys ``"loss_generator"`` and ``"loss_discriminator"`` and are also
    reported to ``state.metric_manager`` under the same names.

    NOTE(review): ``_run_batch_train_step`` presumably overrides a hook of
    the pinned Catalyst version - confirm before upgrading Catalyst.
    """
    
    def forward(self, batch, **kwargs):
        """Placeholder: does nothing and returns ``None``."""
        # @TODO add images generation from noise
        pass
    
    def _run_batch_train_step(self, batch):
        """Compute both GAN losses for ``batch`` and store them on the state."""
        state = self.state
        # Start from an empty mapping; each loss is stored under its own key.
        state.loss = {}
        
        # presumably the real images of the batch - depends on how Catalyst
        # maps the loader output to `input_key`; verify
        images = batch[self.input_key]
        bs = images.shape[0]
        # One latent vector per real image, drawn from a standard normal.
        z = torch.randn(bs, latent_dim).to(self.device)
        generated_images = self.model["generator"](z)
        
        # generator step
        ## predictions & labels
        # Target 1 for generated images: the generator loss is low when the
        # discriminator scores its output close to 1.
        generated_labels = torch.ones(bs, 1).to(self.device)
        generated_pred = self.model["discriminator"](generated_images)

        ## loss
        loss_generator = F.binary_cross_entropy(generated_pred, generated_labels)
        state.loss["loss_generator"] = loss_generator
        state.metric_manager.add_batch_value("loss_generator", loss_generator)
        
        
        # discriminator step
        ## real
        # Real images are labelled 1.
        images_labels = torch.ones(bs, 1).to(self.device)
        images_pred = self.model["discriminator"](images)
        real_loss = F.binary_cross_entropy(images_pred, images_labels)

        ## fake
        # Generated images are labelled 0; detach() cuts the graph so this
        # loss does not backpropagate into the generator.
        generated_labels_ = torch.zeros(bs, 1).to(self.device)
        generated_pred_ = self.model["discriminator"](generated_images.detach())
        fake_loss = F.binary_cross_entropy(generated_pred_, generated_labels_)

        ## loss
        # Average of the real and fake parts.
        loss_discriminator = (real_loss + fake_loss) / 2.0
        state.loss["loss_discriminator"] = loss_discriminator
        state.metric_manager.add_batch_value("loss_discriminator", loss_discriminator)
        

In [None]:
runner = GanRunner()

runner.train(
    loaders={"train": loader},
    model=model,
    optimizer=optimizer,
    criterion=None,
    # One optimizer callback per network: each pairs an optimizer key with
    # the key of the loss it should use.
    callbacks=[
        OptimizerCallback(
            optimizer_key="generator",
            loss_key="loss_generator",
        ),
        OptimizerCallback(
            optimizer_key="discriminator",
            loss_key="loss_discriminator",
        ),
    ],
    main_metric="loss_generator",
    num_epochs=5,
    logdir="./logs/gan",
    verbose=True,
)

---
# Summary

[__PyTorch__](https://github.com/pytorch/pytorch) is a great framework, but it is too low-level:
- you need to write lots of code
- it lacks model saving/selection, integration with visualization tools, and Deep Learning best practices such as gradient accumulation, fp16 support, etc.
- no full reproducibility

[__Catalyst__](https://github.com/catalyst-team/catalyst) is a modular framework on top of PyTorch that
- is easily extendable for your needs
- allows you to write less boilerplate
- supports many monitoring tools like TensorBoard or Alchemy
- integrates lots of Deep Learning best practices, distributed training, jit.tracing support and many more
- provides framework-wise determinism