In [2]:
import pytorch_lightning as pl
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [777]:
import typing as tp

class SimpleTextDataset(Dataset):
    """Reads txt file"""
    
    def __init__(self, txt_files, take_size, tokeniser, encoder):
        self.sentences = []
        self.classes = []
        for i, txt_file in enumerate(txt_files):
            self.classes.append(txt_file.split('.')[0])
            with open(txt_file) as file:
                self.text = file.read()
            sentences = [sentence + '.' for sentence in self.text.split('.')]
            sentences = map(tokeniser, sentences)
            sentences = filter(lambda x: len(x) > 10, sentences)
            sentences = encoder(sentences)
            self.sentences.extend([(sentence, i) for sentence in sentences][:take_size])
        
    def __len__(self):
        return len(self.sentences)
    
    def __getitem__(self, idx: tp.Union[int, list]):
        if isinstance(idx, int):
            return self.sentences[idx]
        items = [self.sentences[i] for i in idx]
        return items

In [778]:
def letter_tokeniser(text):
    import string
    text = text.lower()
    text = "".join(filter(lambda x: x.isalpha() or x.isnumeric() or x.isspace(), text))
    return text

def letter_encoder(sentences):
    if isinstance(sentences, int):
        return list(map(ord, sentences))
    encoded_sentences = [list(map(ord, sentence)) for sentence in sentences]
    return encoded_sentences

def collate_fn(items):
    def _pad_sentence(sentence, size):
        if len(sentence) < size:
            return sentence + [0] * (size - len(sentence))
        else:
            return sentence[:size]
    
    max_length = min(max(len(item) for item, cls in items), 32)
    batch = torch.Tensor([_pad_sentence(item, max_length) for item, cls in items])
    y = torch.LongTensor([cls for item, cls in items])
    return batch.unsqueeze(-1), y

In [779]:
train_dataset = SimpleTextDataset(txt_files=["data/bert.txt", "data/Dostoevsky.txt"], take_size=300,
                                  tokeniser=letter_tokeniser, encoder=letter_encoder)

In [780]:
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=256, collate_fn=collate_fn)

In [953]:
class RNN(nn.Module):
    def __init__(self, input_dimension, output_dimension):
        super().__init__()
        
        self.input_dimension = input_dimension
        self.output_dimension = output_dimension
        self.w_h = nn.Linear(input_dimension, input_dimension)
        self.w_y = nn.Linear(input_dimension, output_dimension)
        self.u = nn.Linear(input_dimension, input_dimension)
        self.activate = nn.ReLU()
        
    def forward(self, x):
        hidden = torch.zeros(x.shape[0], self.input_dimension).to(x.device)
        y = torch.zeros(x.shape[0], self.output_dimension).to(x.device)
        for x_i in x.transpose(0, 1):
            hidden = self.activate(self.w_h(x_i) + self.u(hidden))
        y = self.activate(self.w_y(hidden))
        return y

In [954]:
import torch.nn.functional as F

class LitRNN(pl.LightningModule):
    def __init__(self, input_dimension, hidden_dimension, classes):
        super().__init__()
        self.linear = nn.Linear(input_dimension, hidden_dimension)
        self.rnn = RNN(hidden_dimension, hidden_dimension)
        self.bn = nn.BatchNorm1d(hidden_dimension)
        self.fc = nn.Linear(hidden_dimension, classes)
        self.classifier = nn.LogSoftmax()

    def forward(self, x):
        x = self.linear(x)
        x = self.rnn(x)
        x = self.bn(x)
        x = self.fc(x)
        x = self.classifier(x)
        return x
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.03)

In [955]:
class LitBase(pl.LightningModule):
    def __init__(self, input_dimension, hidden_dimension, classes):
        super().__init__()
        self.rnn = nn.RNN(input_dimension, hidden_dimension, 1)
        self.bn = nn.BatchNorm1d(hidden_dimension)
        self.fc = nn.Linear(hidden_dimension, classes)
        self.classifier = nn.LogSoftmax()
        
    def forward(self, x):
        x, h = self.rnn(x)
        x = x[:, -1, :].squeeze()
        x = self.bn(x)
        x = self.fc(x)
        x = self.classifier(x)
        return x
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.01)

In [956]:
trainer = pl.Trainer(max_epochs=100, accelerator='gpu', gpus=1)
model_base = LitBase(1, 16, 2)
trainer.fit(model_base, train_dataloaders=train_dataloader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6]

  | Name       | Type        | Params
-------------------------------------------
0 | rnn        | RNN         | 304   
1 | bn         | BatchNorm1d | 32    
2 | fc         | Linear      | 34    
3 | classifier | LogSoftmax  | 0     
-------------------------------------------
370       Trainable params
0         Non-trainable params
370       Total params
0.001     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

  


In [957]:
trainer = pl.Trainer(max_epochs=100, accelerator='gpu', gpus=1)
model = LitRNN(1, 16, 2)
trainer.fit(model, train_dataloaders=train_dataloader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6]

  | Name       | Type        | Params
-------------------------------------------
0 | linear     | Linear      | 32    
1 | rnn        | RNN         | 816   
2 | bn         | BatchNorm1d | 32    
3 | fc         | Linear      | 34    
4 | classifier | LogSoftmax  | 0     
-------------------------------------------
914       Trainable params
0         Non-trainable params
914       Total params
0.004     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]



In [958]:
class GRU(nn.Module):
    def __init__(self, input_dimension, output_dimension):
        super().__init__()
        
        self.input_dimension = input_dimension
        self.output_dimension = output_dimension
        
        self.w_z = nn.Linear(input_dimension, output_dimension)
        self.u_z = nn.Linear(output_dimension, output_dimension, bias=False)
        self.w_r = nn.Linear(input_dimension, output_dimension)
        self.u_r = nn.Linear(output_dimension, output_dimension, bias=False)
        self.w_h = nn.Linear(input_dimension, output_dimension)
        self.u_h = nn.Linear(output_dimension, output_dimension, bias=False)

        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        
    def forward(self, x):
        y = torch.zeros(x.shape[0], self.output_dimension).to(x.device)

        for x_i in x.transpose(0, 1):
            z = self.sigmoid(self.w_z(x_i) + self.u_z(y))
            r = self.sigmoid(self.w_r(x_i) + self.u_r(y))
            h = self.tanh(self.w_h(x_i) + self.u_h(r * y))
            y = (1 - r) * y + z * h
            
        return y

In [959]:
import torch.nn.functional as F

class LitGRU(pl.LightningModule):
    def __init__(self, input_dimension, hidden_dimension, classes):
        super().__init__()
        self.linear = nn.Linear(input_dimension, hidden_dimension)
        self.gru = GRU(hidden_dimension, hidden_dimension)
        self.bn = nn.BatchNorm1d(hidden_dimension)
        self.fc = nn.Linear(hidden_dimension, classes)
        self.classifier = nn.LogSoftmax()

    def forward(self, x):
        x = self.linear(x)
        x = self.gru(x)
        x = self.bn(x)
        x = self.fc(x)
        x = self.classifier(x)
        return x
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.03)

In [960]:
trainer = pl.Trainer(max_epochs=100, accelerator='gpu', gpus=1)
model_gru = LitGRU(1, 16, 2)
trainer.fit(model_gru, train_dataloaders=train_dataloader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6]

  | Name       | Type        | Params
-------------------------------------------
0 | linear     | Linear      | 32    
1 | gru        | GRU         | 1.6 K 
2 | bn         | BatchNorm1d | 32    
3 | fc         | Linear      | 34    
4 | classifier | LogSoftmax  | 0     
-------------------------------------------
1.7 K     Trainable params
0         Non-trainable params
1.7 K     Total params
0.007     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]



In [961]:
def accuracy(model, b):
    return sum(model(b[0]).argmax(dim=1) == b[1]) / len(b[1])

In [962]:
import numpy as np

class LitTransformer(pl.LightningModule):
    def __init__(self, input_dimension, hidden_dimension, classes):
        super().__init__()
        
        self.input_dimension = input_dimension
        self.hidden_dimension = hidden_dimension
        
        self.linear = nn.Linear(input_dimension, hidden_dimension)
        self.w_q = torch.nn.Parameter(torch.randn(hidden_dimension, hidden_dimension))
        self.w_k = torch.nn.Parameter(torch.randn(hidden_dimension, hidden_dimension))
        self.w_v = torch.nn.Parameter(torch.randn(hidden_dimension, hidden_dimension))
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(hidden_dimension)
        self.fc = nn.Linear(hidden_dimension, classes)
        self.classifier = nn.LogSoftmax()

    def forward(self, x):
        x = self.linear(x)
        q = torch.einsum('bnk,kd->bnd', x, self.w_q)
        k = torch.einsum('bmk,kd->bmd', x, self.w_k)
        v = torch.einsum('bmk,kd->bmd', x, self.w_v)
        sigma = self.relu(torch.einsum('bnd,bmd->bnm', q, k) / np.sqrt(self.hidden_dimension))
        x = torch.einsum('bnm,bmd->bnd', sigma, v)
        x = x[:, 0, :].squeeze()
        x = self.bn(x)
        x = self.fc(x)
        x = self.classifier(x)
        return x
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.003)

In [963]:
trainer = pl.Trainer(max_epochs=100, accelerator='gpu', gpus=1)
model_transformer = LitTransformer(1, 16, 2)
trainer.fit(model_transformer, train_dataloaders=train_dataloader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6]

  | Name       | Type        | Params
-------------------------------------------
0 | linear     | Linear      | 32    
1 | relu       | ReLU        | 0     
2 | bn         | BatchNorm1d | 32    
3 | fc         | Linear      | 34    
4 | classifier | LogSoftmax  | 0     
-------------------------------------------
866       Trainable params
0         Non-trainable params
866       Total params
0.003     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]



In [966]:
b = next(iter(train_dataloader))
accuracy(model_base, b), accuracy(model, b), accuracy(model_gru, b), accuracy(model_transformer, b)

  


(tensor(0.5977), tensor(0.6523), tensor(0.5625), tensor(0.5977))