This notebook contains experimental training of an LSTM with PyTorch Lightning.

There is still a bug when shuffling the training data (see the DataLoader cell), but training seems to work better without shuffling.

In [None]:
import pytorch_lightning
from pytorch_lightning import Trainer, LightningModule
import torch
from data_classes.IMDB import IMDBClass
import torchtext

In [None]:
# Prepare pretrained glove embeddings
# Token and index used for every out-of-vocabulary lookup.
unk_token, unk_index = "<unk>", 0

# Load at most 10000 pretrained GloVe vectors and build a vocab from their
# string-to-index mapping.
glove_vec = torchtext.vocab.GloVe(max_vectors=10000)
glove_vocab = torchtext.vocab.vocab(glove_vec.stoi)

# Register the unknown token, then make its index the default so unknown
# tokens resolve to it.
# NOTE(review): inserting at index 0 may shift the other tokens relative to
# the rows of glove_vec.vectors - confirm the indices still line up.
glove_vocab.insert_token(unk_token, unk_index)
glove_vocab.set_default_index(glove_vocab[unk_token])

In [None]:
# Prepare datasets
# Build the train split first, then the test split; both receive the same
# GloVe vocab as their `transform`.
train_dataset, test_dataset = (
    IMDBClass(train=is_train, transform=glove_vocab)
    for is_train in (True, False)
)

In [None]:
# Prepare DataLoader
from torch.utils.data import DataLoader
def collate_fn(batch):
    """Collate variable-length samples into one padded batch.

    Args:
        batch: Sequence of samples where ``item[0]`` is the token sequence
            (assumed to be a 1-D tensor, as required by ``pad_sequence``)
            and ``item[1]`` is its integer label.

    Returns:
        Tuple ``(x, y, lengths)``: ``x`` is the batch-first, zero-padded
        sequence tensor, ``y`` the labels as a long tensor, and ``lengths``
        the unpadded length of every sequence as a long tensor.
    """
    # Imported locally so this function works even when the cell that
    # imports pad_sequence at notebook level has not been executed yet.
    from torch.nn.utils.rnn import pad_sequence

    sequences = [item[0] for item in batch]
    # Lengths must be recorded before padding; the model needs them to pack
    # the padded batch.
    lengths = torch.tensor([len(seq) for seq in sequences], dtype=torch.long)
    x = pad_sequence(sequences, batch_first=True)
    y = torch.tensor([item[1] for item in batch], dtype=torch.long)
    return x, y, lengths

# Only the training data is shuffled. The evaluation loader keeps a fixed
# order, matching LSTM.test_dataloader, so test runs are reproducible.
train_dataloader = DataLoader(train_dataset, batch_size=10, shuffle=True, collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=10, shuffle=False, collate_fn=collate_fn)

In [None]:
# Model and system definition
from torch.optim import Adam
from torch import nn
from torch.nn.utils.rnn import pad_sequence

class LSTM(LightningModule):
    """LSTM sequence classifier implemented as a Lightning module.

    Token ids are embedded, the padded batch is packed and fed through an
    LSTM, and the last hidden state is projected to ``num_class`` logits.

    Args:
        vocab_size: Number of embedding rows. Required when ``vocab`` is None.
        embedding_size: Width of the trainable embedding built when ``vocab``
            is None. With pretrained vectors the width comes from the vectors.
        lstm_hidden_size: Hidden size of the LSTM.
        num_class: Number of output classes.
        batch_size: Batch size used by ``train_dataloader``/``test_dataloader``.
        learning_rate: Learning rate handed to Adam. Stored on the module so
            that it can be overwritten (e.g. by a learning-rate finder)
            before the optimizer is created.
        vocab: Optional vocabulary. When given, a frozen embedding is built
            from ``vectors.vectors`` with ``vocab["<pad>"]`` as padding index.
        vectors: Object exposing a ``vectors`` tensor. Required with ``vocab``.

    Raises:
        ValueError: If neither ``vocab_size`` nor ``vocab`` is given, or if
            ``vocab`` is given without ``vectors``.
    """

    def __init__(self, vocab_size=None, embedding_size=64, lstm_hidden_size=100, num_class=2, batch_size=32, learning_rate=0.001, vocab=None, vectors=None):
        super().__init__()
        if vocab is None:
            if vocab_size is None:
                raise ValueError("vocab_size is required when no vocab is given")
            self.embedding = nn.Embedding(vocab_size, embedding_size, padding_idx=0)
        else:
            if vectors is None:
                raise ValueError("vectors is required when a vocab is given")
            # NOTE(review): the lookup relies on the vocab resolving "<pad>"
            # (explicitly or through its default index) - confirm it matches
            # the value used to pad batches.
            self.embedding = nn.Embedding.from_pretrained(vectors.vectors, freeze=True, padding_idx=vocab["<pad>"])
        # Take the input width from the embedding itself so pretrained vectors
        # of any dimension work, regardless of `embedding_size`.
        self.lstm = nn.LSTM(self.embedding.embedding_dim, lstm_hidden_size, batch_first=True)
        self.linear = nn.Linear(lstm_hidden_size, num_class)
        self.loss_function = nn.CrossEntropyLoss()
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.lstm_hidden_size = lstm_hidden_size

    def forward(self, X: torch.Tensor, lengths: torch.LongTensor):
        """Return class logits for a padded batch ``X`` with true ``lengths``."""
        embedded = self.embedding(X)
        # Packing lets the LSTM skip padded positions; the lengths tensor has
        # to live on the CPU, and enforce_sorted=False accepts any order.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths=lengths.to("cpu"), enforce_sorted=False, batch_first=True
        )
        _, (hn, _) = self.lstm(packed)
        # hn[-1] is the last entry along the first axis of the hidden state.
        return self.linear(hn[-1])

    def training_step(self, batch, batch_idx):
        """Compute, log and return the cross-entropy loss for one batch."""
        x, y, lengths = batch
        y_hat = self(x, lengths)
        loss = self.loss_function(y_hat, y)
        self.log("Train Loss", loss.detach())
        return loss

    def configure_optimizers(self):
        """Create the Adam optimizer using the module's ``learning_rate``."""
        # Read the attribute instead of a constant so that the constructor
        # argument and any tuned value actually take effect.
        return Adam(self.parameters(), lr=self.learning_rate)

    def test_step(self, batch, batch_idx):
        """Log loss and accuracy for one test batch."""
        x, y, lengths = batch
        y_hat = self(x, lengths)
        loss = self.loss_function(y_hat, y)
        labels_hat = torch.argmax(y_hat, dim=1)
        # Fraction of correct predictions, computed as a tensor.
        test_acc = (labels_hat == y).float().mean()
        self.log_dict({'Test Loss': loss, 'Test Acc': test_acc})

    def train_dataloader(self):
        """Return a shuffled loader over the notebook-level ``train_dataset``."""
        return DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True, collate_fn=collate_fn)

    def test_dataloader(self):
        """Return an unshuffled loader over the notebook-level ``test_dataset``."""
        return DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False, collate_fn=collate_fn)


In [None]:
# Instantiate model
# Use the pretrained GloVe vocab/vectors for the (frozen) embedding layer.
# NOTE(review): embedding_size=300 presumably matches the GloVe vector
# width - confirm.
model = LSTM(
    vocab=glove_vocab,
    vectors=glove_vec,
    embedding_size=300,
    num_class=2,
)

In [None]:
# Instantiate Logger
from pytorch_lightning.loggers import TensorBoardLogger
# TensorBoard logger rooted at 'exp_logs' with experiment name 'lstm'.
logger = TensorBoardLogger('exp_logs', name='lstm')
# run tensorboard with:
# tensorboard --logdir exp_logs

In [None]:
# Instantiate trainer
trainer = Trainer(
    max_epochs=10,
    # Train on a single GPU, letting Lightning pick which one.
    gpus=1,
    auto_select_gpus=True,
    # Batch-size scaling is off; only the learning-rate search is enabled
    # for trainer.tune().
    auto_scale_batch_size=False,
    auto_lr_find=True,
    logger=[logger],
    # Track the 2-norm of the gradients.
    track_grad_norm=2,
    # Accumulate gradients over 8 batches.
    accumulate_grad_batches=8,
)

In [None]:
# Run the tuner. With auto_lr_find=True on the Trainer this presumably runs
# the learning-rate finder and stores its suggestion on the model - confirm
# for the installed Lightning version.
trainer.tune(model)

In [None]:
# Display the batch size stored on the model (auto_scale_batch_size=False,
# so tuning is not expected to have changed it).
model.batch_size

In [None]:
# Display the learning rate currently stored on the model.
model.learning_rate

In [None]:

# Evaluate before fitting - presumably a baseline for the untrained model.
# No loader is passed, so the model's own test_dataloader() is used.
trainer.test(model)

In [None]:
# Train the model; batches come from the model's train_dataloader() hook.
trainer.fit(model)

In [None]:
# Evaluate after training, this time on the notebook-level test_dataloader
# (batch_size=10) instead of the model's own test_dataloader().
trainer.test(model, test_dataloader)