In [193]:
import torch
from torch.nn import functional as F
from torch import nn
from torch.optim import Adam

from torchtext import data
from torchtext import datasets

import pytorch_lightning as pl
from pytorch_lightning import Trainer

import random

## Prepare Our Data and Data Iterator

In [64]:


SEED = 1234

# Seed every RNG this cell relies on: Python's `random` drives the
# train/validation split below, torch drives weight/unk-vector initialisation.
random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# include_lengths=True makes `batch.text` a (token_ids, lengths) pair;
# batch_first=True puts the batch dimension first in token_ids.
TEXT = data.Field(tokenize = 'spacy', include_lengths = True, batch_first=True)
LABEL = data.LabelField(dtype = torch.float)

train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

# Pass an explicit RNG state so the 70/30 train/validation split is the same
# on every Restart & Run All (without it the split is shuffled differently
# each run, regardless of SEED).
train_data, valid_data = train_data.split(split_ratio=0.7,
                                          random_state=random.getstate())

In [65]:
# Number of examples in the train, test and validation splits (in that order).
len(train_data), len(test_data), len(valid_data)

(17500, 25000, 7500)

In [66]:
MAX_VOCAB_SIZE = 25_000

# Label vocabulary is built from the training split only.
LABEL.build_vocab(train_data)

# Text vocabulary: capped at MAX_VOCAB_SIZE entries, with pretrained
# 100-d GloVe vectors attached; unk_init is the initialiser passed for
# tokens that have no pretrained vector.
TEXT.build_vocab(
    train_data,
    max_size=MAX_VOCAB_SIZE,
    vectors="glove.6B.100d",
    unk_init=torch.Tensor.normal_,
)

In [67]:
BATCH_SIZE = 64

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# sort_within_batch=True orders the examples of each batch by length,
# which the model's pack_padded_sequence call relies on.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    device=device,
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
)

In [68]:
# Size of the built text vocabulary. The recorded value is MAX_VOCAB_SIZE + 2;
# the two extra entries are presumably the <unk> and <pad> special tokens.
len(TEXT.vocab)


25002

In [69]:
# Display the first batch produced by the training iterator.
next(iter(train_iterator))



[torchtext.data.batch.Batch of size 64]
	[.text]:('[torch.cuda.LongTensor of size 64x133 (GPU 0)]', '[torch.cuda.LongTensor of size 64 (GPU 0)]')
	[.label]:[torch.cuda.FloatTensor of size 64 (GPU 0)]

In [70]:
# Print the contents of the first training batch only, then stop iterating.
# batch.text is a (token ids, lengths) pair; batch.label holds the targets.
for batch in train_iterator:
    print("batch.text:", batch.text, sep="\n")
    print("batch.label:")
    print(batch.label, len(batch.label))
    break

batch.text:
(tensor([[  66, 2872,   75,  ...,    4, 5734,   42],
        [  66, 4056,   24,  ...,   19, 1697,    4],
        [ 314,   31,   81,  ...,    2, 1923,    4],
        ...,
        [8661,   42,  266,  ...,    1,    1,    1],
        [  11,  626,  814,  ...,    1,    1,    1],
        [  11,  172,    5,  ...,    1,    1,    1]], device='cuda:0'), tensor([274, 274, 274, 274, 274, 274, 273, 273, 273, 273, 273, 273, 273, 273,
        273, 273, 273, 273, 273, 272, 272, 272, 272, 272, 272, 272, 272, 272,
        272, 272, 272, 272, 271, 271, 271, 271, 271, 271, 271, 271, 271, 271,
        271, 271, 271, 271, 271, 271, 271, 271, 270, 270, 270, 270, 270, 270,
        270, 270, 270, 270, 270, 270, 270, 270], device='cuda:0'))
batch.label:
tensor([1., 0., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1.,
        0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0.,
        0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 1.,
        0.,

## PyTorch Lightning Model 

In [198]:
class RNN(pl.LightningModule):
    """LSTM sentiment classifier packaged as a LightningModule.

    The network is: embedding -> dropout -> (multi-layer, optionally
    bidirectional) LSTM over packed sequences -> dropout -> linear layer.
    `forward` returns raw logits; apply a sigmoid to obtain probabilities.

    The train/validation dataloaders are the module-level `train_iterator`
    and `valid_iterator` objects, so those must exist before `Trainer.fit`.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: LSTM hidden size (per direction).
        output_dim: number of output logits.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM runs in both directions.
        dropout: dropout probability used on embeddings, between LSTM layers
            and on the final hidden state.
        pad_idx: vocabulary index of the padding token.
    """

    def __init__(self, vocab_size=25002, embedding_dim=100, hidden_dim=256, output_dim=1, n_layers=2,
                 bidirectional=True, dropout=0.5, pad_idx=1):
        super().__init__()

        # Remembered so forward() knows how to read the final hidden state.
        self.bidirectional = bidirectional

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        self.rnn = nn.LSTM(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=bidirectional,
                           # Inter-layer dropout is meaningless (and warns) with one layer.
                           dropout=dropout if n_layers > 1 else 0.0,
                           batch_first=True)

        # A bidirectional LSTM yields one final hidden state per direction.
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        """Return logits of shape [batch, output_dim].

        Args:
            text: token-id tensor, batch first ([batch, seq_len]).
            text_lengths: true (unpadded) length of each sequence, sorted in
                decreasing order as required by pack_padded_sequence.
        """
        # embedded = [batch, seq_len, embedding_dim]
        embedded = self.dropout(self.embedding(text))

        # Recent PyTorch versions require the lengths to live on the CPU.
        lengths = text_lengths.cpu() if torch.is_tensor(text_lengths) else text_lengths
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, lengths, batch_first=True)

        # Only the final hidden state is used; per-step outputs are not needed.
        _, (hidden, _) = self.rnn(packed_embedded)

        if self.bidirectional:
            # Last layer's forward (hidden[-2]) and backward (hidden[-1]) states.
            final_hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            final_hidden = hidden[-1, :, :]

        return self.fc(self.dropout(final_hidden))

    def train_dataloader(self):
        """Return the module-level training iterator."""
        return train_iterator

    def val_dataloader(self):
        """Return the module-level validation iterator."""
        return valid_iterator

    def binary_accuracy(self, preds, y):
        """
        Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8

        `preds` are raw logits; they are passed through a sigmoid and rounded
        to 0/1 before being compared with `y`.
        """
        rounded_preds = torch.round(torch.sigmoid(preds))
        correct = (rounded_preds == y).float()  # convert into float for division
        acc = correct.sum() / len(correct)
        return acc

    def loss_function(self, predictions, labels):
        """Binary cross-entropy on logits (same computation as nn.BCEWithLogitsLoss)."""
        return F.binary_cross_entropy_with_logits(predictions, labels)

    def configure_optimizers(self):
        """Optimise all parameters with Adam at its default settings."""
        return Adam(self.parameters())

    def _shared_step(self, batch):
        """Run one batch through this module and return (loss, accuracy)."""
        text, text_lengths = batch.text

        # Call `self`, not a global `model`, so the step works for any instance
        # (e.g. one restored with load_from_checkpoint).
        predictions = self(text, text_lengths).squeeze(1)

        loss = self.loss_function(predictions, batch.label)
        acc = self.binary_accuracy(predictions, batch.label)
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Compute loss and accuracy for one training batch."""
        loss, acc = self._shared_step(batch)

        output = {
            'loss': loss, # required
            'progress_bar': {'acc': acc}, # Optional, must be tensor
            'train_acc': acc
        }

        return output

    def training_epoch_end(self, outputs):
        """Average the per-batch training accuracy and log it as 'train_epoch_acc'."""
        epoch_acc = sum(step['train_acc'] for step in outputs) / len(outputs)

        results = {
            'log': {'train_epoch_acc': epoch_acc}
        }
        return results

    def validation_step(self, batch, batch_idx):
        """Compute loss and accuracy for one validation batch."""
        loss, acc = self._shared_step(batch)
        return {
            'loss': loss,
            'val_acc': acc
        }

    def validation_epoch_end(self, outputs):
        """Average validation loss/accuracy over the epoch.

        'val_epoch_acc' and 'val_epoch_loss' go to the progress bar (the early
        stopping and checkpoint callbacks in this notebook monitor
        'val_epoch_acc'); 'val_acc' goes to the logger.
        """
        n_batches = len(outputs)
        epoch_acc = sum(step['val_acc'] for step in outputs) / n_batches
        epoch_loss = sum(step['loss'] for step in outputs) / n_batches

        # Show this data into progress bar
        tqdm_dict = {'val_epoch_acc': epoch_acc, 'val_epoch_loss': epoch_loss}

        results = {
            'progress_bar': tqdm_dict,
            'log': {'val_acc': epoch_acc}
        }
        return results

        
        
    

        


## PyTorch Lightning Trainer

In [228]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint


### Early stop call back
Stop training when a monitored quantity has stopped improving. Without early stopping, the trainer keeps going until it reaches its default limit of 1000 epochs.

**Parameters:**
* monitor (str) – quantity to be monitored. Default: ‘val_loss’.
* min_delta (float) – minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement. Default: 0.
* patience (int) – number of epochs with no improvement after which training will be stopped. Default: 0.
* verbose (bool) – verbosity mode. Default: False.
* mode (str) – one of {auto, min, max}. In min mode, training will stop when the quantity monitored has stopped decreasing; in max mode it will stop when the quantity monitored has stopped increasing; in auto mode, the direction is automatically inferred from the name of the monitored quantity. Default: ‘auto’.
* strict (bool) – whether to crash the training if monitor is not found in the metrics. Default: True.


In [229]:
# Stop once validation accuracy has failed to improve by at least 0.001 for
# 2 consecutive epochs. The direction is stated explicitly (accuracy: higher
# is better) instead of relying on 'auto' name inference, matching the
# ModelCheckpoint callback that monitors the same metric.
early_stopping = EarlyStopping(monitor='val_epoch_acc',
                               min_delta=0.001,
                               patience=2,
                               mode='max')


### Model Checkpoint
Automatically save model checkpoints during training.

**Parameters:**
* filepath (Optional[str]) – path to save the model file. Can contain named formatting options to be auto-filled.
* monitor (str) – quantity to monitor.
* verbose (bool) – verbosity mode. Default: False.
* save_top_k (int) – if save_top_k == k, the best k models according to the quantity monitored will be saved. if save_top_k == 0, no models are saved. if save_top_k == -1, all models are saved. Please note that the monitors are checked every period epochs. if save_top_k >= 2 and the callback is called multiple times inside an epoch, the name of the saved file will be appended with a version count starting with v0.
* mode (str) – one of {auto, min, max}. If save_top_k != 0, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For val_acc, this should be max, for val_loss this should be min, etc. In auto mode, the direction is automatically inferred from the name of the monitored quantity.
* save_weights_only (bool) – if True, only the model’s weights are saved; otherwise the full checkpoint (weights plus training state such as optimizer and LR-scheduler states) is saved.
* period (int) – Interval (number of epochs) between checkpoints.

In [231]:
# Keep only the single best checkpoint, ranked by validation accuracy
# (mode='max': larger is better). The epoch number and the monitored
# accuracy are formatted into the file name.
checkpoint_callback = ModelCheckpoint(
    monitor='val_epoch_acc',
    mode='max',
    save_top_k=1,
    filepath='./saved_models/_{epoch:02d}-{val_epoch_acc:.2f}',
    prefix='',
    verbose=True,
)

In [232]:
# Model hyperparameters.
INPUT_DIM = len(TEXT.vocab) #25002
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Keyword arguments keep each value tied to the parameter it configures.
model = RNN(vocab_size=INPUT_DIM,
            embedding_dim=EMBEDDING_DIM,
            hidden_dim=HIDDEN_DIM,
            output_dim=OUTPUT_DIM,
            n_layers=N_LAYERS,
            bidirectional=BIDIRECTIONAL,
            dropout=DROPOUT,
            pad_idx=PAD_IDX)

# Mirror the device choice made for the data iterators: request a GPU only
# when one exists, so the notebook also runs on CPU-only machines.
N_GPUS = 1 if torch.cuda.is_available() else None

# No max_epochs is set: training ends when early stopping triggers.
trainer = Trainer(gpus=N_GPUS,
                  early_stop_callback=early_stopping,
                  checkpoint_callback=checkpoint_callback)
trainer.fit(model)

  f"Hyperparameter logging is not available for Torch version {torch.__version__}."


HBox(children=(FloatProgress(value=0.0, description='Validation sanity check', layout=Layout(flex='2'), max=5.…



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=1.0), HTML(value='')), …



HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…

HBox(children=(FloatProgress(value=0.0, description='Validating', layout=Layout(flex='2'), max=118.0, style=Pr…




1

### Manually loading the model from a checkpoint

In [253]:
CKPT_PATH = "./saved_models/_epoch=12-val_epoch_acc=0.90.ckpt"

# Option 1: let Lightning build a fresh module from the checkpoint file.
new_model = RNN.load_from_checkpoint(checkpoint_path=CKPT_PATH)

# Option 2: read the checkpoint by hand (tensors are kept on CPU storage by
# the map_location callback) and copy its weights into the existing model.
checkpoint = torch.load(CKPT_PATH, map_location=lambda storage, loc: storage)
model.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

### Restore Training State for trainer

In [254]:
# A Trainer created with resume_from_checkpoint automatically restores the
# model, epoch, step, LR schedulers, apex state, etc. from that file.
# To continue training from there, run: trainer.fit(model)
trainer = Trainer(
    resume_from_checkpoint='./saved_models/_epoch=12-val_epoch_acc=0.90.ckpt',
)

## Make predictions

In [255]:
import spacy
nlp = spacy.load('en')

def predict_sentiment(model, sentence):
    """Score a single raw sentence with the trained classifier.

    The sentence is tokenized with spaCy, mapped to vocabulary ids through
    `TEXT.vocab.stoi`, run through `model` as a batch of one, and the logit
    is squashed with a sigmoid.

    Args:
        model: the sentiment model; called as `model(token_ids, lengths)`.
        sentence: raw input text.

    Returns:
        float: sigmoid of the model's logit, a value between 0 and 1. In the
        recorded run, values near 1 corresponded to positive reviews.
    """
    model.eval()

    # Follow the model's own device rather than the global `device`, so the
    # function keeps working if the model has been moved or reloaded elsewhere.
    model_device = next(model.parameters()).device

    tokens = [tok.text for tok in nlp.tokenizer(sentence)]
    token_ids = [TEXT.vocab.stoi[t] for t in tokens]

    # Shape [1, seq_len]: a batch containing just this sentence.
    batch = torch.LongTensor(token_ids).unsqueeze(0).to(model_device)
    # Lengths stay on the CPU, as pack_padded_sequence expects.
    lengths = torch.LongTensor([len(token_ids)])

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = torch.sigmoid(model(batch, lengths))
    return prediction.item()

In [257]:
# Clearly negative review: the recorded score is close to 0.
predict_sentiment(model, "This film is terrible")


0.000628557289019227

In [258]:
# Clearly positive review: the recorded score is close to 1.
predict_sentiment(model, "This film is awesome! I love it.")


0.9993434548377991