In [1]:
%load_ext autoreload
%autoreload 2



import pandas as pd
import os
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
import numpy as np
from tqdm import tqdm
import random
from pytorch_lightning.callbacks import ModelCheckpoint
torch.manual_seed(1)
random.seed(1)
np.random.seed(1)
import time
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import Callback
#from torch.utils.data import random_split, DataLoader

from torch import nn
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import torchmetrics
import wandb
import re
# Setting the seed
pl.seed_everything(1)
torch.set_printoptions(edgeitems=30)
import math
%matplotlib inline
import matplotlib.pyplot as plt

from data_layer import Vocabulary, Train_Dataset, Validation_Dataset, MyCollate, DataModule
 

DEVICE = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')


Global seed set to 1


In [2]:
# Build the data module from the raw training file: 'factors' is the source
# column and 'expressions' the target column.
data_module = DataModule(
    '../train.txt',
    source_column='factors',
    target_column='expressions',
    batch_size=512,
)

# Create the train/validation split; val_frac is the validation fraction.
data_module.setup(val_frac=0.1)

# Batch iterators consumed by the training loop further down.
train_iterator = data_module.train_dataloader()
valid_iterator = data_module.val_dataloader()



len of train:  900000
len of val:  100000


In [1]:
!python main.py

Loading Checkpoint ...
Loading Model...
Loading Vocabulary...
Predicting Factors ....
100%|███████████████████████████████████████| 1000/1000 [00:49<00:00, 20.29it/s]
Score: 0.999


In [19]:
# Write data_module.val (presumably the validation DataFrame) to val.csv
# without the index column.
data_module.val.to_csv('val.csv',index=False)

In [4]:
# # Model hyperparameters (commented out; not referenced by the cells below)
# src_vocab_size = len(data_module.train_dataset.source_vocab.itos)
# trg_vocab_size = len(data_module.train_dataset.target_vocab.itos)
# embedding_size = 128
# num_heads = 4
# num_encoder_layers = 3
# num_decoder_layers = 3
# dropout = 0.10
# max_len = 40
# forward_expansion = 256
# src_pad_idx = data_module.train_dataset.source_vocab.stoi["<PAD>"]
# learning_rate = 0.0005

### Encoder Decoder

In [5]:
from model import Encoder, Decoder, Seq2Seq

In [6]:
# Hyperparameters for the encoder/decoder pair. The vocabulary sizes are read
# from the vocabularies attached to the training dataset. The training loop
# stores this same dict in the checkpoint it writes.
model_params = dict(
    INPUT_DIM=len(data_module.train_dataset.source_vocab.itos),
    OUTPUT_DIM=len(data_module.train_dataset.target_vocab.itos),
    HID_DIM=256,
    ENC_LAYERS=3,
    DEC_LAYERS=3,
    ENC_HEADS=4,
    DEC_HEADS=4,
    ENC_PF_DIM=512,
    DEC_PF_DIM=512,
    ENC_DROPOUT=0.1,
    DEC_DROPOUT=0.1,
)

# Encoder and Decoder take their arguments positionally in this order:
# vocabulary size, hidden dim, layer count, head count, feed-forward dim,
# dropout, device.
enc = Encoder(
    *(model_params[key] for key in ('INPUT_DIM', 'HID_DIM', 'ENC_LAYERS',
                                    'ENC_HEADS', 'ENC_PF_DIM', 'ENC_DROPOUT')),
    DEVICE,
)

dec = Decoder(
    *(model_params[key] for key in ('OUTPUT_DIM', 'HID_DIM', 'DEC_LAYERS',
                                    'DEC_HEADS', 'DEC_PF_DIM', 'DEC_DROPOUT')),
    DEVICE,
)

In [7]:
# Indices of the '<PAD>' token in the source and target vocabularies.
SRC_PAD_IDX = data_module.train_dataset.source_vocab.stoi['<PAD>']
TRG_PAD_IDX = data_module.train_dataset.target_vocab.stoi['<PAD>']

# Assemble the seq2seq model from the encoder/decoder and move it to DEVICE.
model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, DEVICE).to(DEVICE)

In [8]:
#model

In [9]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Only parameters whose ``requires_grad`` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 4,032,548 trainable parameters


In [10]:
def initialize_weights(m):
    """Xavier-uniform initialise a module's ``weight`` when it has more than one dimension.

    Meant to be passed to ``model.apply``. A module is left untouched when it
    has no ``weight`` attribute, when its ``weight`` is ``None`` (for example a
    normalisation layer created without affine parameters), or when its weight
    is one-dimensional.
    """
    weight = getattr(m, 'weight', None)
    # hasattr() alone is not enough: a parameter registered as None still
    # exists as an attribute, and calling .dim() on it would raise.
    if weight is not None and weight.dim() > 1:
        nn.init.xavier_uniform_(weight.data)

In [11]:
model.apply(initialize_weights);

In [12]:
# Learning rate handed to the Adam optimizer below.
LEARNING_RATE = 0.0005

# Adam over all parameters of the seq2seq model.
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [13]:
# NOTE(review): ignore_index is commented out, so any <PAD> positions present in
# the targets given to this criterion contribute to the loss — confirm this is
# intended (or that padding is masked before the loss is computed).
criterion = nn.CrossEntropyLoss()#(ignore_index = TRG_PAD_IDX)

In [14]:
from Engine import train, evaluate

In [16]:
def epoch_time(start_time, end_time):
    """Break the span between two timestamps into whole minutes and seconds.

    Both arguments are numbers of seconds (the training loop passes
    ``time.time()`` values). Returns a ``(minutes, seconds)`` tuple of ints,
    each truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [17]:
N_EPOCHS = 15
CLIP = 1  # passed to train(); presumably the gradient-clipping threshold — confirm in Engine
early_stopping_counter = 0
# Consecutive non-improving epochs tolerated before stopping. The check below
# runs after the counter is incremented, so a limit of 0 stops training at the
# first epoch whose validation loss fails to improve.
early_stopping_limit = 0
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_iterator, optimizer, criterion, CLIP, DEVICE)
    valid_loss, val_sent_acc = evaluate(model, valid_iterator, criterion, DEVICE)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        # New best validation loss: reset patience and checkpoint the weights
        # together with the hyperparameters and both vocabularies.
        early_stopping_counter = 0
        best_valid_loss = valid_loss
        torch.save({'state_dict': model.state_dict(),
                    'model_params': model_params,
                    'source_stoi': data_module.train_dataset.source_vocab.stoi,
                    'source_itos': data_module.train_dataset.source_vocab.itos,
                    'target_stoi': data_module.train_dataset.target_vocab.stoi,
                    'target_itos': data_module.train_dataset.target_vocab.itos}, 'tut7-model.pt')
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= early_stopping_limit:
            # Closing parenthesis restored; without it the cell is a SyntaxError.
            print(f'Loss did not reduce for last {early_stopping_counter} epochs. Stopped training..')
            # Note: breaking here skips the per-epoch summary for this epoch.
            break

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
    print(f'\t Val. Sent Accuracy: {val_sent_acc:.1f}')

Avg train loss for last 0 steps: 4.32685
Avg train sent acc for last 0 steps: 0.00
Avg train loss for last 500 steps: 0.62760
Avg train sent acc for last 500 steps: 0.04
Avg train loss for last 1000 steps: 0.47638
Avg train sent acc for last 1000 steps: 0.10
Avg train loss for last 1500 steps: 0.39051
Avg train sent acc for last 1500 steps: 0.15
Epoch: 01 | Time: 2m 57s
	Train Loss: 0.357 | Train PPL:   1.429
	 Val. Loss: 0.117 |  Val. PPL:   1.124
	 Val. Sent Accuracy: 0.4
Avg train loss for last 0 steps: 0.15662
Avg train sent acc for last 0 steps: 0.34
Avg train loss for last 500 steps: 0.13204
Avg train sent acc for last 500 steps: 0.37
Avg train loss for last 1000 steps: 0.11997
Avg train sent acc for last 1000 steps: 0.39
Avg train loss for last 1500 steps: 0.10998
Avg train sent acc for last 1500 steps: 0.41
Epoch: 02 | Time: 2m 55s
	Train Loss: 0.106 | Train PPL:   1.111
	 Val. Loss: 0.058 |  Val. PPL:   1.060
	 Val. Sent Accuracy: 0.6
Avg train loss for last 0 steps: 0.07537
A

In [4]:
from evaluate import load_model, translate_sentence
import torch
import pandas as pd
from data_layer import Vocabulary, Train_Dataset, Validation_Dataset, MyCollate, DataModule

In [5]:
# Rebuild the data module for the evaluation cells.
# NOTE(review): the earlier setup cell reads '../train.txt' while this one reads
# 'train.txt' — confirm which relative path is intended.
data_module = DataModule(
    'train.txt',
    source_column='factors',
    target_column='expressions',
    batch_size=512,
)

print(f'Using split ratio of {0.1} for train val split')
data_module.setup(val_frac=0.1)
class Evaluation:
    """Load a trained model and its vocabularies from a checkpoint and run predictions.

    The checkpoint must be a mapping providing the keys 'source_stoi',
    'source_itos', 'target_stoi' and 'target_itos'. The whole mapping is also
    handed to ``load_model`` to build the network.
    """

    def __init__(self, device, checkpoint_path):
        """Read the checkpoint at ``checkpoint_path`` and prepare the model on ``device``."""
        self.device = device
        print('Loading Checkpoint ...')
        # map_location remaps the stored tensors onto `device`. Without it,
        # torch.load restores them to the device they were saved from, which
        # fails when that device (e.g. a specific GPU) is not available here.
        checkpoint = torch.load(checkpoint_path, map_location=device)
        print('Loading Model...')
        self.model = load_model(checkpoint, device)
        print('Loading Vocabulary...')
        self.source_stoi = checkpoint['source_stoi']
        self.source_itos = checkpoint['source_itos']
        self.target_stoi = checkpoint['target_stoi']
        self.target_itos = checkpoint['target_itos']

    def predict(self, factor: str, max_len: int = 30):
        """Run ``translate_sentence`` on ``factor`` and return its result unchanged.

        ``max_len`` is forwarded to ``translate_sentence``; the default of 30
        matches the value that was previously hard-coded.
        """
        return translate_sentence(factor, self.source_stoi, self.source_itos,
                                  self.target_stoi, self.target_itos,
                                  self.model, self.device, max_len=max_len)

Using split ratio of 0.1 for train val split
len of train:  900000
len of val:  100000


In [6]:
# Use GPU index 2 when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')

checkpoint_path = 'checkpoints/checkpoint.pt'

# Constructing Evaluation loads the checkpoint, the model and the vocabularies.
evaluation = Evaluation(device, checkpoint_path)
print("Predicting Factors ....")



Loading Checkpoint ...
Loading Model...
Loading Vocabulary...
Predicting Factors ....


In [None]:
# Spot-check exact-match accuracy on shuffled validation rows.
# `c` counts exact matches and `i` ends as the index of the last row evaluated,
# so `c / (i + 1)` is the accuracy over the rows that were processed.
c = 0
# Shuffle into a separate frame with a fixed seed. This keeps data_module.val
# untouched, so re-running the cell evaluates the same rows in the same order.
val_shuffled = data_module.val.sample(frac=1, random_state=1).reset_index(drop=True)
for i in range(len(val_shuffled)):
    factor = val_shuffled.loc[i, 'factors']
    expression = val_shuffled.loc[i, 'expressions']
    pred = evaluation.predict(factor)
    if pred == expression:
        c += 1
    else:
        # Show each mismatch as: input, expected, predicted.
        print(factor, expression, pred)
    # Stop once more than 10000 rows have been checked.
    if i > 10000:
        print(i)
        break

(-9*n-22)*(3*n-23) -27*n**2+141*n+506 -27*n**2+131*n+506
(3*t+29)*(8*t-31) 24*t**2+139*t-899 24*t**2+119*t-899
(-7*i-13)*(5*i-19) -35*i**2+68*i+247 -35*i**2+78*i+247
(3*s-23)*(5*s+29) 15*s**2-28*s-667 15*s**2-38*s-667
(29-7*tan(i))*(-5*tan(i)-27) 35*tan(i)**2+44*tan(i)-783 35*tan(i)**2+34*tan(i)-783
(-9*t-32)*(6*t+5) -54*t**2-237*t-160 -54*t**2-257*t-160
(-9*n-27)*(-7*n-9) 63*n**2+270*n+243 63*n**2+280*n+243
(30-8*tan(n))*(-3*tan(n)-30) 24*tan(n)**2+150*tan(n)-900 24*tan(n)**2+130*tan(n)-900
(-8*y-16)*(-7*y-32) 56*y**2+368*y+512 56*y**2+388*y+512


In [None]:
c/ (i+1)

In [None]:
def display_attention(sentence, translation, attention, n_heads = 8, n_rows = 4, n_cols = 2):
    """Draw one attention heat-map per head on an ``n_rows`` x ``n_cols`` grid.

    ``sentence`` provides the x-axis tick labels (each item lower-cased and the
    sequence wrapped in '<sos>' / '<eos>'); ``translation`` provides the y-axis
    tick labels and must be a list. ``attention`` is squeezed on dim 0 and then
    indexed by head — assumes a (1, n_heads, target_len, source_len) tensor;
    TODO confirm against translate_sentence.

    Raises AssertionError when ``n_rows * n_cols`` differs from ``n_heads``.

    Relies on the notebook-level names ``plt`` and ``ticker``; ``ticker`` is
    imported in a later cell, which must run before this function is called.
    """
    assert n_rows * n_cols == n_heads

    figure = plt.figure(figsize=(15,25))

    # Subplot positions are 1-based, head indices 0-based.
    for slot in range(1, n_heads + 1):
        axis = figure.add_subplot(n_rows, n_cols, slot)

        head_weights = attention.squeeze(0)[slot - 1].cpu().detach().numpy()
        axis.matshow(head_weights, cmap='bone')

        axis.tick_params(labelsize=12)
        # The leading '' entry presumably lines the labels up with the ticks
        # produced by the unit locators set below — confirm on a rendered plot.
        x_labels = ['', '<sos>'] + [t.lower() for t in sentence] + ['<eos>']
        axis.set_xticklabels(x_labels, rotation=45)
        axis.set_yticklabels([''] + translation)

        # One tick per matrix cell on both axes.
        axis.xaxis.set_major_locator(ticker.MultipleLocator(1))
        axis.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()
    plt.close()

In [None]:
from matplotlib import ticker

In [None]:
sent1 = '(-4*n-14)*(2*n-2)'
# NOTE(review): source_stoi, source_itos, target_stoi and target_itos are not
# assigned in any visible cell (the vocabularies are held as attributes of
# `evaluation`), and `model` is only created in the training cells — confirm
# these names exist in the kernel before running.
# NOTE(review): this unpacks three values, whereas Evaluation.predict treats the
# result of the same translate_sentence call as a single value — confirm which
# return shape the current translate_sentence has.
src, translation, attention = translate_sentence(sent1, source_stoi, source_itos, target_stoi, target_itos, model, device, max_len = 30)

# NOTE(review): called with the default n_heads=8, while model_params configures
# 4 heads — confirm the attention tensor has 8 heads or pass matching arguments.
display_attention(src, translation, attention)

In [None]:
translation

In [13]:
# Load onto the CPU: the following cells only inspect the checkpoint's contents,
# and map_location removes the need for the GPU the tensors were saved from.
ch = torch.load('checkpoints/checkpoint.pt', map_location='cpu')

In [14]:
ch.keys()

dict_keys(['state_dict', 'model_params', 'source_stoi', 'source_itos', 'target_stoi', 'target_itos'])

In [16]:
#ch['state_dict']

In [32]:
!python main.py -t

Loading Checkpoint ...
Loading Model...
Loading Vocabulary...
Predicting Factors ....
100%|███████████████████████████████████████████| 25/25 [00:02<00:00, 10.25it/s]
Score: 1.0


In [33]:
!pip3 freeze > requirements.txt 

In [34]:
# %pip installs into the environment of the running kernel (a bare `!pip` may
# target a different interpreter). The version is pinned to the one recorded in
# this cell's captured output so re-runs resolve the same package.
%pip install torch-summary==1.4.5

Collecting torch-summary
  Downloading torch_summary-1.4.5-py3-none-any.whl (16 kB)
Installing collected packages: torch-summary
Successfully installed torch-summary-1.4.5


In [35]:
from torchsummary import summary

In [37]:
summary(evaluation.model)

Layer (type:depth-idx)                                  Param #
├─Encoder: 1-1                                          --
|    └─Embedding: 2-1                                   8,192
|    └─Embedding: 2-2                                   25,600
|    └─ModuleList: 2-3                                  --
|    |    └─EncoderLayer: 3-1                           527,104
|    |    └─EncoderLayer: 3-2                           527,104
|    |    └─EncoderLayer: 3-3                           527,104
|    └─Dropout: 2-4                                     --
├─Decoder: 1-2                                          --
|    └─Embedding: 2-5                                   8,192
|    └─Embedding: 2-6                                   25,600
|    └─ModuleList: 2-7                                  --
|    |    └─DecoderLayer: 3-4                           790,784
|    |    └─DecoderLayer: 3-5                           790,784
|    |    └─DecoderLayer: 3-6                           790,784
|    └─

Layer (type:depth-idx)                                  Param #
├─Encoder: 1-1                                          --
|    └─Embedding: 2-1                                   8,192
|    └─Embedding: 2-2                                   25,600
|    └─ModuleList: 2-3                                  --
|    |    └─EncoderLayer: 3-1                           527,104
|    |    └─EncoderLayer: 3-2                           527,104
|    |    └─EncoderLayer: 3-3                           527,104
|    └─Dropout: 2-4                                     --
├─Decoder: 1-2                                          --
|    └─Embedding: 2-5                                   8,192
|    └─Embedding: 2-6                                   25,600
|    └─ModuleList: 2-7                                  --
|    |    └─DecoderLayer: 3-4                           790,784
|    |    └─DecoderLayer: 3-5                           790,784
|    |    └─DecoderLayer: 3-6                           790,784
|    └─