In [1]:
# Imports for the pruning experiment. The two project modules are reloaded
# right after being imported, before any names are pulled out of them.
import torch
# `imp` is deprecated (and removed in Python 3.12); the import is left in
# place only in case other cells still reference it.
import imp
import importlib
#import TransformerTrainer
import MyTransformer
import PruningTrainer
# importlib.reload is the supported replacement for imp.reload.
importlib.reload(PruningTrainer)
importlib.reload(MyTransformer)
# Imported after the reloads so these names come from the reloaded module.
from PruningTrainer import BaseDataModule, MyPruningTrainer
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks import LearningRateMonitor
import random
import numpy as np
import utils
from torch import nn

In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
%env CUBLAS_WORKSPACE_CONFIG :16:8

env: CUBLAS_WORKSPACE_CONFIG=:16:8


In [3]:
# Run configuration used by the data module and the cells that follow.
DEVICE = "cuda"
BATCH_SIZE = 64
MAX_LEN = 50

# Seed PyTorch again right before the data module is built.
torch.manual_seed(SEED)
# torch.set_deterministic was deprecated in favour of
# torch.use_deterministic_algorithms and is absent from newer PyTorch
# releases. Use the new name when the installed version provides it and fall
# back to the old one otherwise, so the cell runs on both.
if hasattr(torch, "use_deterministic_algorithms"):
    torch.use_deterministic_algorithms(True)
else:
    torch.set_deterministic(True)

# Data module for the English-Russian text file; it receives the batch size,
# target device and seed defined above.
data_module = BaseDataModule(
    batch_size=BATCH_SIZE,
    device = DEVICE,
    data_path="./data/eng_rus.txt",
    seed=SEED
)

# Must run before the next cell, which reads src_vocab_len / trg_vocab_len
# from the data module — presumably populated here; confirm in PruningTrainer.
data_module.prepare_data()

In [4]:
# Keyword arguments later unpacked into MyTransformer.Transformer. The two
# vocabulary sizes are read from the data module; the rest are fixed here.
model_params = dict(
    src_vocab_size=data_module.src_vocab_len,
    trg_vocab_size=data_module.trg_vocab_len,
    d_model=512,
    n_enc_layers=6,
    n_dec_layers=6,
    n_enc_heads=8,
    n_dec_heads=8,
    enc_dropout=0.1,
    dec_dropout=0.1,
)

In [5]:
# Build the transformer and restore its saved weights.
model = MyTransformer.Transformer(**model_params)
# map_location="cpu" makes the checkpoint loadable regardless of the device
# its tensors were saved from (e.g. on a machine without a GPU);
# load_state_dict then copies the values into the model's own parameters.
checkpoint = torch.load("models/transformer_model.pt", map_location="cpu")
# Left as the last expression so the notebook shows the key-matching report.
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [6]:
# Wrap the transformer in MyPruningTrainer, passing the source/target padding
# indices from the data module, then move the wrapper to DEVICE.
plmodel = MyPruningTrainer(
    model,
    data_module.src_pad_idx,
    data_module.trg_pad_idx,
    1e-4,
)
# The trailing semicolon stops the notebook from echoing the returned module.
plmodel.to(DEVICE);

In [7]:
# Training configuration.
N_EPOCHS = 32
CLIP = 1
plmodel.lr = 1e-4

# Logging: TensorBoard files under ./logs/ and the learning rate at each step.
tb_logger = pl_loggers.TensorBoardLogger('./logs/')
lr_monitor = LearningRateMonitor(logging_interval='step')
# Stop early once 'total_val_loss' stops improving by at least min_delta for
# `patience` consecutive checks. The monitored key is a loss, so lower is
# better and the mode must be 'min'; 'mean' is not a mode EarlyStopping
# accepts (only min/max style modes are valid).
early_stop_callback = EarlyStopping(
   monitor='total_val_loss',
   min_delta=0.01,
   patience=2,
   verbose=False,
   mode='min'
)
# NOTE(review): no GPU argument is passed to the Trainer and the recorded run
# log says "GPU available: True, used: False"; device placement appears to
# rely on the earlier plmodel.to(DEVICE) call — confirm this is intended.
trainer = Trainer(
    max_epochs=N_EPOCHS,
    gradient_clip_val=CLIP,
    progress_bar_refresh_rate=1,
    callbacks=[early_stop_callback, lr_monitor], 
    logger=tb_logger,
    log_every_n_steps=20
)
# Prepare the 'fit' stage of the data module explicitly, then train.
data_module.setup('fit')
trainer.fit(plmodel, data_module)

GPU available: True, used: False
TPU available: None, using: 0 TPU cores

  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | model     | Transformer      | 57.1 M
2 | pruner    | Pruner           | 720   
-----------------------------------------------
57.1 M    Trainable params
0         Non-trainable params
57.1 M    Total params


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

torch.Size([18, 8, 1]) torch.Size([18, 8, 1])
torch.Size([18, 8]) torch.Size([18, 512, 512])


RuntimeError: The size of tensor a (18) must match the size of tensor b (8) at non-singleton dimension 1

In [17]:
# Dump every parameter tensor held by the pruner (used here to inspect the
# values, including any NaN entries).
print([*plmodel.pruner.parameters()])

[Parameter containing:
tensor([[0.6556, 0.7469, 0.5382, 0.7653, 0.7826,    nan, 0.7803, 0.7679],
        [0.6632, 0.7493, 0.5385, 0.7591, 0.7671,    nan, 0.7696, 0.7710],
        [0.6560, 0.7391, 0.5370, 0.7608, 0.7766,    nan, 0.7754, 0.7592],
        [0.6621, 0.7361, 0.5359, 0.7572, 0.7643,    nan, 0.7765, 0.7599],
        [0.6663, 0.7429, 0.5482, 0.7558, 0.7810,    nan, 0.7665, 0.7668],
        [0.6720, 0.7405, 0.5446, 0.7674, 0.7791,    nan, 0.7703, 0.7582],
        [0.6644, 0.7464, 0.5339, 0.7609, 0.7663,    nan, 0.7752, 0.7590],
        [0.6616, 0.7371, 0.5456, 0.7699, 0.7749,    nan, 0.7695, 0.7550],
        [0.6591, 0.7448, 0.5445, 0.7713, 0.7658,    nan, 0.7672, 0.7660],
        [0.6719, 0.7479, 0.5432, 0.7533, 0.7756,    nan, 0.7780, 0.7653],
        [0.6635, 0.7470, 0.5468, 0.7619, 0.7704,    nan, 0.7696, 0.7717],
        [0.6635, 0.7349, 0.5419, 0.7687, 0.7638,    nan, 0.7793, 0.7610],
        [0.6639, 0.7428, 0.5349, 0.7667, 0.7629,    nan, 0.7737, 0.7728],
        [0.6581