In [None]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [None]:
import os
local_dir = 's18-transformer-speeding-up-strategy'
repo_url = 'https://github.com/aakashvardhan/s18-transformer-speeding-up-strategy.git'

# Check if the local directory already exists
if not os.path.exists(local_dir):
    # Clone the repository because it does not exist
    !git clone --quiet {repo_url}
else:
    # Change directory to the local repository
    %cd {local_dir}
    # Pull the latest changes because the repository already exists
    !git pull

In [None]:
import sys

# Make the cloned repository's modules importable.
# Guarded so that re-running this cell does not append duplicate entries.
repo_root = '/content/s18-transformer-speeding-up-strategy'
if repo_root not in sys.path:
    sys.path.append(repo_root)

In [None]:
!pip install -q -r /content/s18-transformer-speeding-up-strategy/requirements.txt

In [None]:
# Work from the repository root for the rest of the notebook; relative paths
# used below ("weights", os.getcwd() for the logger) resolve against it.
%cd /content/s18-transformer-speeding-up-strategy

In [None]:
import os
import warnings
import random

import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torchmetrics.text import BLEUScore, CharErrorRate, WordErrorRate

import lightning as L
from lightning.pytorch.callbacks import (
    EarlyStopping,
    LearningRateMonitor,
    ModelCheckpoint,
    TQDMProgressBar,
)
from lightning.pytorch.loggers import TensorBoardLogger

from config_file import get_config, get_weights_file_path
from dataset import LiTDataModule
from utils import get_model, greedy_decode

In [None]:
# Load the project's default configuration, then apply this run's overrides.
NUM_EPOCHS = 18  # forwarded to the Trainer as max_epochs

cfg = get_config()
# Optional override, currently disabled: cfg['batch_size'] = 24
cfg['num_epochs'] = NUM_EPOCHS

In [None]:
# Directory (relative to the current working directory) that must exist
# before training starts.
directory_name = "weights"

# Record whether the directory was already there, then create it if needed.
# `isdir` (not `exists`) is used so a regular file named "weights" is not
# mistaken for the directory; in that case `makedirs` raises FileExistsError
# instead of the cell silently reporting success.
already_present = os.path.isdir(directory_name)
os.makedirs(directory_name, exist_ok=True)

if already_present:
    print(f"Directory '{directory_name}' already exists.")
else:
    print(f"Directory '{directory_name}' created!")

In [None]:
# Release cached GPU memory, then fix the seed (including dataloader workers)
# so the run is repeatable.
torch.cuda.empty_cache()
L.seed_everything(seed=42, workers=True)
print("Seed set to 42...")

In [None]:
# Build the data module and pull out the pieces the model constructor needs.
datamodule = LiTDataModule(cfg)
datamodule.setup()
print("DataModule initialized...")

tokenizer_src = datamodule.tokenizer_src
tokenizer_tgt = datamodule.tokenizer_tgt
train_dataloader = datamodule.train_dataloader()

# TensorBoard logger rooted at <cwd>/lightning_logs, pinned to version 1.
tb_logger = TensorBoardLogger(save_dir=os.getcwd(), name="lightning_logs", version=1)

In [None]:
# Callbacks are created up front so the Trainer call below stays readable.
# Checkpointing and early stopping both watch the per-step training loss.
checkpoint_cb = ModelCheckpoint(
    dirpath=cfg["model_folder"],
    filename="model-{epoch:02d}-{train_loss:.4f}",
    monitor="train_loss_step",
    mode="min",
    save_top_k=3,
    save_last=True,
)
lr_monitor_cb = LearningRateMonitor(logging_interval="step", log_momentum=True)
early_stop_cb = EarlyStopping(
    monitor="train_loss_step", mode="min", stopping_threshold=1.6
)
progress_cb = TQDMProgressBar(refresh_rate=10)

# Initialize the trainer
trainer = L.Trainer(
    accelerator=cfg["accelerator"],
    devices="auto",
    precision=cfg["precision"],
    max_epochs=cfg["num_epochs"],
    logger=tb_logger,
    default_root_dir=cfg["model_folder"],
    callbacks=[checkpoint_cb, lr_monitor_cb, early_stop_cb, progress_cb],
    gradient_clip_val=0.5,
    num_sanity_val_steps=5,
    check_val_every_n_epoch=1,
    limit_val_batches=2,
    enable_progress_bar=True,
)

In [None]:
from main import LTModel

# Construct the Lightning module from the config, both tokenizers and the
# training dataloader prepared earlier.
model = LTModel(
    cfg,
    tokenizer_src=tokenizer_src,
    tokenizer_tgt=tokenizer_tgt,
    train_dataloader=train_dataloader,
)
print("Model initialized...")

In [None]:
# Show the config's 'one_cycle_best_lr' entry.
# NOTE(review): presumably the value model.one_cycle_best_lr starts from — confirm in main.py.
print(cfg["one_cycle_best_lr"])

In [None]:
 # Learning rate finder
tuner = L.pytorch.tuner.Tuner(trainer)
# NOTE(review): `num_training` is the number of learning rates the finder
# tries, not an epoch count; reusing trainer.max_epochs yields few sample
# points. Left unchanged here — confirm this is intentional.
lr_finder = tuner.lr_find(
    model, datamodule=datamodule, num_training=trainer.max_epochs, min_lr=1e-5, max_lr=1e-3
)
print(lr_finder)

# Start from the model's current learning rate; it is only replaced when the
# finder actually produces a usable suggestion.
suggested_lr = model.one_cycle_best_lr

if lr_finder is not None:
    fig = lr_finder.plot(suggest=True)
    fig.show()
    candidate_lr = lr_finder.suggestion()
    if candidate_lr is not None:
        suggested_lr = candidate_lr
        print(f"Suggested learning rate: {suggested_lr}")
    else:
        # suggestion() returns None when it cannot pick a point; keep the
        # existing value rather than overwriting it with None.
        print(f"No learning rate suggestion available; keeping {suggested_lr}.")
else:
    print("Learning rate finding did not complete successfully.")

# Set the best learning rate
model.one_cycle_best_lr = suggested_lr

In [None]:
# Show the learning rate now stored on the model (after the LR-finder cell).
print(model.one_cycle_best_lr)

In [None]:
# Train the model on the data module, using the Trainer configured above
# (its max_epochs comes from cfg["num_epochs"]).
trainer.fit(model=model, datamodule=datamodule)

In [None]:
# Validate the model
# Runs the validation loop with the same Trainer, so its limit_val_batches=2
# setting applies here as well.
trainer.validate(model=model, datamodule=datamodule)
print("Model Evaluation Done...")

In [None]:
# Save the model
# Writes only the weights (state_dict) to a file in the current working directory.
# NOTE(review): the file name says "resnet18", but the object saved is the
# LTModel from the transformer repository — looks like a copy-paste leftover.
# Rename once anything that loads this path has been checked.
torch.save(model.state_dict(), "saved_resnet18_model.pth")
print("Model saved...")

In [None]:
# start tensorboard
# Load the TensorBoard notebook extension and open it on the lightning_logs
# directory. This matches the TensorBoardLogger's <cwd>/lightning_logs only if
# the working directory was the repository root when the logger was created.
%load_ext tensorboard
%tensorboard --logdir /content/s18-transformer-speeding-up-strategy/lightning_logs