<a href="https://colab.research.google.com/github/codekali/Neural-SC-Descriptor/blob/master/SecondTrial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mount Google Drive, install dependencies and import required packages

In [0]:
from google.colab import drive
# Mount Google Drive under /content/drive/ so its files are reachable from this runtime.
drive.mount('/content/drive/')
# Switch to the project folder: the relative paths used later in this notebook
# (train/, val/, models/ and the local `src` package) are resolved against it.
%cd '/content/drive/My Drive/Transformer/'

In [0]:
# Install transformers
!pip install transformers
!pip install pytorch-lightning

In [0]:
import torch
import torch.optim as optim
import os.path
from os import path
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from torch.utils import data
from torch.utils.data import Dataset
from tqdm.notebook import tqdm
from src import utils
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Location of the training checkpoint, relative to the current working directory.
# When this file exists, the model-loading cell restores its state from it.
model_path = "models/checkpoint.pth"

# Retrieve training and validation data from Google Drive

In [0]:
# Read the training inputs (.sbt) and their targets (.nl), one example per line.
# Lines keep their trailing newline, exactly as read from the files.
# `with` guarantees both file handles are closed once the lines are loaded.
with open("train/train.token.sbt") as train_input_file:
  X_train = list(train_input_file)
with open("train/train.token.nl") as train_target_file:
  Y_train = list(train_target_file)

# Keep only the first third of the training examples.
train_subset_size = len(X_train) // 3
X_train = X_train[:train_subset_size]
Y_train = Y_train[:train_subset_size]

# Inputs and targets are paired by index, so they must stay the same length.
assert len(X_train) == len(Y_train), "training inputs and targets are misaligned"

In [0]:
# Read the validation inputs (.sbt) and their targets (.nl), one example per line.
# Lines keep their trailing newline, exactly as read from the files.
# `with` guarantees both file handles are closed once the lines are loaded.
with open("val/valid.token.sbt") as valid_input_file:
  X_valid = list(valid_input_file)
with open("val/valid.token.nl") as valid_target_file:
  Y_valid = list(valid_target_file)

# Keep only the first third of the validation examples.
valid_subset_size = len(X_valid) // 3
X_valid = X_valid[:valid_subset_size]
Y_valid = Y_valid[:valid_subset_size]

# Inputs and targets are paired by index, so they must stay the same length.
assert len(X_valid) == len(Y_valid), "validation inputs and targets are misaligned"

# Dataset class to load data during training and validation

In [0]:
class Dataset(data.Dataset):
  """Serve index-aligned (input, target) pairs from two parallel sequences."""

  def __init__(self, X_item, Y_item):
    """Keep references to the input sequence and the target sequence."""
    self.X_item, self.Y_item = X_item, Y_item

  def __len__(self):
    """Return the number of examples, i.e. the length of the input sequence."""
    return len(self.X_item)

  def __getitem__(self, index):
    """Return the ``(input, target)`` pair stored at ``index``."""
    return self.X_item[index], self.Y_item[index]

# Use the GPU when available and define the batch size and number of workers

In [0]:
# Select the compute device: CUDA when it is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(">>> ", device)

# DataLoader settings shared by the training and the validation loader.
params = dict(batch_size=6, shuffle=True, num_workers=2)

# Wrap the in-memory example lists, then build one loader per split.
training_set = Dataset(X_train, Y_train)
validation_set = Dataset(X_valid, Y_valid)
training_generator = data.DataLoader(training_set, **params)
validation_generator = data.DataLoader(validation_set, **params)

# Upper bound on the number of epochs run by the training loop.
num_epoch = 20

## Load tokenizer

In [0]:
# Load the pre-trained 't5-small' tokenizer; the training loop uses it to encode
# both the input batches and the target batches.
tokenizer = T5Tokenizer.from_pretrained('t5-small')

## Load the model, optimizer, trainer, epoch number and losses from a saved checkpoint, or initialise them from the pre-trained model.

In [0]:
# Build the pre-trained model and its optimizer unconditionally: a saved
# checkpoint only stores state dicts, so the objects must exist before their
# state can be restored into them.
model = T5ForConditionalGeneration.from_pretrained('t5-small')
model = model.to(device)

# Parameters whose name contains one of these fragments are excluded from
# weight decay; every other trainable parameter uses a decay of 0.01.
# NOTE(review): confirm these fragments match the parameter names of the T5
# model (its layer-norm parameters may be named differently).
no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
lr = 2e-5
optimizer_grouped_parameters = [
    {
        "params": [value],
        "lr": lr,
        "weight_decay": 0.0 if any(nd in key for nd in no_decay) else 0.01,
    }
    for key, value in model.named_parameters()
    if value.requires_grad
]

optimizer = utils.BertAdam(
    optimizer_grouped_parameters,
    lr=lr,
    warmup=0.1,
    t_total=100,
    schedule='warmup_constant',
)

if path.exists(model_path):

  # Resume: restore model/optimizer state and the recorded history from the drive.
  # map_location lets a checkpoint written on a GPU runtime load on a CPU runtime.
  checkpoint = torch.load(model_path, map_location=device)
  epoch = checkpoint['epoch']
  model.load_state_dict(checkpoint['state_dict'])
  optimizer.load_state_dict(checkpoint['optimizer'])
  loss_train = checkpoint['trainloss']
  loss_valid = checkpoint['validloss']
  trainer = checkpoint['trainer']
  print(">>> loaded saved checkpoint from epoch ", epoch)
  print(">>> Saved model ")
  model.eval()

else:

  # Fresh start: empty loss history, beginning at epoch 0.
  loss_train = []
  loss_valid = []
  epoch = 0

# Training and validation loop

In [0]:
# Manual fine-tuning loop. Each epoch runs one pass over the training loader,
# one pass over the validation loader, and then saves a checkpoint to
# `model_path`. Training stops at `num_epoch` epochs, or earlier once the mean
# validation loss has not improved for `patience` consecutive epochs.
# NOTE(review): `lm_labels`, `pad_to_max_length` and `early_stop_callback` are
# keyword names tied to the installed library versions -- confirm them.

def _batch_loss(sources, targets):
  """Tokenize one batch of input/target strings and return the model's loss tensor."""
  encoded_inputs = tokenizer.batch_encode_plus(sources, return_tensors="pt", pad_to_max_length=True)
  encoded_targets = tokenizer.batch_encode_plus(targets, return_tensors="pt", pad_to_max_length=True)
  outputs = model(
      input_ids=encoded_inputs['input_ids'].to(device),
      attention_mask=encoded_inputs['attention_mask'].to(device),
      lm_labels=encoded_targets['input_ids'].to(device),
  )
  return outputs[0]


trainloss = 0
validloss = 0

# Monitor validloss
patience = 4
early_stopping = EarlyStopping('validloss', patience = patience)
# NOTE(review): this Trainer does not drive the manual loop below; it is built
# once and kept only because the checkpoint format stores a 'trainer' entry.
trainer = Trainer(early_stop_callback=early_stopping)

# Make sure the checkpoint directory exists before the first save.
checkpoint_dir = path.dirname(model_path)
if checkpoint_dir:
  os.makedirs(checkpoint_dir, exist_ok=True)

while epoch < num_epoch:

  # --- Training ---
  # The validation pass switches the model to eval mode, so training mode has
  # to be re-enabled at the start of every epoch.
  model.train()
  for local_batch, local_labels in tqdm(training_generator):
    model.zero_grad()
    loss = _batch_loss(local_batch, local_labels)
    # Record a plain float so the history does not keep every autograd graph alive.
    trainloss = loss.item()
    loss_train.append(trainloss)
    loss.backward()
    optimizer.step()

  # --- Validation ---
  model.eval()
  validloss = 0  # reset so the reported value is the mean for this epoch only
  with torch.no_grad():
    for local_batch, local_labels in tqdm(validation_generator):
      validloss += _batch_loss(local_batch, local_labels).item()
  loss_valid.append(validloss / len(validation_generator))
  print("\nEpoch ", epoch, " completed!, Train LOSS is: ", trainloss, " Valid loss is: ", validloss/len(validation_generator))

  # --- Save states ---
  states = {
          'epoch': epoch + 1,
          'state_dict': model.state_dict(),
          'optimizer': optimizer.state_dict(),
          'trainloss': loss_train,
          'validloss': loss_valid,
          'trainer': trainer
      }
  torch.save(states, model_path)

  epoch += 1

  # --- Early stopping ---
  # Derived from the full history, so it also works after resuming from a checkpoint.
  valid_history = [float(value) for value in loss_valid]
  epochs_since_best = len(valid_history) - 1 - valid_history.index(min(valid_history))
  if epochs_since_best >= patience:
    print(">>> Early stopping: validation loss has not improved for", patience, "epochs")
    break

In [0]:
# Convert every recorded loss to a plain float first, so plotting works whether
# the history holds Python numbers or single-element tensors.
train_curve = [float(value) for value in loss_train]
valid_curve = [float(value) for value in loss_valid]

# Training loss: one point per training batch.
fig, ax = plt.subplots()
ax.plot(train_curve)
ax.set(title="Training loss", xlabel="Batch", ylabel="Loss")
plt.show()

# Validation loss: one point per epoch.
fig, ax = plt.subplots()
ax.plot(valid_curve)
ax.set(title="Validation loss", xlabel="Epoch", ylabel="Loss")
plt.show()