## Converting to a PyTorch Lightning fitting routine

In [24]:
!export CUDA_LAUNCH_BLOCKING=1

In [None]:
# import tensorboard
# IPython introspection: show the help for TextDataModule.
# NOTE(review): the name is only imported by a later cell
# (`from babl.data import TextDataset, TextDataModule`), so that cell has to
# be run before this one.
?TextDataModule

In [1]:
import os
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pathlib import Path
from dataclasses import dataclass
from pathlib import Path
from babl.data import TextDataset, TextDataModule
from babl.utils import CallbackCollection
from babl.routine import Routine


# ANSI SGR escape sequences wrapped around the banner printed after fitting.
# SGR parameters are separated by ';' and the sequence is terminated by 'm'.
# The previous value ('\033[1:97m]') used ':' and carried a stray ']', so it
# was echoed as literal text instead of switching the colour.
WHITE = '\033[1;97m'  # bold + bright-white foreground
RESET = '\033[0m'  # reset all attributes
s = """
                                                                                
                            @@@@@@@@@@@@@@@@@@@@@@@@@                           
                      ,@@@@@@@@@@@            #@@@@@@@@@@@                      
                   @@@@@@      @@@@           @@@      @@@@@@&                  
                @@@@@          &@@@   ,@@@@@@@@@@          @@@@@.               
              @@@@@@      @@@@@@@@@   ,@@@   /@@   @@@      @@@@@@@             
            @@@@  @@@@@    @@@@       ,@@@        @@@@@@@@@@@@# @@@@@           
          @@@@      @@@@@   @@@@  .@@@@@@@@@@@  @@@@   @@@@@      @@@@          
         @@@@     @@@@@       @@@@@@@@@@@@@@@@@@@@@                 @@@@        
        @@@@     @@@@@.    @@@@@@@@   ,@@@   @@@@@@@@@    @@@@@@     @@@@       
       @@@@         @@@@@@@@@    @@@@@@@@@@@@@@@   @@@@@@@@@@@@@@     @@@(      
      @@@@@@@@@@@@(     @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@     @@@@@@@@@@@@      
      @@@.    @@@@     @@@  %@@@@@@@@@@@@@@@@@@@@@@@  @@@@     @@      @@@&     
      @@@     @@@     @@@@  @@@@@@@@@@@@@@@@@@@@@@@@   @@@             @@@@     
      @@@     @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@    @@@@     
      @@@             @@@@  @@@@@@@@@@@@@@@@@@@@@@@@   @@@     @@@#    @@@@     
      @@@(    @@@@     @@@  @@@@@@@@@@@@@@@@@@@@@@@@@ @@@@     @@@.    @@@#     
      @@@@@@@@@@@@@    %@@@@@@.@@@@@@@@@@@@@@@@@@@@@@@@@@     &@@@@@@@@@@@      
       @@@@     @@@@@@@@@@@@@@  @@@@@@@@@@@@@@@@   @@@@@@@@@@.        @@@       
        @@@@     @@@@@     @@@@@@@@    @@@    @@@@@@@      @@@@.     @@@#       
         @@@@                %@@@@@@@@@@@@@@@@@@@@@      %@@@@     /@@@         
          @@@@@     @@@@@@  @@@@    @@@@@@@@%   #@@@#  *@@@@.     @@@@          
            @@@@, @@@@/@@@@@@@@        @@@        @@@@    @@@@@ @@@@            
              @@@@@@       @@   @@@@%  @@@  .@@@@@@@@@      /@@@@@,             
                @@@@@@          @@@@@@@@@@   @@@@          @@@@@                
                   @@@@@@@     @@@@           @@@(     @@@@@@                   
                       @@@@@@@@@@@            .@@@@@@@@@@                       
                            .@@@@@@@@@@@@@@@@@@@@@@@                            
                                                                                
                                                                                
"""




class Fitter:
    """Wire a model, its data module and a Lightning ``Trainer`` into one run.

    Calling the instance fits ``model`` (wrapped in ``Routine``), tests the
    selected checkpoint on the test loader and returns the instance, so that
    ``trainer`` and ``data_module`` can be inspected afterwards.

    Args:
        model: Model handed to ``Routine``.
        tokenizer: Tokenizer forwarded to ``TextDataModule``.
        model_name: Name of the output sub-directory for logs and checkpoints.
        data_args: Data configuration forwarded to ``TextDataModule``.
        mini_dataset: Forwarded to ``TextDataModule`` and recorded in the
            fitting arguments.
    """

    def __init__(
        self,
        model,
        tokenizer,
        model_name,
        data_args,
        mini_dataset=True,
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.model_name = model_name
        # Kept under its own name: ``self.args`` holds the fitting arguments
        # (it used to be assigned ``data_args`` first and then overwritten).
        self.data_args = data_args
        self.mini_dataset = mini_dataset
        self.data_module = TextDataModule(
            data_args=data_args, tokenizer=tokenizer, mini_dataset=mini_dataset
        )
        self.trainer = None

        @dataclass
        class FittingArgs:
            # Fitting hyper-parameters; ``model_dir`` is annotated so that it
            # is a real dataclass field and shows up in the repr.
            es_patience: int = 2
            # NOTE(review): the output root is hard-coded to one machine.
            model_dir: str = str(
                Path("/home/nameduser/Code/babl/outputs") / model_name
            )
            max_epoch: int = 5
            fast_dev_run: bool = False
            mini_dataset: bool = True

            def __post_init__(self):
                # Make sure the output directory exists; expose it as ``str``.
                Path(self.model_dir).mkdir(parents=True, exist_ok=True)
                self.model_dir = str(self.model_dir)

        # Pass the constructor flag through instead of always recording True.
        self.args = FittingArgs(mini_dataset=mini_dataset)

    def setup(self):
        """Return the ``(train, val, test)`` dataloaders of the data module."""
        return (
            self.data_module.train_dataloader(),
            self.data_module.val_dataloader(),
            self.data_module.test_dataloader(),
        )

    def callbacks(self):
        """Return the result of ``CallbackCollection(self.args)()``.

        ``__call__`` treats the result as a mapping of callbacks and expects
        it to contain a ``"checkpoint"`` entry.
        """
        return CallbackCollection(self.args)()

    @staticmethod
    def _visible_device_count():
        """Count the device ids listed in ``CUDA_VISIBLE_DEVICES``.

        An unset variable counts as one device (the previous default of
        ``"1,"``). Blank entries are ignored and the result is never below 1,
        so an empty variable can no longer request zero devices.
        """
        raw = os.getenv("CUDA_VISIBLE_DEVICES", "1,")
        device_ids = [part for part in raw.split(",") if part.strip()]
        return max(len(device_ids), 1)

    def __call__(self):
        """Fit the model, test the selected checkpoint and return ``self``."""
        logger = TensorBoardLogger(
            save_dir=self.args.model_dir,
            name="lightning_logs",
        )
        train_loader, val_loader, test_loader = self.setup()
        print("Created training, validating and test loaders .... ")

        # Training, validation and test routines for the model.
        routine = Routine(self.model)

        callback_dict = self.callbacks()
        callback_list = list(callback_dict.values())

        # NOTE(review): the accelerator is pinned to CPU while the device
        # count is derived from CUDA_VISIBLE_DEVICES — confirm this is intended.
        self.trainer = Trainer(
            accelerator="cpu",
            devices=self._visible_device_count(),
            sync_batchnorm=True,
            logger=logger,
            max_epochs=self.args.max_epoch,
            callbacks=callback_list,
            num_sanity_val_steps=2,
            gradient_clip_val=1.0,
            fast_dev_run=self.args.fast_dev_run,
        )

        self.trainer.fit(
            routine, train_dataloaders=train_loader, val_dataloaders=val_loader
        )

        checkpoint = callback_dict["checkpoint"]
        if self.args.fast_dev_run:
            # In a fast dev run the best-weights path cannot be resolved, so
            # fall back to the last saved weights.
            model_ckpt_path = checkpoint.last_model_path
        else:
            model_ckpt_path = checkpoint.best_model_path

        self.trainer.test(
            dataloaders=test_loader,
            ckpt_path=model_ckpt_path,
        )
        print(WHITE + s + RESET)
        # Return the fitter so callers can reach the trainer and data module.
        return self

## Fitting 

In [2]:
# Set MODEL_NAME (the short model key) in this kernel's environment; a later
# cell overwrites it with the full model name.
%env MODEL_NAME=t5

env: MODEL_NAME=t5


In [3]:
from babl.models import MODELS_CHOICES, MODELS
from babl.config import T5 as T5Config
from pathlib import Path
import os 
from babl.config import Fitting as FittingArgs

# Build the fitting configuration from its defaults and print the resolved values.
args = FittingArgs()
print(args)


Fitting(es_patience=2, model_dir='/home/nameduser/Code/babl/outputs/t5-small', max_epoch=5, fast_dev_run=False, mini_dataset=True)


In [4]:
from babl.models import MODELS_CHOICES, MODELS
from babl.config import T5 as T5Config
from babl.config import Data as DataArgs
from pathlib import Path
import os 
# from babl.config import FittingArgs

# Short model key used to index the registries below.
model_name = "t5"
# The first entry of MODELS_CHOICES[model_name] is used as the full model name.
full_model_name = MODELS_CHOICES[model_name][0]
# Registry entry holding the tokenizer ("tok") and model ("model") objects.
t_w_m = MODELS[model_name]

t = t_w_m["tok"]
m = t_w_m["model"]

# Instantiate both from the full model name; the model additionally receives
# the attributes of a default T5Config as keyword arguments.
tokenizer = t.from_pretrained(full_model_name)
model = m.from_pretrained(full_model_name, **T5Config().__dict__)
# Put the model in training mode.
model.train()


# Overwrite MODEL_NAME with the full model name.
os.environ['MODEL_NAME'] = full_model_name
# NOTE: changing this environment variable after the fitting arguments have
# been initialised does not change their root directory.



# Root folder of the input data; only the commented-out experiment below uses it.
data_path_root = Path("/home/nameduser/Code/babl/inputs") 
# data_path_val = data_path_root / "10k.jsonl"
# ds = TextDataset(data_path_val, tokenizer=tokenizer, plain_text=False)
# from babl.data import T2TDataCollator
# from torch.utils.data import DataLoader
# t_dl = DataLoader(ds, batch_size=64, shuffle=True, collate_fn=T2TDataCollator())
# # test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)
# data_module = TextDataModule(data_path_root, tokenizer)

# Build the Fitter and run it straight away: calling the instance fits and
# tests the model and returns the Fitter itself.
fitter= Fitter(model=model, model_name=full_model_name, tokenizer=tokenizer, data_args=DataArgs())()


import pickle

import torch 

from babl.config import Data as DataArgs
from babl.config import Fitting as FittingArgs
from babl.utils import OnnxExporter, Predictor 

# During distributed training the model sits further down the module tree,
# so the trained model is only pulled out of the trainer in the
# single-visible-GPU case; otherwise the existing `model`/`tokenizer` are used.
if torch.cuda.is_available() and torch.cuda.device_count() == 1:
    model = fitter.trainer.model.model
    tokenizer = fitter.data_module.tokenizer

# Data and fitting configuration get separate names; `args` used to be bound
# to DataArgs and then immediately rebound to FittingArgs.
data_args = DataArgs()
predictor = Predictor(tokenizer=tokenizer, model=model, input_max_len=data_args.input_max_len)

args = FittingArgs()
pickle_path = Path(args.model_dir) / "model.pickle"

# Round-trip the predictor through pickle to check that it can be serialised.
with open(pickle_path, "wb") as f:
    pickle.dump(predictor, f)
# SECURITY: unpickling is acceptable here only because the file was written
# by this cell a moment ago; never unpickle files from untrusted sources.
with open(pickle_path, "rb") as f:
    loaded_model = pickle.load(f)

loaded_model("Where is london", "")

## Currently failing to export to onnx :(
# from babl.config import Fitting as FittingArgs
# args = FittingArgs()
# OnnxExporter(model=predictor, model_name=full_model_name,output_dir=args.model_dir)()





You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


MINI DATASET RUN?: True. Using 128 datapoints for training




MINI DATASET RUN?: True. Using 128 datapoints for training
MINI DATASET RUN?: True. Using 128 datapoints for training
Created training, validating and test loaders .... 


GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/nameduser/.local/share/virtualenvs/babl-qoEDH0A2/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/home/nameduser/.local/share/virtualenvs/babl-qoEDH0A2/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /home/nameduser/Code/babl/outputs/t5-small exists and is not empty.

  | Name  | Type                       | Params | Mode 
-------------------------------------------------------------
0 | model | T5ForConditionalGeneration | 60.5 M | train
-------------------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
242.026   Total estimated model params size (MB)
277       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/nameduser/.local/share/virtualenvs/babl-qoEDH0A2/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:476: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
/home/nameduser/.local/share/virtualenvs/babl-qoEDH0A2/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (16) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Restoring states from the checkpoint path at /home/nameduser/Code/babl/outputs/t5-small/epoch=0-val_loss=10.76-val_EM=0.98-val_F1=0.90.ckpt
Loaded model weights from the checkpoint at /home/nameduser/Code/babl/outputs/t5-small/epoch=0-val_loss=10.76-val_EM=0.98-val_F1=0.90.ckpt
/home/nameduser/.local/share/virtualenvs/babl-qoEDH0A2/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:476: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Testing: |          | 0/? [00:00<?, ?it/s]

[1:97m]
                                                                                
                            @@@@@@@@@@@@@@@@@@@@@@@@@                           
                      ,@@@@@@@@@@@            #@@@@@@@@@@@                      
                   @@@@@@      @@@@           @@@      @@@@@@&                  
                @@@@@          &@@@   ,@@@@@@@@@@          @@@@@.               
              @@@@@@      @@@@@@@@@   ,@@@   /@@   @@@      @@@@@@@             
            @@@@  @@@@@    @@@@       ,@@@        @@@@@@@@@@@@# @@@@@           
          @@@@      @@@@@   @@@@  .@@@@@@@@@@@  @@@@   @@@@@      @@@@          
         @@@@     @@@@@       @@@@@@@@@@@@@@@@@@@@@                 @@@@        
        @@@@     @@@@@.    @@@@@@@@   ,@@@   @@@@@@@@@    @@@@@@     @@@@       
       @@@@         @@@@@@@@@    @@@@@@@@@@@@@@@   @@@@@@@@@@@@@@     @@@(      
      @@@@@@@@@@@@(     @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@     @@@@@@@@@@@@      
      @@@.    @@@@ 

  ans_encoded = self.m.generate(input_ids=torch.tensor([input_ids]), attention_mask=torch.tensor([attention_mask]))
  return torch.tensor(token, device=device, dtype=torch.long)
  eos_token_tensor is not None
  if eos_token_tensor is not None and (
  if has_default_max_length and generation_config.max_new_tokens is None and generation_config.max_length == 20:
  if input_ids_length >= generation_config.max_length:
  if generation_config.min_length is not None and generation_config.min_length > generation_config.max_length:
  inputs_tensor.shape[1] != input_ids_length
  or (is_torchdynamo_compiling() or cache_position[-1] >= input_ids.shape[1])  # Exception 3
  elif input_ids.shape[1] != cache_position.shape[0]:  # Default case (the "else", a no op, is Exception 2)
  if sequence_length != 1:
  elif this_peer_finished:
  or len(self.key_cache[layer_idx]) == 0  # the layer has no cache
  len(self.key_cache[layer_idx]) == 0
  return clean("".join([self.decode(x) for x in ans_encoded]))
  "p

RuntimeError: 0 INTERNAL ASSERT FAILED at "../torch/csrc/jit/ir/alias_analysis.cpp":617, please report a bug to PyTorch. We don't have an op for aten::full but it isn't a special case.  Argument types: int[], bool, int, NoneType, Device, bool, 

Candidates:
	aten::full.names(int[] size, Scalar fill_value, *, str[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
	aten::full(SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
	aten::full.names_out(int[] size, Scalar fill_value, *, str[]? names, Tensor(a!) out) -> Tensor(a!)
	aten::full.out(SymInt[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)



'london'

In [None]:
from babl.config import Fitting as FittingArgs

# Run the ONNX exporter on the current predictor, writing into the model
# directory taken from the default fitting configuration.
args = FittingArgs()
exporter = OnnxExporter(
    model=predictor,
    model_name=full_model_name,
    output_dir=args.model_dir,
)
exporter()


In [9]:
import torch 

from babl.config import Data as DataArgs
from babl.config import Fitting as FittingArgs
from babl.utils import OnnxExporter, Predictor 

# During distributed training the model sits further down the module tree,
# so the trained model is only pulled out of the trainer in the
# single-visible-GPU case; otherwise the existing `model`/`tokenizer` are used.
if torch.cuda.is_available() and torch.cuda.device_count() == 1:
    model = fitter.trainer.model.model
    tokenizer = fitter.data_module.tokenizer

# Data and fitting configuration get separate names; `args` used to be bound
# to DataArgs and then immediately rebound to FittingArgs.
data_args = DataArgs()
predictor = Predictor(tokenizer=tokenizer, model=model, input_max_len=data_args.input_max_len)

args = FittingArgs()
OnnxExporter(model=predictor, model_name=full_model_name, output_dir=args.model_dir)()




In [None]:
from babl.config import Fitting as FittingArgs

# Prompt typo fixed ("captial" -> "capital") so the model sees the intended question.
question = "What is the capital of England?"
context = 'I was born 1995'
# A notebook only displays the last expression of a cell, so the prediction
# is printed explicitly; as a bare mid-cell expression it was never shown.
print(predictor(question, context))

args = FittingArgs()
OnnxExporter(model=predictor, model_name=full_model_name, output_dir=args.model_dir)()


In [None]:
from dataclasses import dataclass, field
from argparse_dataclass import ArgumentParser
@dataclass
class Options:
    # Three equivalent ways of giving a dataclass field a default value.
    x: int = 1  # plain default
    y: int = field(default=2)  # default passed through field()
    z: float = field(default_factory=lambda: 3.14)  # default built per instance

parser = ArgumentParser(Options)

# Pass an explicit (empty) argument list: with no argument parse_args() reads
# sys.argv, which inside a notebook kernel holds the kernel's own launch
# arguments rather than options meant for this parser.
args = parser.parse_args([])
# With no overrides this yields: Options(x=1, y=2, z=3.14)



In [None]:
# Scratch experiment: per-position cross-entropy between one-hot targets and
# a score tensor, averaged over the positions.
import torch
import torch.nn.functional as F 


# Target token ids: 12 sequences x 6 positions.
y = torch.tensor([[ 822,   10,  125, 100, 100, 100],
                  [ 822,   10,  116, 100, 100, 100],
                  [ 822,   10,  125, 100, 100, 100],
                  [ 822,   10,  213, 100, 100, 100],
                  [ 822,   10,  213, 100, 100, 100],
                  [ 822,   10,  116, 100, 100, 100],
                  [ 822,   10,  125, 100, 100, 100],
                  [ 822,   10,  116, 100, 100, 100],
                  [ 822,   10,  125, 100, 100, 100],
                  [ 822,   10,  213, 100, 100, 100],
                  [ 822,   10,  213, 100, 100, 100],
                  [ 822,   10,  116, 100, 100, 100]], dtype=torch.long
)
# One raw score per (sequence, position).
y_hat = torch.tensor([[-20.2879,  -9.8936, -13.5965, -40.7275, -40.8642, -40.8486],
         [-34.0870,  -3.6627, -14.2458,  -46.1296, -46.3147, -46.2990],
         [-30.5974,  -3.4536, -15.5923,  -43.6581, -43.8461, -43.8219],
         [-18.1922,  -8.0767, -14.5352,  -45.5706, -45.7357, -45.7194],
         [-18.1516,  -8.0787, -14.4750,  -45.4796, -45.6429, -45.6272],
         [-18.1262,  -8.1061, -14.4559,  -45.4136, -45.5755, -45.5602],
         [-17.2200,  -9.7170, -14.2499,  -38.4455, -38.5326, -38.4609],
         [-34.3804,  -6.2359, -13.2374,  -42.5014, -42.6473, -42.5558],
         [-27.8060,  -7.1265, -15.4786,  -42.2502, -42.3610, -42.2977],
         [-17.2795,  -7.8251, -15.8752,  -44.6078, -44.7242, -44.6339],
         [-17.1784,  -7.7900, -15.8198,   -44.4029, -44.5184, -44.4275],
         [-17.1213,  -7.7632, -15.7711,   -44.2831, -44.3977, -44.3082]]) 

num_class = 1321
# Repeat every score along a new trailing class axis -> (12, 6, num_class).
y_hat = torch.stack([y_hat] * num_class, dim=2)

# One-hot float targets of the same shape as the scores; cross_entropy treats
# a floating-point target of the input's shape as class probabilities.
y = F.one_hot(y, num_classes=num_class).float()
if y.shape != y_hat.shape:
    raise ValueError(f"shape mismatch: {tuple(y.shape)} vs {tuple(y_hat.shape)}")

# NOTE: y_hat is deliberately left as raw scores. F.cross_entropy applies
# log-softmax internally, so normalising with softmax beforehand (as this
# cell used to) applies softmax twice and distorts the loss.

losses = []
for tok in range(y_hat.shape[1]):
    target_tok = y[:, tok, :]
    scores_tok = y_hat[:, tok, :]
    print(target_tok.shape)
    print(scores_tok.shape)
    loss = F.cross_entropy(scores_tok, target_tok)
    print(loss)
    losses.append(loss)


# Stack the 0-dim loss tensors before averaging over the positions.
mean_loss = torch.stack(losses).mean()
mean_loss


In [None]:

# Root folder of the input data.
data_path_root = Path("/home/nameduser/Code/babl/inputs")

# Build a TextDataset directly from the 10k.jsonl file.
# NOTE(review): `t` is whatever the notebook last bound to that name — the
# "tok" registry entry in the fitting cell, or a random tensor once the
# scatter cell further down has run. The commented-out variant in the fitting
# cell passed the instantiated `tokenizer`; confirm which one is intended.
data_path_val = data_path_root / "10k.jsonl"
ds = TextDataset(data_path_val, tokenizer=t, plain_text=True )

# from babl.data import T2TDataCollator
# from torch.utils.data import DataLoader
# t_dl = DataLoader(ds, batch_size=64, shuffle=True, collate_fn=T2TDataCollator())
# test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)
# data_module = TextDataModule(data_path, tokenizer)


# Bare name: as the last expression of the cell this only displays the module.
torch

In [None]:
import torch

y = torch.tensor([0, 1, 1, 0, 0, 1])
yh = torch.tensor([0, 1, 1, 0, 1, 1])

# Element-wise agreement as 0/1. Bound and printed because a notebook only
# displays the final expression of a cell, so the bare comparison was never shown.
matches = (y == yh).int()
print(matches)

# Total number of elements of a 10x10 tensor, computed from its shape.
n_elements = torch.prod(torch.tensor(torch.rand((10, 10)).shape))
n_elements


In [None]:
import torch

# Fixed seed so the random scores below are reproducible.
torch.manual_seed(1414)

# 8 rows of 4 random scores.
t = torch.randn(8, 4)
# Column index of the largest score in each row.
a = t.argmax(dim=1)
# One-hot mask of the row-wise argmax: start from zeros and write 1.0 at
# column a[i] of every row i.
winner_index = a[:, None]
m = torch.zeros(t.shape).scatter(1, winner_index, 1.0)

print('\n', t, '\n\n', a, '\n\n', m)

In [None]:
# Length of the first value in the dict; use the len() builtin rather than
# calling the __len__ dunder directly.
first_len = len(list({"x": [1, 2, 3, 4]}.values())[0])
first_len