In [None]:
# Mount Google Drive so files under /content/drive/MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

import sys

# Put the project folder on the import path so the later
# `from scripts... import` / `from arguments import` lines can resolve
# (presumably `scripts/` and `arguments.py` live in this folder — verify).
# Guarded so that re-running this cell does not append duplicate entries.
if '/content/drive/MyDrive/research/Infinite/' not in sys.path:
    sys.path.append('/content/drive/MyDrive/research/Infinite/')

In [None]:
# Install the third-party packages this notebook imports.
# `%pip` (instead of `! pip`) targets the environment of the running kernel.
# NOTE(review): the Trainer cell passes `gpus=...`, which newer pytorch-lightning
# releases no longer accept — pin the version that was used for this run to reproduce it.
%pip install transformers
%pip install seqeval
%pip install pytorch-lightning
# Needed by `from pytorch_metric_learning import miners, losses` in the import cell.
%pip install pytorch-metric-learning

In [1]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import DistilBertModel

import pytorch_lightning as pl
from pytorch_lightning import seed_everything, loggers as pl_loggers
from pytorch_lightning.callbacks import ModelCheckpoint
from transformers import  DistilBertModel,DistilBertTokenizerFast

from pytorch_metric_learning import miners, losses

import pandas as pd
import random

from scripts.dataset import *
from scripts.utils import *
from arguments import jointBert_argument

In [2]:
# Central configuration for the notebook, grouped into four sections:
#   'mc'   -> model parameters
#   'tc'   -> training parameters
#   'dc'   -> data parameters
#   'misc' -> everything else
# Several entries are not read by the cells visible in this notebook
# (presumably consumed by project code elsewhere — verify before removing).
config = dict(
    # model parameters
    mc=dict(
        model_name='distilbert-base-multilingual-cased',
        tokenizer_name='distilbert-base-multilingual-cased',
        joint_loss_coef=0.5,
        # NOTE(review): meaning of id_1 / id_2 / sd / Ihs is not visible here — confirm.
        id_1=0.29868357362720055,
        id_2=0.2226859356474008,
        sd=0.3180000141987541,
        Ihs=77,
        freeze_decoder=True,
    ),
    # training parameters
    tc=dict(
        lr=3e-5,
        epoch=40,
        batch_size=15,
        weight_decay=0.003,
        shuffle_data=True,
        num_worker=8,
    ),
    # data params
    dc=dict(
        train_dir='/content/drive/MyDrive/research/Infinite/data/multiATIS/split/train/WWTLE_Augmented/test_EN.tsv',
        val_dir='/content/drive/MyDrive/research/Infinite/data/multiATIS/split/valid/clean/val.tsv',
        intent_num=18,
        slots_num=159,
        max_len=56,
    ),
    # misc
    misc=dict(
        fix_seed=False,
        gpus=-1,
        log_dir='./',
        precision=16,
    ),
)

In [4]:
# Load the slot label list: a headerless CSV whose single column is named 'SLOTS' on read.
final_slots = pd.read_csv(
    '/content/drive/MyDrive/research/Infinite/data/multiATIS/slots_list.csv',
    sep=',',
    header=None,
    names=['SLOTS'],
)['SLOTS'].values.tolist()

# Map each row position in the file to its slot label.
idx2slots = dict(enumerate(final_slots))

# callback for pytorch lightning
# Checkpoints are written to the Drive folder below. The filename template uses
# `train_ICL`, the only metric this notebook logs (see `training_step`); the
# previous `val_loss` placeholder referred to a metric that is never logged
# because no validation step is defined.
# NOTE(review): no `monitor` is set, so checkpoint selection keeps the library's
# default behaviour — confirm that is intended.
checkpoint_callback = ModelCheckpoint(
    dirpath='/content/drive/MyDrive/research/Infinite/bin/ICL/',
    filename='Infinite-{epoch:02d}-{train_ICL:.2f}',
)

In [5]:
class Infinite_encoder(nn.Module):
    """DistilBERT encoder that returns an intent-level contrastive loss.

    Args:
        cfg: configuration dict; ``cfg["mc"]["model_name"]`` selects the
            pretrained DistilBERT checkpoint to load.
    """

    def __init__(self, cfg):
        super().__init__()

        model_name = cfg["mc"]["model_name"]
        self.encoder = DistilBertModel.from_pretrained(
            model_name,
            return_dict=True,
            output_hidden_states=True,
        )
        self.cfg = cfg

        # One NT-Xent loss object per task.
        # NOTE(review): `slot_loss` is created but never called in `forward`.
        self.intent_loss = losses.NTXentLoss()
        self.slot_loss = losses.NTXentLoss()

    def forward(self, input_ids, attention_mask, intent_target, slots_target):
        """Encode a batch and return its intent contrastive loss.

        Args:
            input_ids: token ids passed to the encoder.
            attention_mask: attention mask passed to the encoder.
            intent_target: labels handed to the intent NT-Xent loss.
            slots_target: accepted for interface compatibility; not used here.

        Returns:
            The value produced by ``self.intent_loss`` for this batch.
        """
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        # First output field, first sequence position of every example
        # (presumably the [CLS] token embedding — TODO confirm).
        first_token_embedding = outputs[0][:, 0]

        return self.intent_loss(first_token_embedding, intent_target)

In [6]:
# Build the contrastive training data object handed to `trainer.fit`.
# `contra_pl` comes from the project's star imports; the argument meanings below
# are inferred from the config keys only — verify against its definition.
# NOTE(review): the data path is hard-coded here rather than taken from config['dc'].
contraDL = contra_pl(
    '/content/drive/MyDrive/research/Infinite/data/multiATIS/split/train/contraSet/train_EN.tsv',
    config['mc']['tokenizer_name'],
    config['dc']['max_len'],
    config['tc']['batch_size'],
    config['tc']['num_worker'],
)

In [19]:
class Infinite_ICL_training(pl.LightningModule):
    """LightningModule that trains ``Infinite_encoder`` on its intent contrastive loss.

    Args:
        cfg: configuration dict. It is forwarded to ``Infinite_encoder`` and its
            ``cfg['tc']['lr']`` / ``cfg['tc']['weight_decay']`` entries configure
            the optimizer.
    """

    def __init__(self, cfg):
        super().__init__()

        self.Infinite_encoder = Infinite_encoder(cfg)
        self.cfg = cfg

    def forward(self, input_ids, attention_mask, intent_target, slots_target):
        """Delegate to the wrapped encoder and return the loss it computes."""
        return self.Infinite_encoder(input_ids, attention_mask, intent_target, slots_target)

    def training_step(self, batch, batch_idx):
        """Compute and log the loss for one training batch.

        Args:
            batch: mapping that provides the keys 'token_ids', 'mask',
                'sent_id' and 'slot_id'.
            batch_idx: index of the batch (unused).

        Returns:
            The loss returned by ``forward``.
        """
        token_ids, attention_mask = batch['token_ids'], batch['mask']
        intent_target, slots_target = batch['sent_id'], batch['slot_id']

        loss = self(token_ids, attention_mask, intent_target, slots_target)

        # Logged per epoch only (not per step) under the name 'train_ICL'.
        self.log('train_ICL', loss, on_step=False, on_epoch=True, logger=True)

        return loss

    def configure_optimizers(self):
        """Create the AdamW optimizer from this module's own configuration.

        Both hyper-parameters are read from ``self.cfg`` so the module does not
        depend on a notebook-level ``config`` global being defined (or being
        the same object that was passed to ``__init__``).
        """
        train_cfg = self.cfg['tc']
        return torch.optim.AdamW(
            self.parameters(),
            lr=train_cfg['lr'],
            weight_decay=train_cfg['weight_decay'],
        )

In [20]:
# Instantiate the training module with the notebook-level `config` dict.
model = Infinite_ICL_training(config)

In [None]:
# model training
# Device selection, numeric precision and epoch count come from `config`;
# gradients are accumulated over 4 batches before each optimizer step.
trainer = pl.Trainer(
    gpus=config['misc']['gpus'],
    precision=config['misc']['precision'],
    max_epochs=config['tc']['epoch'],
    accumulate_grad_batches=4,
    callbacks=[checkpoint_callback],
)

# Train `model` on the contrastive data object built earlier.
trainer.fit(model, contraDL)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type             | Params
------------------------------------------------------
0 | Infinite_encoder | Infinite_encoder | 134 M 
------------------------------------------------------
134 M     Trainable params
0         Non-trainable params
134 M     Total params
538.936   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]