In [1]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from pytorch_lightning import seed_everything, loggers as pl_loggers
from pytorch_lightning.callbacks import ModelCheckpoint
from transformers import  DistilBertModel,DistilBertTokenizerFast

import pandas as pd
import random

from scripts.dataset import *
from scripts.model import IC_NER
from scripts.utils import *
from arguments import jointBert_argument

# parameters

In [2]:
# Experiment configuration, grouped into four sections:
#   'mc'   -> model parameters
#   'tc'   -> training parameters
#   'dc'   -> data parameters
#   'misc' -> runtime / hardware options
config = {
    # model parameters
    'mc': {
        'model_name': 'distilbert-base-multilingual-cased',
        'tokenizer_name': 'distilbert-base-multilingual-cased',
        'joint_loss_coef': 0.4,
    },

    # training parameters
    # NOTE(review): jointBert.configure_optimizers in this notebook builds AdamW
    # with a hard-coded lr and only reads 'weight_decay' from this section; the
    # three '*_lr' entries are not read by any visible cell (IC_NER(cfg) may
    # still use them -- confirm).
    'tc': {
        'encoder_lr': 0.0005,
        'intent_lr': 0.002,
        'slots_lr': 0.002,
        'epoch': 19,            # passed to pl.Trainer(max_epochs=...)
        'batch_size': 64,       # passed to NLU_Dataset_pl
        'weight_decay': 0.003,  # passed to torch.optim.AdamW
        'shuffle_data': True,
        'num_worker': 8,        # passed to NLU_Dataset_pl
    },

    # data parameters
    'dc': {
        'train_dir': './data/multiATIS/split/train/clean/train_EN.tsv',
        'val_dir': './data/multiATIS/split/valid/clean/dev_EN.tsv',
        'intent_num': 18,
        'slots_num': 159,
        'max_len': 56,
    },

    # misc
    # NOTE(review): 'fix_seed' and 'log_dir' are not read by any visible cell.
    'misc': {
        'fix_seed': False,
        'gpus': -1,       # passed to pl.Trainer(gpus=...)
        'log_dir': './',
        'precision': 16,  # passed to pl.Trainer(precision=...)
    },
}

In [3]:
# Load the slot-label list: the CSV is read with no header row and its column
# is labelled 'SLOTS'; the values become a plain Python list.
final_slots = pd.read_csv(
    './data/multiATIS/slots_list.csv',
    sep=',',
    header=None,
    names=['SLOTS'],
)['SLOTS'].values.tolist()

# Map each position in the list to its slot label (index -> label).
idx2slots = dict(enumerate(final_slots))

# Model checkpoint callback (currently disabled; `args` is not defined in the visible cells).
#checkpoint_callback = ModelCheckpoint(dirpath=args.weight_dir,monitor='val_IC_NER_loss', mode='min', filename='jointBert-{epoch:02d}-{val_loss}')

In [4]:
class jointBert(pl.LightningModule):
    """LightningModule that trains and evaluates the joint ``IC_NER`` network.

    Each step sends one batch through ``IC_NER`` and reads its results from the
    returned mapping via the keys ``'joint_loss'``, ``'ic_loss'``,
    ``'ner_loss'``, ``'intent_pred'`` and ``'slot_pred'``.
    """

    # Keyword arguments shared by every ``self.log`` call in this module.
    _EPOCH_LOG = dict(on_step=False, on_epoch=True, logger=True)

    def __init__(self, cfg):
        """Keep the config mapping and build the wrapped ``IC_NER`` model from it."""
        super().__init__()
        self.cfg = cfg
        self.IC_NER = IC_NER(cfg)

    def forward(self, input_ids, attention_mask , intent_target, slots_target):
        """Delegate to ``IC_NER`` with the inputs and both targets; return its output."""
        return self.IC_NER(input_ids, attention_mask, intent_target, slots_target)

    def _forward_batch(self, batch):
        """Unpack a batch mapping, run the model, return ``(out, intent_target, slots_target)``."""
        ids = batch['token_ids']
        mask = batch['mask']
        intents = batch['intent_id']
        slots = batch['slots_id']
        return self(ids, mask, intents, slots), intents, slots

    def training_step(self, batch, batch_idx):
        """Log the three training losses and return the joint loss."""
        out, _, _ = self._forward_batch(batch)

        loss_metrics = (
            ('train_IC_NER_loss', 'joint_loss'),
            ('train_IC_loss', 'ic_loss'),
            ('train_NER_loss', 'ner_loss'),
        )
        for metric_name, out_key in loss_metrics:
            self.log(metric_name, out[out_key], **self._EPOCH_LOG)

        return out['joint_loss']

    def validation_step(self, batch, batch_idx):
        """Log validation losses, intent accuracy and slot F1; return the joint loss."""
        out, intent_target, slots_target = self._forward_batch(batch)
        intent_pred = out['intent_pred']
        slot_pred = out['slot_pred']

        loss_metrics = (
            ('val_IC_NER_loss', 'joint_loss'),
            ('val_IC_loss', 'ic_loss'),
            ('val_NER_loss', 'ner_loss'),
        )
        for metric_name, out_key in loss_metrics:
            self.log(metric_name, out[out_key], **self._EPOCH_LOG)

        self.log('val_intent_acc', accuracy(intent_pred, intent_target), **self._EPOCH_LOG)
        # The slot F1 metric is logged as 'slot_f1', without the 'val_' prefix
        # the other validation metrics use.
        self.log('slot_f1', slot_F1(slot_pred, slots_target, idx2slots), **self._EPOCH_LOG)

        return out['joint_loss']

    def test_step(self,batch,batch_idx):
        """Log test intent accuracy and slot F1; return the joint loss."""
        out, intent_target, slots_target = self._forward_batch(batch)
        intent_pred = out['intent_pred']
        slot_pred = out['slot_pred']

        self.log('test_intent_acc', accuracy(intent_pred, intent_target), **self._EPOCH_LOG)
        self.log('test_slot_f1', slot_F1(slot_pred, slots_target, idx2slots), **self._EPOCH_LOG)

        return out['joint_loss']

    def configure_optimizers(self):
        """Return one AdamW optimizer over all parameters of this module."""
        # NOTE(review): the learning rate is hard-coded to 3e-5; only
        # 'weight_decay' is read from cfg['tc'], so the 'encoder_lr',
        # 'intent_lr' and 'slots_lr' config entries are not used here.
        decay = self.cfg['tc']['weight_decay']
        return torch.optim.AdamW(self.parameters(), lr=3e-5, weight_decay=decay)

In [5]:
# Build the data module from the configured paths, tokenizer and loader settings.
# NOTE(review): config['dc']['val_dir'] is passed twice (2nd and 3rd argument);
# the third argument is presumably the test split -- confirm that evaluating
# on the validation file is intended.
dm = NLU_Dataset_pl(
    config['dc']['train_dir'],
    config['dc']['val_dir'],
    config['dc']['val_dir'],
    config['mc']['tokenizer_name'],
    config['dc']['max_len'],
    config['tc']['batch_size'],
    config['tc']['num_worker'],
)

In [6]:
# Instantiate the LightningModule with the full experiment configuration.
model = jointBert(cfg=config)

In [7]:
# Configure the Lightning trainer from the config and validate after every epoch.
# NOTE(review): the recorded output of this cell ends in "CUDA out of memory"
# on a 3.95 GiB GPU before the first training step completes; the run did not
# finish with the current settings (batch_size 64, max_len 56).
trainer = pl.Trainer(
    gpus=config['misc']['gpus'],
    precision=config['misc']['precision'],
    max_epochs=config['tc']['epoch'],
    check_val_every_n_epoch=1,
)

trainer.fit(model, dm)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type   | Params
----------------------------------
0 | IC_NER | IC_NER | 134 M 
----------------------------------
134 M     Trainable params
0         Non-trainable params
134 M     Total params
539.828   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

RuntimeError: CUDA out of memory. Tried to allocate 42.00 MiB (GPU 0; 3.95 GiB total capacity; 2.89 GiB already allocated; 32.38 MiB free; 2.94 GiB reserved in total by PyTorch)