In [None]:
import sys

from google.colab import drive

# Mount Google Drive so the project folder is reachable from the Colab runtime.
drive.mount('/content/drive')

# Root of the project on Drive; data and checkpoint paths are built from it.
basepath = '/content/drive/MyDrive/research/Infinite/'

# Make the project's own packages (`scripts`, `arguments`) importable.
sys.path.append(basepath)

In [None]:
! pip install pytorch-lightning
! pip install transformers
! pip install seqeval

In [1]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from pytorch_lightning import seed_everything, loggers as pl_loggers
from pytorch_lightning.callbacks import ModelCheckpoint
from transformers import  DistilBertModel,DistilBertTokenizerFast

import pandas as pd
import random

from scripts.dataset import *
from scripts.model import IC_NER
from scripts.utils import *
from arguments import jointBert_argument

# parameters

In [2]:
# Every tunable value of the experiment lives in one nested dict, grouped by purpose.
config = dict(
    # model parameters (forwarded to IC_NER through the LightningModule)
    mc=dict(
        model_name='distilbert-base-multilingual-cased',
        tokenizer_name='distilbert-base-multilingual-cased',
        joint_loss_coef=0.5,
        id_1=0.3742511688366331,
        id_2=0.4907499594086448,
        sd=0.4103913444544801,
        Ihs=90,
    ),
    # training parameters
    tc=dict(
        lr=0.00003,
        epoch=20,
        batch_size=16,
        weight_decay=0.003,
        shuffle_data=True,
        num_worker=8,
    ),
    # data parameters
    dc=dict(
        train_dir=basepath + 'data/multiATIS/split/train/clean/train.tsv',
        val_dir=basepath + 'data/multiATIS/split/valid/clean/val.tsv',
        intent_num=18,
        slots_num=159,
        max_len=56,
    ),
    # miscellaneous run settings
    misc=dict(
        fix_seed=False,
        gpus=-1,
        log_dir='./',
        precision=16,
    ),
)

In [3]:
# Load the slot label list (one label per row, no header) and build the
# index -> label lookup that the slot-F1 metric receives.
final_slots = pd.read_csv( basepath + 'data/multiATIS/slots_list.csv',sep=',',header=None,names=['SLOTS']).SLOTS.values.tolist()
idx2slots  = {idx:slots for idx,slots in enumerate(final_slots)}

# Checkpoint callback for pytorch lightning: keep only the single best model,
# ranked by the lowest joint validation loss.
# The filename template must reference a metric that is actually logged.
# The model logs 'val_IC_NER_loss' (never 'val_loss'), so a '{val_loss}'
# placeholder is always rendered as 0.00 in the checkpoint name.
checkpoint_callback = ModelCheckpoint(
    monitor='val_IC_NER_loss',
    dirpath= basepath + 'bin/clean/',
    filename='jointBert-{epoch:02d}-{val_IC_NER_loss:.2f}',
    save_top_k=1,
    mode='min',
)

In [4]:
class jointBert(pl.LightningModule):
    """Lightning wrapper around the joint intent-classification / slot-filling model.

    The wrapped ``IC_NER`` network is called with the token ids, the attention
    mask and both targets. The result is indexed here with the keys
    ``'joint_loss'``, ``'ic_loss'``, ``'ner_loss'``, ``'intent_pred'`` and
    ``'slot_pred'``.

    Args:
        cfg: nested configuration dict. It is forwarded unchanged to ``IC_NER``;
            this class itself reads ``cfg['tc']['lr']`` and
            ``cfg['tc']['weight_decay']``.
    """

    def __init__(self, cfg):
        super().__init__()

        self.IC_NER = IC_NER(cfg)
        self.cfg = cfg

    def forward(self, input_ids, attention_mask , intent_target, slots_target):
        """Delegate to the wrapped ``IC_NER`` network and return its output."""
        return self.IC_NER(input_ids, attention_mask , intent_target, slots_target)

    def _run_batch(self, batch):
        """Unpack a batch dict, run the forward pass, and return ``(out, intent_target, slots_target)``."""
        token_ids, attention_mask = batch['token_ids'], batch['mask']
        intent_target, slots_target = batch['intent_id'], batch['slots_id']

        out = self(token_ids, attention_mask, intent_target, slots_target)
        return out, intent_target, slots_target

    def training_step(self, batch, batch_idx):
        """Log the three training losses per epoch and return the joint loss."""
        out, _, _ = self._run_batch(batch)

        self.log('train_IC_NER_loss', out['joint_loss'], on_step=False, on_epoch=True, logger=True)
        self.log('train_IC_loss', out['ic_loss'], on_step=False, on_epoch=True, logger=True)
        self.log('train_NER_loss', out['ner_loss'], on_step=False, on_epoch=True, logger=True)

        return out['joint_loss']

    def validation_step(self, batch, batch_idx):
        """Log validation losses, intent accuracy and slot F1; return the joint loss.

        ``'val_IC_NER_loss'`` is the metric monitored by the checkpoint callback.
        """
        out, intent_target, slots_target = self._run_batch(batch)
        intent_pred, slot_pred = out['intent_pred'], out['slot_pred']

        self.log('val_IC_NER_loss', out['joint_loss'], on_step=False, on_epoch=True,  logger=True)
        self.log('val_IC_loss', out['ic_loss'], on_step=False, on_epoch=True,  logger=True)
        self.log('val_NER_loss', out['ner_loss'], on_step=False, on_epoch=True,  logger=True)
        self.log('val_intent_acc', accuracy(intent_pred,intent_target), on_step=False, on_epoch=True,  logger=True)
        # Key kept as 'slot_f1' (no 'val_' prefix) so existing logs stay comparable.
        self.log('slot_f1', slot_F1(slot_pred,slots_target,idx2slots), on_step=False, on_epoch=True, logger=True)

        return out['joint_loss']

    def test_step(self,batch,batch_idx):
        """Log test intent accuracy and slot F1; return the joint loss."""
        out, intent_target, slots_target = self._run_batch(batch)
        intent_pred, slot_pred = out['intent_pred'], out['slot_pred']

        self.log('test_intent_acc', accuracy(intent_pred,intent_target), on_step=False, on_epoch=True,  logger=True)
        self.log('test_slot_f1', slot_F1(slot_pred,slots_target,idx2slots), on_step=False, on_epoch=True, logger=True)

        return out['joint_loss']

    def configure_optimizers(self):
        """Build the AdamW optimizer from the training section of ``self.cfg``.

        Both hyper-parameters come from the config given to the constructor,
        so the optimizer always matches the config this instance was built with.
        """
        train_cfg = self.cfg['tc']
        return torch.optim.AdamW(self.parameters(), lr=train_cfg['lr'], weight_decay=train_cfg['weight_decay'])

In [5]:
# Build the Lightning data module from the data / training sections of the config.
# The validation file is passed for both the second and the third path argument.
dm = NLU_Dataset_pl(
    config['dc']['train_dir'],
    config['dc']['val_dir'],
    config['dc']['val_dir'],
    config['mc']['tokenizer_name'],
    config['dc']['max_len'],
    config['tc']['batch_size'],
    config['tc']['num_worker'],
)

In [6]:
# Instantiate the LightningModule with the full experiment configuration.
model = jointBert(cfg=config)

In [7]:
# Train the model: gradients are accumulated over 4 batches, validation runs
# after every epoch, and the checkpoint callback keeps the best model.
trainer = pl.Trainer(
    gpus=config['misc']['gpus'],
    callbacks=[checkpoint_callback],
    accumulate_grad_batches=4,
    precision=config['misc']['precision'],
    max_epochs=config['tc']['epoch'],
    check_val_every_n_epoch=1,
)

trainer.fit(model, dm)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type   | Params
----------------------------------
0 | IC_NER | IC_NER | 134 M 
----------------------------------
134 M     Trainable params
0         Non-trainable params
134 M     Total params
539.709   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

RuntimeError: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 0; 3.95 GiB total capacity; 2.03 GiB already allocated; 359.19 MiB free; 2.43 GiB reserved in total by PyTorch)

In [14]:
def cal_mean_stderror(metric):
    """Summarise repeated measurements as ``[mean, standard_error]``.

    The standard error is the sample standard deviation (``n - 1`` in the
    denominator) divided by ``sqrt(n)``. Both numbers are rounded to 4 places.

    Args:
        metric: non-empty sequence of numbers, e.g. one score per test file.

    Returns:
        ``[mean, std_error]`` for two or more values. For a single value the
        standard error is undefined, so a new one-element list ``[value]``
        (rounded to 4 places) is returned.

    Raises:
        ValueError: if ``metric`` is empty.
    """
    n = len(metric)
    if n == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError('cal_mean_stderror() needs at least one value')
    if n == 1:
        # Return a fresh list so the caller's list is never aliased.
        return [round(metric[0], 4)]

    mean = sum(metric) / n
    squared_dev = sum((m - mean) ** 2 for m in metric)
    std_dev = (squared_dev / (n - 1)) ** 0.5
    std_error = std_dev / (n ** 0.5)
    return [round(mean, 4), round(std_error, 4)]

In [24]:
# testing the model

# The five test files test_EN_01 ... test_EN_05 of the OOC_Noise/60per split.
test_files = [
    f'./data/multiATIS/split/test/OOC_Noise/60per/test_EN_{file_no:02d}.tsv'
    for file_no in range(1, 6)
]
#model = jointBert.load_from_checkpoint("./bin/clean/jointBert-epoch=13-val_loss=0.00.ckpt",cfg=config)

# One score per test file.
acc = []
slotF1 = []

for test_fn in test_files:
    # The same file is passed for all three path arguments; only the test loader is used.
    dm = NLU_Dataset_pl(
        test_fn, test_fn, test_fn,
        config['mc']['tokenizer_name'],
        config['dc']['max_len'],
        1,
        1,
    )
    dm.setup()
    test = dm.test_dataloader()

    out = trainer.test(model=model, test_dataloaders=test)
    metrics = out[0]
    acc.append(metrics['test_intent_acc'])
    slotF1.append(metrics['test_slot_f1'])

# Report mean and standard error across the test files.
print('acc:', cal_mean_stderror(acc), 'slotsF1', cal_mean_stderror(slotF1))

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_intent_acc': 0.879059374332428, 'test_slot_f1': 0.781968355178833}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_intent_acc': 0.8835386633872986, 'test_slot_f1': 0.7816176414489746}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_intent_acc': 0.8812990188598633, 'test_slot_f1': 0.769542396068573}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_intent_acc': 0.8779395222663879, 'test_slot_f1': 0.7748270630836487}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_intent_acc': 0.8633818626403809, 'test_slot_f1': 0.766289234161377}
--------------------------------------------------------------------------------
acc: [0.877, 0.0035] slotsF1 [0.7748, 0.0031]


In [25]:
{'test_intent_acc': 0.9697648286819458, 'test_slot_f1': 0.9032818078994751}

BG NEWS : 
acc: [0.8396, 0.0036] slotsF1 [0.3673, 0.0022]

WWTLE:
    
    8 words : acc: [0.9604130983352661] slotsF1 [0.8325998187065125]
    10 words :   acc: [0.9520000219345093] slotsF1 [0.8977355360984802] 
    12 words : acc: [0.9321267008781433] slotsF1 [0.9351863861083984]
    14 words : acc: [0.9262295365333557] slotsF1 [0.9395879507064819]

OOC :
    20: acc: [0.9207, 0.0031] slotsF1 [0.8278, 0.002]
    40: acc: [0.9021, 0.0032] slotsF1 [0.7966, 0.0032]
    60: acc: [0.8748, 0.0038] slotsF1 [0.7518, 0.0032]
    


SyntaxError: invalid syntax (<ipython-input-25-7f0e269330bb>, line 3)

In [None]:
# inference: print predictions next to targets for the first example of each test file

test_files = ['./data/multiATIS/split/test/WWTLE/25per/v1/test_EN.tsv']

# Load the checkpoint once; it does not depend on the test file, so reloading
# it on every loop iteration was wasted work.
# NOTE(review): this path names a checkpoint from one specific run - point it
# at the file actually written by the ModelCheckpoint callback.
model = jointBert.load_from_checkpoint("./bin/clean/jointBert-epoch=13-val_loss=0.00.ckpt",cfg=config)
# Put layers such as dropout into inference mode so predictions are deterministic.
model.eval()

for test_fn in test_files:

    # Same file for all three path arguments; batch size 1, one worker.
    dm = NLU_Dataset_pl(test_fn,test_fn, test_fn,config['mc']['tokenizer_name'],config['dc']['max_len'],1,1)
    dm.setup()
    test = dm.test_dataloader()

    # No gradients are needed for inference; skipping them saves memory.
    with torch.no_grad():
        for batch in test:

            intent_target,slots_target = batch['intent_id'], batch['slots_id']
            token_ids, attention_mask = batch['token_ids'], batch['mask']

            out = model(token_ids,attention_mask,intent_target,slots_target)
            intent_pred, slot_pred = out['intent_pred'], out['slot_pred']

            print(intent_pred,intent_target)
            print(slot_pred,slots_target)
            print(batch['text'])

            # Only the first batch is inspected.
            break

#print('acc:',cal_mean_stderror(acc),'slotsF1',cal_mean_stderror(slotF1))

In [None]:
# Path of the clean test split, kept for reference; as a bare expression this
# cell only displays the string.
'./data/multiATIS/split/test/clean/test.tsv'