In [1]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F
from transformers import  DistilBertModel,DistilBertTokenizerFast
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from pytorch_lightning import seed_everything, loggers as pl_loggers
from pytorch_lightning.callbacks import ModelCheckpoint
import pandas as pd
import random
from scripts.dataset import nluDataset
#from scripts.model import IC_NER
from scripts.utils import *

In [2]:
def slot_F1(pred,target,id2slots):
    """Score predicted slot labels against gold slot labels with ``f1_score``.

    Args:
        pred: object exposing ``.tolist()`` (a tensor in this notebook) that
            yields predicted slot ids indexed as ``[sample][token]``.
        target: same layout as ``pred`` but holding gold slot ids; positions
            whose id is ``-100`` are left out of the score.
        id2slots: mapping from slot id to slot label string.

    Returns:
        Whatever ``f1_score`` returns for the collected label sequences
        (called with ``mode='strict'``, ``scheme=IOB2``, ``average='weighted'``).
    """
    predicted_ids = pred.tolist()
    gold_ids = target.tolist()

    pred_slots, target_slots = [], []
    for row, gold_row in enumerate(gold_ids):
        # Pair up (gold label, predicted label) for every position that
        # carries a real gold id; -100 positions are dropped from both sides.
        labelled = [
            (id2slots[gold_id], id2slots[predicted_ids[row][col]])
            for col, gold_id in enumerate(gold_row)
            if gold_id != -100
        ]
        pred_slots.append([pred_label for _, pred_label in labelled])
        target_slots.append([gold_label for gold_label, _ in labelled])

    return f1_score(target_slots, pred_slots, mode='strict', scheme=IOB2, average='weighted')

In [3]:
# NOTE(review): global seeding is disabled here, exactly as in the original
# cell; enable the line below if a reproducible run is wanted.
#seed_everything(42)
tb_logger = pl_loggers.TensorBoardLogger('logs/augmented1/01/jointBertEN')

# The slot list is read as a single headerless column; the row position of a
# label is used as its integer id.
final_slots = pd.read_csv('./data/multiATIS/slots_list.csv',sep=',',header=None,names=['SLOTS']).SLOTS.values.tolist()
idx2slots  = {idx:slots for idx,slots in enumerate(final_slots)}

# The filename template may only reference metrics that are actually logged.
# The model logs 'val_IC_NER_loss' (the monitored metric) and never logs a
# metric called 'val_loss', so the previous '{val_loss}' placeholder carried
# no information (the saved names referenced later in this notebook all read
# 'val_loss=0'). Use the monitored metric so the score is visible in the name.
checkpoint_callback = ModelCheckpoint(
    dirpath='bin/augmented1/01/EN/v1/',
    monitor='val_IC_NER_loss',
    filename='jointBert-{epoch:02d}-{val_IC_NER_loss:.4f}',
)

In [4]:
class IC_NER(nn.Module):
    """Joint intent-classification (IC) and slot-tagging (NER) heads on a DistilBERT encoder.

    The intent head is a three-layer MLP applied to the encoder output at
    position 0 of every sequence; the slot head is a single linear layer
    applied to the encoder output at every position. Submodule attribute names
    are kept stable so previously saved state dicts keep loading.

    Args:
        model_name: name or path handed to ``DistilBertModel.from_pretrained``.
        num_intents: number of intent classes (default 18).
        num_slots: number of slot labels (default 159).
        dropout: dropout probability used in both heads (default 0.25).
        joint_loss_coef: weight of the intent loss in the joint loss; the slot
            loss is weighted by ``1 - joint_loss_coef`` (default 0.5).
    """

    def __init__(self, model_name, num_intents=18, num_slots=159, dropout=0.25, joint_loss_coef=0.5):

        super(IC_NER,self).__init__()

        self.encoder = DistilBertModel.from_pretrained(model_name,return_dict=True,output_hidden_states=True)
        # Size the heads from the encoder configuration instead of assuming 768.
        hidden_size = self.encoder.config.dim

        # intent layers
        self.intent_dropout = nn.Dropout(dropout)
        self.intent_FC1 = nn.Linear(hidden_size, 512)
        self.intent_FC2 = nn.Linear(512, 128)
        self.intent_FC3 = nn.Linear(128, num_intents)

        # slots layer
        self.slots_dropout = nn.Dropout(dropout)
        self.slots_FC = nn.Linear(hidden_size, num_slots)

        # CrossEntropyLoss ignores targets equal to -100 by default, which is
        # the value the slot metric code in this notebook also skips.
        self.intent_loss_fn = nn.CrossEntropyLoss()
        self.slot_loss_fn = nn.CrossEntropyLoss()
        self.jlc = joint_loss_coef

    def forward(self, input_ids, attention_mask, intent_target=None, slots_target=None):
        """Run the encoder and both heads.

        Args:
            input_ids: token ids passed to the encoder.
            attention_mask: attention mask passed to the encoder.
            intent_target: optional gold intent ids; when omitted no intent
                loss is computed.
            slots_target: optional gold slot ids (flattened before the loss);
                when omitted no slot loss is computed.

        Returns:
            dict with keys ``joint_loss``, ``ic_loss``, ``ner_loss``,
            ``intent_pred`` and ``slot_pred``. A loss entry is ``None`` when
            the target it needs was not supplied; ``joint_loss`` needs both.
        """
        encoded_output = self.encoder(input_ids, attention_mask)
        token_states = encoded_output[0]

        # intent data flow: encoder output at position 0 -> 3-layer MLP
        intent_hidden = token_states[:,0]
        intent_hidden = self.intent_FC1(self.intent_dropout(F.relu(intent_hidden)))
        intent_hidden = self.intent_FC2(self.intent_dropout(F.relu(intent_hidden)))
        intent_logits = self.intent_FC3(self.intent_dropout(F.relu(intent_hidden)))
        # Softmax is monotonic, so the argmax is taken on the logits directly.
        intent_pred = torch.argmax(intent_logits, dim=1)

        # slots data flow: every position -> linear layer
        slots_logits = self.slots_FC(self.slots_dropout(F.relu(token_states)))
        slot_pred = torch.argmax(slots_logits, dim=2)

        intent_loss = slot_loss = joint_loss = None
        if intent_target is not None:
            intent_loss = self.intent_loss_fn(intent_logits, intent_target)
        if slots_target is not None:
            # Flatten to (positions, labels) using the actual label dimension
            # rather than a hard-coded count; reshape also copes with
            # non-contiguous inputs.
            slot_loss = self.slot_loss_fn(
                slots_logits.reshape(-1, slots_logits.size(-1)),
                slots_target.reshape(-1),
            )

        # Fixed convex combination of the two task losses. Uncertainty-based
        # weighting ("Multi-Task Learning Using Uncertainty to Weigh Losses
        # for Scene Geometry and Semantics") was tried earlier and is not used.
        if intent_loss is not None and slot_loss is not None:
            joint_loss = self.jlc*intent_loss + (1.0 - self.jlc)*slot_loss

        return {'joint_loss':joint_loss,
                'ic_loss': intent_loss,
                'ner_loss': slot_loss,
                'intent_pred':intent_pred,
                'slot_pred':slot_pred}

In [5]:
class NLU_Dataset(pl.LightningDataModule):
    """Lightning data module wrapping ``nluDataset`` for train/val/test files.

    Args:
        train_dir: path of the training file handed to ``nluDataset``.
        val_dir: path of the validation file handed to ``nluDataset``.
        test_dir: path of the test file handed to ``nluDataset``.
        tokenizer: tokenizer argument forwarded unchanged to ``nluDataset``.
        max_len: maximum length argument forwarded to ``nluDataset``.
        batch_size: batch size used by all three dataloaders.
    """

    def __init__(self, train_dir, val_dir, test_dir,tokenizer, max_len, batch_size):

        super().__init__()
        self.train_dir = train_dir
        self.val_dir = val_dir
        self.test_dir = test_dir
        self.batch_size = batch_size
        self.tokenizer = tokenizer
        self.max_len = max_len

    def _build(self, path):
        """Create one ``nluDataset`` for ``path`` with the shared settings."""
        # NOTE(review): the trailing 2 is an nluDataset argument whose meaning
        # is not visible from this notebook - confirm against scripts.dataset.
        return nluDataset( path, self.tokenizer, self.max_len,2)

    def setup(self, stage=None):
        """Build only the datasets the requested stage needs.

        Args:
            stage: ``'fit'``, ``'validate'``, ``'test'`` or ``None``. ``None``
                (a plain ``dm.setup()`` call) or any other value builds all
                three datasets, as before.
        """
        load_all = stage not in ('fit', 'validate', 'test')

        if load_all or stage == 'fit':
            self.train = self._build(self.train_dir)

        if load_all or stage in ('fit', 'validate'):
            self.val = self._build(self.val_dir)

        if load_all or stage == 'test':
            self.test = self._build(self.test_dir)

    def train_dataloader(self):
        """Return the training dataloader."""
        # NOTE(review): the training loader does not shuffle; confirm whether
        # nluDataset already randomises order before adding shuffle=True.
        return DataLoader(self.train, batch_size=self.batch_size)

    def val_dataloader(self):
        """Return the validation dataloader."""
        return DataLoader(self.val, batch_size=self.batch_size)

    def test_dataloader(self):
        """Return the test dataloader."""
        return DataLoader(self.test, batch_size=self.batch_size)

In [6]:
class jointBert(pl.LightningModule):
    """Lightning wrapper that trains and evaluates the ``IC_NER`` model.

    Args:
        model_name: encoder name forwarded to ``IC_NER`` (defaults to the
            multilingual DistilBERT used throughout this notebook).
        lr: learning rate for AdamW (default 3e-5).

    The constructor arguments are not stored in checkpoints, so pass the same
    values to ``load_from_checkpoint`` when non-default ones were used.

    NOTE(review): intent accuracy and slot F1 are computed per batch and then
    reduced over the epoch by ``self.log(on_epoch=True)``; for F1 this is not
    the same as a corpus-level score - confirm this is the intended metric.
    """

    def __init__(self, model_name='distilbert-base-multilingual-cased', lr=3e-5):
        super().__init__()
        # Attribute name is part of the state-dict keys; keep it unchanged.
        self.IC_NER = IC_NER(model_name)
        self.lr = lr

    def forward(self, input_ids, attention_mask , intent_target, slots_target):
        """Delegate to ``IC_NER`` and return its output dict."""
        return self.IC_NER(input_ids, attention_mask , intent_target, slots_target)

    def _forward_batch(self, batch):
        """Unpack a batch dict and run the model on it."""
        return self(batch['token_ids'], batch['mask'], batch['intent_id'], batch['slots_id'])

    def _log_losses(self, prefix, out, prog_bar=False):
        """Log joint, intent and slot losses as epoch-level metrics under ``prefix``."""
        self.log(prefix + '_IC_NER_loss', out['joint_loss'], on_step=False, on_epoch=True, prog_bar=prog_bar, logger=True)
        self.log(prefix + '_IC_loss', out['ic_loss'], on_step=False, on_epoch=True, logger=True)
        self.log(prefix + '_NER_loss', out['ner_loss'], on_step=False, on_epoch=True, logger=True)

    def _log_eval_metrics(self, out, batch, acc_key, f1_key):
        """Log intent accuracy and slot F1 for one evaluation batch."""
        self.log(acc_key, accuracy(out['intent_pred'], batch['intent_id']), on_step=False, on_epoch=True, logger=True)
        self.log(f1_key, slot_F1(out['slot_pred'], batch['slots_id'], idx2slots), on_step=False, on_epoch=True, logger=True)

    def training_step(self, batch, batch_idx):
        """Run one training batch, log the losses and return the joint loss."""
        out = self._forward_batch(batch)
        self._log_losses('train', out, prog_bar=True)
        return out['joint_loss']

    def validation_step(self, batch, batch_idx):
        """Run one validation batch and log losses, intent accuracy and slot F1."""
        out = self._forward_batch(batch)
        self._log_losses('val', out)
        # The validation F1 key has no 'val_' prefix; kept as-is so existing
        # logs and dashboards stay comparable.
        self._log_eval_metrics(out, batch, 'val_intent_acc', 'slot_f1')
        return out['joint_loss']

    def test_step(self,batch,batch_idx):
        """Run one test batch and log losses, intent accuracy and slot F1."""
        out = self._forward_batch(batch)
        self._log_losses('test', out)
        self._log_eval_metrics(out, batch, 'test_intent_acc', 'test_slot_f1')
        return out['joint_loss']

    def configure_optimizers(self):
        """Optimise every parameter (encoder and heads) with AdamW."""
        return torch.optim.AdamW(self.parameters(), lr=self.lr)

In [6]:
# Data module for the training run: augmented train/dev files, clean test file.
# The tokenizer is given by name; sequences use max_len 56 and batch size 16.
dm = NLU_Dataset(
    train_dir='./data/multiATIS/split/train/augmented/train_01_EN.tsv',
    val_dir='./data/multiATIS/split/valid/augmented/dev_01_EN.tsv',
    test_dir='./data/multiATIS/split/test/OOC_test/clean/test_EN.tsv',
    tokenizer='distilbert-base-multilingual-cased',
    max_len=56,
    batch_size=16,
)
model = jointBert()

In [7]:
# Trainer shared by training and by the evaluation cell further down:
# 16-bit precision, gradients accumulated over 4 batches, validation after
# every epoch, TensorBoard logging and checkpointing via the callback.
trainer = pl.Trainer(
    gpus=-1,
    precision=16,
    accumulate_grad_batches=4,
    max_epochs=15,
    check_val_every_n_epoch=1,
    logger=tb_logger,
    callbacks=[checkpoint_callback],
)

# Training is currently switched off; uncomment to fit the model.
#trainer.fit(model, dm)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Using native 16bit precision.


In [8]:
def cal_mean_stderror(metric):
    """Return the mean and the standard error of the mean of ``metric``.

    The standard error is the sample standard deviation (``n - 1`` in the
    denominator) divided by ``sqrt(n)``.

    Args:
        metric: non-empty sequence of numbers (supports ``len`` and iteration).

    Returns:
        ``[mean, std_error]``, each rounded to 4 decimals. With a single value
        the standard error is undefined and is returned as ``nan``.

    Raises:
        ValueError: if ``metric`` is empty.
    """
    n = len(metric)
    if n == 0:
        raise ValueError('metric must contain at least one value')

    mean = sum(metric)/n
    if n == 1:
        # A sample standard deviation needs at least two observations.
        return [round(mean,4), float('nan')]

    squared_dev = sum((m-mean)**2 for m in metric)
    std_dev = (squared_dev/(n-1))**0.5
    std_error = std_dev/(n**0.5)
    return [round(mean,4),round(std_error,4)]

In [11]:
# Evaluate several saved checkpoints on one test loader, then summarise intent
# accuracy and slot F1 across them with cal_mean_stderror.
acc, slotF1 = [], []

# NOTE(review): train_dir points at a test-split file here, and only the test
# loader built from test_dir is used below - confirm which file is meant to
# be evaluated.
dm1 = NLU_Dataset(
    train_dir='./data/multiATIS/split/test/OOC_test/0_10n/test_EN.tsv',
    val_dir='./data/multiATIS/split/valid/clean/dev_EN.tsv',
    test_dir='./data/multiATIS/split/test/OOC_test/clean/test_EN.tsv',
    tokenizer='distilbert-base-multilingual-cased',
    max_len=56,
    batch_size=1,
)
dm1.setup()
test1 = dm1.test_dataloader()

checkpoint_paths = [
    './bin/augmented1/050/EN/v1/jointBert-epoch=18-val_loss=0.ckpt',
    './bin/augmented1/050/EN/v2/jointBert-epoch=18-val_loss=0.ckpt',
    './bin/augmented1/050/EN/v3/jointBert-epoch=17-val_loss=0.ckpt',
    './bin/augmented1/050/EN/v5/jointBert-epoch=18-val_loss=0.ckpt',
]

for m in checkpoint_paths:
    model = jointBert.load_from_checkpoint(checkpoint_path=m, map_location=None)
    model.eval()
    # trainer.test returns a list with one metrics dict per dataloader.
    out = trainer.test(model=model, test_dataloaders=test1)
    acc.append(out[0]['test_intent_acc'])
    slotF1.append(out[0]['test_slot_f1'])

print('acc:',cal_mean_stderror(acc),'slotsF1',cal_mean_stderror(slotF1))

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_IC_NER_loss': 0.42969295382499695,
 'test_IC_loss': 0.5731149911880493,
 'test_NER_loss': 0.28627094626426697,
 'test_intent_acc': 0.9025756120681763,
 'test_slot_f1': 0.9085431098937988}
--------------------------------------------------------------------------------


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_IC_NER_loss': 0.4418017566204071,
 'test_IC_loss': 0.6008492112159729,
 'test_NER_loss': 0.2827543318271637,
 'test_intent_acc': 0.8891377449035645,
 'test_slot_f1': 0.9081439971923828}
--------------------------------------------------------------------------------


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_IC_NER_loss': 0.4131576120853424,
 'test_IC_loss': 0.5451993942260742,
 'test_NER_loss': 0.2811158001422882,
 'test_intent_acc': 0.8857782483100891,
 'test_slot_f1': 0.9046146273612976}
--------------------------------------------------------------------------------


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_IC_NER_loss': 0.4027230441570282,
 'test_IC_loss': 0.5194768309593201,
 'test_NER_loss': 0.2859693467617035,
 'test_intent_acc': 0.935050368309021,
 'test_slot_f1': 0.9065072536468506}
--------------------------------------------------------------------------------
acc: [0.9031, 0.0112] slotsF1 [0.907, 0.0009]
