In [3]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F
from transformers import  DistilBertModel
import torch
from torch.utils.data import Dataset
from transformers import DistilBertTokenizerFast
import pandas as pd
import random
from torch.utils.data import DataLoader

In [4]:
class jointBert(nn.Module):

    def __init__(self, model_name):

        super(jointBert,self).__init__()
        
        self.encoder = DistilBertModel.from_pretrained(model_name,return_dict=True,output_hidden_states=True)
        #p_intent = trial.suggest_float("intent_dropout", 0.1, 0.4)
        self.intent_dropout = nn.Dropout(0.1)#args.intent_dropout_val)
        self.intent_FC = nn.Linear(768, 17)
 

        # slots layer
        self.slots_dropout = nn.Dropout(0.1)#args.slots_dropout_val)
        self.slots_FC = nn.Linear(768, 159)
        #p_slots = trial.suggest_float("slots_dropout", 0.1, 0.4)

        self.intent_loss_fn = nn.CrossEntropyLoss()
        self.slot_loss_fn = nn.CrossEntropyLoss()
        #self.log_vars = nn.Parameter(torch.zeros((2)))

        self.jlc = 0.5#args.joint_loss_coef
        

    
    def forward(self, input_ids, attention_mask , intent_target, slots_target):

        encoded_output = self.encoder(input_ids, attention_mask)

        #intent data flow
        intent_hidden = encoded_output[0][:,0]
        intent_logits = self.intent_FC(self.intent_dropout(F.relu(intent_hidden)))
        
        
        # accumulating intent classification loss 
        intent_loss = self.intent_loss_fn(intent_logits, intent_target)
        intent_pred = torch.argmax(nn.Softmax(dim=1)(intent_logits), axis=1)
        

        # slots data flow 
        slots_hidden = encoded_output[0]
        slots_logits = self.slots_FC(self.slots_dropout(F.relu(slots_hidden)))
       

        # accumulating slot prediction loss
        slot_loss = self.slot_loss_fn(slots_logits.view(-1, 159), slots_target.view(-1))
        
        joint_loss = ((1-self.jlc)*intent_loss + (self.jlc)*slot_loss)
        

        return joint_loss,intent_pred,intent_loss,slot_loss


In [5]:
class nluDataset(Dataset):

    def __init__(self, file_dir, tokenizer, max_len, device):
        
        self.data = pd.read_csv(file_dir, sep='\t')
        self.tokenizer = DistilBertTokenizerFast.from_pretrained(tokenizer)
        self.max_len = max_len
    
    def processSlotLabel(self,word_ids,slot_ids):
        
        slot_ids = list(map(int, slot_ids.split(' ')))   
        new_labels = []
        previous_word_idx = None
        
        for word_idx in word_ids:
            if word_idx is None:
                new_labels.append(-100)
            elif word_idx != previous_word_idx:
                new_labels.append(slot_ids[word_idx])
            else:
                new_labels.append(-100)
        
        return new_labels  
        

    def __getitem__(self, index):
        
        text = str(self.data.TEXT[index])
        text = " ".join(text.split())
        
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            return_token_type_ids=False,
            truncation=True,
            max_length=self.max_len,
            padding='max_length',
            
            #is_split_into_words=True
        )
        
        
        # text encoding
        token_ids = torch.tensor(inputs['input_ids'], dtype=torch.long)
        mask = torch.tensor(inputs['attention_mask'], dtype=torch.long)
        word_ids = inputs.word_ids()

        # intent
        intent_id = torch.tensor(self.data.INTENT_ID[index], dtype=torch.long)
        intent_label = self.data.INTENT[index]

        # label processing
        slot_label = self.data.SLOTS[index]
        slot_id = self.processSlotLabel(word_ids,self.data.SLOTS_ID[index])
        slot_id = torch.tensor(slot_id,dtype=torch.long)
        #language = self.data.language[index]
        
        return {
            'token_ids': token_ids,
            'mask': mask,
            'intent_id': intent_id,
            'slots_id' : slot_id,
            'intent_label': intent_label,
            'slots_label' : slot_label
        } 
    
    def __len__(self):
        return len(self.data)

In [7]:
model = jointBert('distilbert-base-multilingual-cased').to('cuda')

In [8]:
train_DS =  nluDataset('../data/multiATIS/splits/train_EN.tsv','distilbert-base-multilingual-cased',56,2)

Downloading:   0%|          | 0.00/996k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

In [9]:
train_DL = DataLoader(train_DS,batch_size=4)

In [10]:
for batch in train_DL:
    batch
    token_id,mask,intent_target,slot_target = batch['token_ids'] , batch['mask'],batch['intent_id'],batch['slots_id']
    out = model(token_id,mask,intent_target,slot_target)
    print(out)

AttributeError: 'dict' object has no attribute 'to'