In [1]:
# imports and PAD_TOKEN
PAD_TOKEN = 0
import os
import json
import numpy as np

import torch
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'	
os.environ['CUDA_LAUNCH_BLOCKING'] = "1" # Used to report errors on CUDA side

import torch.utils.data as data
from torch.utils.data import DataLoader

import torch.nn as nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# datasets
def word2id_func(raw_dataset):
# returns a dictionary of words and their ids
    words = []
    for entry in raw_dataset:
       words.extend(entry['utterance'].split())
    words = list(set(words))
    words_dict = {'[PAD]': PAD_TOKEN}
    words_dict.update({w:i+1 for i, w in enumerate(words)})
    words_dict['[UNK]'] = len(words_dict)
    return words_dict

def slot2id_func(raw_dataset):
# returns a dictionary of slots and their ids
    slots = ['[PAD]']
    for entry in raw_dataset:
       slots.extend(entry['slots'].split())
    slots = list(set(slots))
    slots_dict = {s:i for i, s in enumerate(slots)}
    return slots_dict

def intent2id_func(raw_dataset):
# returns a dictionary of intents and their ids
    intents = [entry['intent'] for entry in raw_dataset]
    intents = list(set(intents))
    intents_dict = {inte:i for i, inte in enumerate(intents)}
    return intents_dict
def vocab_func(raw_dataset):
    vocab = set()
    for entry in raw_dataset:
        vocab = vocab.union(set(entry['utterance'].split()))
    return ['[PAD]'] + list(vocab) + ['[UNK]']



class Lang():
    def __init__(self, train_raw, dev_raw, test_raw):
        self.word2id = word2id_func(train_raw + dev_raw + test_raw)
        self.slot2id = slot2id_func(train_raw + dev_raw + test_raw)
        self.intent2id = intent2id_func(train_raw + dev_raw + test_raw)
        self.vocab = vocab_func(train_raw + dev_raw + test_raw)
        self.id2word = {v:k for k, v in self.word2id.items()}
        self.id2slot = {v:k for k, v in self.slot2id.items()}
        self.id2intent = {v:k for k, v in self.intent2id.items()}
        
def load_data(path):
    dataset = []
    with open(path) as f:
        dataset = json.loads(f.read())
    return dataset

In [3]:
class IntentsAndSlots (data.Dataset):
    # Mandatory methods are __init__, __len__ and __getitem__
    def __init__(self, dataset, lang, unk='[UNK]'):
        self.utterances = []
        self.intents = []
        self.slots = []
        self.unk = unk
        
        for x in dataset:
            self.utterances.append(x['utterance'])
            self.slots.append(x['slots'])
            self.intents.append(x['intent'])

        self.utt_ids = self.mapping_seq(self.utterances, lang.word2id)
        self.slot_ids = self.mapping_seq(self.slots, lang.slot2id)
        self.intent_ids = self.mapping_lab(self.intents, lang.intent2id)

    def __len__(self):
        return len(self.utterances)

    def __getitem__(self, idx):
        utt = torch.Tensor(self.utt_ids[idx])
        slots = torch.Tensor(self.slot_ids[idx])
        intent = self.intent_ids[idx]
        sample = {'utterance': utt, 'slots': slots, 'intent': intent}
        return sample
    
    # Auxiliary methods
    
    def mapping_lab(self, data, mapper):
        return [mapper[x] if x in mapper else mapper[self.unk] for x in data]
    
    def mapping_seq(self, data, mapper): # Map sequences to number
        res = []
        for seq in data:
            tmp_seq = []
            for x in seq.split():
                if x in mapper:
                    tmp_seq.append(mapper[x])
                else:
                    tmp_seq.append(mapper[self.unk])
            res.append(tmp_seq)
        return res

def collate_fn(data):
    def merge(sequences):
        '''
        merge from batch * sent_len to batch * max_len 
        '''
        lengths = [len(seq) for seq in sequences]
        max_len = 1 if max(lengths)==0 else max(lengths)
        # Pad token is zero in our case
        # So we create a matrix full of PAD_TOKEN (i.e. 0) with the shape 
        # batch_size X maximum length of a sequence
        padded_seqs = torch.LongTensor(len(sequences),max_len).fill_(PAD_TOKEN)
        for i, seq in enumerate(sequences):
            end = lengths[i]
            padded_seqs[i, :end] = seq # We copy each sequence into the matrix
        # print(padded_seqs)
        padded_seqs = padded_seqs.detach()  # We remove these tensors from the computational graph
        return padded_seqs, lengths
    # Sort data by seq lengths
    data.sort(key=lambda x: len(x['utterance']), reverse=True) 
    new_item = {}
    for key in data[0].keys():
        new_item[key] = [d[key] for d in data]
    # We just need one length for packed pad seq, since len(utt) == len(slots)
    src_utt, _ = merge(new_item['utterance'])
    y_slots, y_lengths = merge(new_item["slots"])
    intent = torch.LongTensor(new_item["intent"])
    
    src_utt = src_utt.to(device) # We load the Tensor on our seleceted device
    y_slots = y_slots.to(device)
    intent = intent.to(device)
    y_lengths = torch.LongTensor(y_lengths).to(device)
    
    new_item["utterances"] = src_utt
    new_item["intents"] = intent
    new_item["y_slots"] = y_slots
    new_item["slots_len"] = y_lengths
    return new_item

In [4]:
def prepare_data(dataset):
    train_raw = load_data(os.path.join('data', dataset, 'train.json'))
    test_raw = load_data(os.path.join('data', dataset, 'test.json'))
    dev_raw = load_data(os.path.join('data', dataset, 'valid.json'))
    
    lang = Lang(train_raw, dev_raw, test_raw)

    ##############################
    train_dataset = IntentsAndSlots(train_raw, lang)
    dev_dataset = IntentsAndSlots(dev_raw, lang)
    test_dataset = IntentsAndSlots(test_raw, lang)
    
    ##############################
    train_loader = DataLoader(train_dataset, batch_size=128, collate_fn=collate_fn,  shuffle=True)
    dev_loader = DataLoader(dev_dataset, batch_size=64, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=64, collate_fn=collate_fn)
    
    return train_loader, dev_loader, test_loader, lang

In [5]:
dataset = "SNIPS"
train_loader, dev_loader, test_loader, lang = prepare_data(dataset)
hid_size = 200
emb_size = 300

lr = 0.0001 # learning rate
clip = 5 # gradient clipping	

out_slot = len(lang.slot2id)
out_int = len(lang.intent2id)
vocab_len = len(lang.word2id)

train_raw = load_data(os.path.join('data', dataset, 'train.json'))
test_raw = load_data(os.path.join('data', dataset, 'test.json'))
dev_raw = load_data(os.path.join('data', dataset, 'valid.json'))

In [6]:
from transformers import AutoTokenizer, AutoModel

class JERNIE(nn.Module):
    def __init__ (self, out_int, out_slot):
        super(JERNIE, self).__init__()
        self.tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-2.0-base-en")
        self.ERNIE = AutoModel.from_pretrained("nghuyong/ernie-2.0-base-en")
        self.ERNIE.to(device)
        self.intent_classifier = nn.Linear(768, out_int)
        self.slot_classifier = nn.Linear(768, out_slot)
        
    def forward(self, input, lang):
        # get back the input sentence
        utterance = []
        for element in input:
            utterance.append(' '.join(lang.vocab[i] for i in element if i > 0))
        tokenized = self.tokenizer(utterance, return_tensors='pt', add_special_tokens=True,padding=True).to(device)
        output = self.ERNIE(**tokenized)
        intent = output.pooler_output
        slots = output.last_hidden_state[:,:input.size(1),:]
        
        intent = self.intent_classifier(intent)
        slots = self.slot_classifier(slots)
        slots = slots.permute(0, 2, 1)
        return intent, slots

In [7]:
from conll import evaluate
from sklearn.metrics import classification_report
def evaluation_loop(data, criterion_slots, criterion_intents, model, lang):
    model.eval()
    loss_array = []
    
    ref_intents = []
    hyp_intents = []
    
    ref_slots = []
    hyp_slots = []
    with torch.no_grad(): # It used to avoid the creation of computational graph
        for sample in data:
            intents, slots = model(sample['utterances'], lang)
            loss_intent = criterion_intents(intents, sample['intents'])
            loss_slot = criterion_slots(slots, sample['y_slots'])
            loss = loss_intent + loss_slot 
            loss_array.append(loss.item())
            # Intent inference
            # Get the highest probable class
            out_intents = [lang.id2intent[x] 
                           for x in torch.argmax(intents, dim=1).tolist()] 
            gt_intents = [lang.id2intent[x] for x in sample['intents'].tolist()]
            ref_intents.extend(gt_intents)
            hyp_intents.extend(out_intents)
            
            # Slot inference 
            output_slots = torch.argmax(slots, dim=1)
            for id_seq, seq in enumerate(output_slots):
                length = sample['slots_len'].tolist()[id_seq]
                utt_ids = sample['utterance'][id_seq][:length].tolist()
                gt_ids = sample['y_slots'][id_seq].tolist()
                gt_slots = [lang.id2slot[elem] for elem in gt_ids[:length]]
                utterance = [lang.id2word[elem] for elem in utt_ids]
                to_decode = seq[:length].tolist()
                ref_slots.append([(utterance[id_el], elem) for id_el, elem in enumerate(gt_slots)])
                tmp_seq = []
                for id_el, elem in enumerate(to_decode):
                    tmp_seq.append((utterance[id_el], lang.id2slot[elem]))
                hyp_slots.append(tmp_seq)
    try:            
        results = evaluate(ref_slots, hyp_slots)
    except Exception as ex:
        # Sometimes the model predics a class that is not in REF
        print(ex)
        ref_s = set([x[1] for x in ref_slots])
        hyp_s = set([x[1] for x in hyp_slots])
        print(hyp_s.difference(ref_s))
        
    report_intent = classification_report(ref_intents, hyp_intents, 
                                          zero_division=False, output_dict=True)
    return results, report_intent, loss_array

In [8]:
import torch.optim as optim
import torch.nn.functional as F

def training_loop(data, optimizer, criterion_slots, criterion_intents, model, lang):
    model.train()
    loss_array = []
    for sample in data:
        optimizer.zero_grad() # Zeroing the gradient
        intent, slots = model(sample['utterances'], lang)
        loss_intent = criterion_intents(intent, sample['intents'])
        loss_slot = criterion_slots(slots, sample['y_slots'])
        loss = loss_intent + loss_slot # In joint training we sum the losses. 
                                       # Is there another way to do that?
        loss_array.append(loss.item())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)  
        optimizer.step() # Update the weights
    return loss_array

    

model = JERNIE(out_int, out_slot)
model = model.to(device)
# encoded_train = encode_dataset(tokenizer, train_utterances, max_len_utt)

optimizer = optim.Adam(model.parameters(), lr=lr)
criterion_slots = nn.CrossEntropyLoss()
criterion_intents = nn.CrossEntropyLoss(ignore_index=0)

n_epochs = 200
patience = 10

losses_train = []
losses_dev = []
sampled_epochs = []
best_f1 = 0
from tqdm import tqdm
for x in tqdm(range(1,n_epochs)):
    loss = training_loop(train_loader, optimizer, criterion_slots, 
                    criterion_intents, model, lang)
    if x % 5 == 0:
        sampled_epochs.append(x)
        losses_train.append(np.asarray(loss).mean())
        results_dev, intent_res, loss_dev = evaluation_loop(dev_loader, criterion_slots, 
                                                    criterion_intents, model, lang)
        losses_dev.append(np.asarray(loss_dev).mean())
        f1 = results_dev['total']['f']
        
        if f1 > best_f1:
            best_f1 = f1
        else:
            # set optimizer learning rate to 0.1 times the previous value	
            if patience%3 ==0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] /2
            patience -= 1
        if patience <= 0: # Early stopping with patience
            break # Not nice but it keeps the code clean
results_test, intent_test, _ = evaluation_loop(test_loader, criterion_slots, 
                                            criterion_intents, model, lang)

 42%|████▏     | 84/199 [1:12:41<1:39:31, 51.93s/it]


In [9]:
print('Slot F1: ', results_test['total']['f'])
print('Intent Accuracy:', intent_test['accuracy'])

Slot F1:  0.9465181058495822
Intent Accuracy: 0.8671428571428571


In [10]:
# ATIS
slot_results = [0.9385887735351047, 0.9373680506685432, 0.9337550518362328, 0.9391487864931412, 0.9047619047619048]
intent_results = [0.9787234042553191, 0.9742441209406495, 0.973124300111982, 0.9316909294512878, 0.9686450167973124]

In [11]:
# SNIPS
slot_results = [0.939511653718091, 0.9477487493051697, 0.9411111111111111, 0.9495005549389567, 0.9465181058495822]
intent_results = [0.8414285714285714, 0.8414285714285714, 0.8571428571428571, 0.8385714285714285, 0.8671428571428571]