In [1]:
#Mount Google drive
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
#change current working directory
#%cd "/content/drive/MyDrive/1_dialog_act"

In [3]:
!pip install transformers
!pip install accelerate
#!pip install datasets
!python3 -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')


In [4]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from transformers import TrainingArguments, Trainer
from transformers import BertForSequenceClassification, BertTokenizer, BertConfig
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
from transformers import XLNetForSequenceClassification, XLNetTokenizer, XLNetConfig
from transformers import XLMForSequenceClassification, XLMTokenizer, XLMConfig
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer, DistilBertConfig
from transformers import AlbertForSequenceClassification, AlbertTokenizer, AlbertConfig
from transformers import AdamW
from transformers import EarlyStoppingCallback, IntervalStrategy
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, Dataset
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import recall_score, precision_score, f1_score
from torch import cuda
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix
from sklearn.preprocessing import normalize
from itertools import product
import json
import shutil

import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

import nltk

nltk.download('punkt')

import spacy

nlp = spacy.load("en_core_web_lg")

#sys.path.append(os.path.abspath('../../'))
#from util import generate_metrics_latex_table



[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [5]:
SAVE_PARAMETERS_TO = "./saved_parameters.json"
SAVE_MODEL_TO_PATH = "./saved_models/"
SAVE_MODEL_NAME = "dialogue_act_model"
#TRAIN_DATA_SAVE_PATH = "utterances_act_types/train.json"
#TEST_DATA_SAVE_PATH = "utterances_act_types/test.json"
#VALID_DATA_SAVE_PATH = "utterances_act_types/valid.json"
TRAIN_DATA_SAVE_PATH = "/kaggle/input/preprocessed-data-question-tags-no-none/train.json"
TEST_DATA_SAVE_PATH = "/kaggle/input/preprocessed-data-question-tags-no-none/test.json"
VALID_DATA_SAVE_PATH = "/kaggle/input/preprocessed-data-question-tags-no-none/valid.json"

#best_model_path = SAVE_MODEL_TO_PATH + './best_model_dialog_act.pt'

best_model_path = './best_model_dialog_act.pt'

PRETRAINED_MODELS = {
    'bert': 'bert-large-uncased',
    'roberta': 'roberta-base',
    'xlnet': 'xlnet-large-cased',
    'xlm': 'xlm-mlm-en-2048',
    'distilbert': 'distilbert-base-uncased',
    'albert':'albert-base-v2'
}

MODEL_CLASSES = {
    'bert': (BertForSequenceClassification, BertTokenizer, BertConfig),
    'roberta': (RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig),
    'xlnet': (XLNetForSequenceClassification, XLNetTokenizer, XLNetConfig),
    'xlm': (XLMForSequenceClassification, XLMTokenizer, XLMConfig),
    'distilbert': (DistilBertForSequenceClassification, DistilBertTokenizer, DistilBertConfig),
    'albert':(AlbertForSequenceClassification,AlbertTokenizer, AlbertConfig)
}

MODEL_TYPE = 'roberta'
PRETRAINED_MODEL_NAME = PRETRAINED_MODELS[MODEL_TYPE]

model_class, tokenizer_class, config_class = MODEL_CLASSES[MODEL_TYPE]

LEARNING_RATE = 1e-5
BATCH_SIZE = 8
EPOCHS = 50
WEIGHT_DECAY = 0.001

In [6]:
def save_variable_to_json(variable, file_path):
    with open(file_path, 'w') as file:
        json.dump(variable, file)

def load_variable_from_json(file_path):
    with open(file_path, 'r') as file:
        variable = json.load(file)
    return variable

In [7]:
def parse(sentence):
    # Tokenize
    sentence = nlp(sentence)
    # Remove stop words
    sentence = " ".join([token.lemma_ for token in sentence])
    
    return sentence

In [8]:
try:
    X_train, y_train = load_variable_from_json(TRAIN_DATA_SAVE_PATH)
    X_test, y_test = load_variable_from_json(TEST_DATA_SAVE_PATH)
    X_valid, y_valid = load_variable_from_json(VALID_DATA_SAVE_PATH)

    print('Data loaded from file.')
except:
    print('No saved data found.')

Data loaded from file.


In [9]:
longest_train_data = max(X_train + X_test + X_valid, key=lambda x: len(x.split()))
print('Longest utterance length:', len(longest_train_data.split()))

num_labels = len(set([act for act_list in y_train for act in act_list ]))

all_labels = sorted(set([act for act_list in y_train for act in act_list ]))
print('Labels:', all_labels)


Longest utterance length: 121
Labels: ['hotel-address', 'hotel-area', 'hotel-internet', 'hotel-name', 'hotel-parking', 'hotel-phone', 'hotel-postcode', 'hotel-pricerange', 'hotel-ref', 'hotel-stars', 'hotel-type', 'restaurant-address', 'restaurant-area', 'restaurant-food', 'restaurant-name', 'restaurant-phone', 'restaurant-postcode', 'restaurant-pricerange', 'restaurant-ref']


In [10]:
# This will help us to transform the labels into a one-hot encoded numeric array
mlb = MultiLabelBinarizer(classes=list(all_labels))
mlb.fit(y_train)

In [11]:
# Load the pre-trained BERT model and tokenizer
tokenizer = tokenizer_class.from_pretrained(PRETRAINED_MODEL_NAME)

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [12]:
max_length = min(2 ** (len(tokenizer.tokenize(longest_train_data))-1).bit_length(), 512)
print('Max chosen length:', max_length)


Max chosen length: 256


In [13]:
class CustomDataset(Dataset):
    def __init__(self, tokenizer, X, y, max_length):
        self.X = X
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Fit the label binarizer and transform the labels into one-hot encoded format
        self.labels = mlb.fit_transform(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Encode the utterance using the provided tokenizer
        encoding = self.tokenizer.encode_plus(
            self.X[idx],
            add_special_tokens=True,
            max_length = self.max_length,
            return_token_type_ids=True,
            padding='max_length',
            return_attention_mask=True,
            truncation=False,
            return_tensors='pt'
        )
        # Convert the list of strings into a one-hot encoded format
        label = self.labels[idx]  # This should now be a binary vector instead of a list of strings
        # Return the encoding and the label
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.float),
            'token_type_ids': encoding['token_type_ids'].flatten()
        }

In [14]:
# Smaller dataset to try
p = 1
n_train_samples = int(len(X_train) * p)
n_test_samples = int(len(X_test) * p)
n_valid_samples = int(len(X_valid) * p)

print("Train samples: ", n_train_samples)
print("Test samples: ", n_test_samples)
print("Valid samples: ", n_valid_samples)

Train samples:  4800
Test samples:  571
Valid samples:  591


In [15]:
train_dataset = CustomDataset(tokenizer, X_train[:n_train_samples], y_train[:n_train_samples], max_length)
test_dataset = CustomDataset(tokenizer, X_test[:n_test_samples], y_test[:n_test_samples], max_length)
valid_dataset = CustomDataset(tokenizer, X_valid[:n_valid_samples], y_valid[:n_valid_samples], max_length)

## Evaluation

In [16]:
train_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [17]:
device = 'cuda' if cuda.is_available() else 'cpu'
print(device)

cuda


In [18]:
def load_ckp(checkpoint_fpath, model, optimizer):
    checkpoint = torch.load(checkpoint_fpath, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    valid_loss_min = checkpoint['valid_loss_min']
    return model, optimizer, checkpoint['epoch'], valid_loss_min

def save_ckp(state, best_model_path):
    torch.save(state, best_model_path)


In [19]:
class BERTClass(torch.nn.Module):
    def __init__(self, pretrained_model_name, num_labels):
        super(BERTClass, self).__init__()
        self.num_labels = num_labels
        self.l1 = model_class.from_pretrained(pretrained_model_name, num_labels=self.num_labels)
        self.pre_classifier = torch.nn.Linear(self.num_labels, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, self.num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        hidden_state = output_1[0]
        pooler = hidden_state
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        output = output.view(-1, self.num_labels)  # Reshape the output
        return output

model = BERTClass(PRETRAINED_MODEL_NAME, num_labels)
model = model.to(device)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY, correct_bias=False)



In [21]:
def valid(model, valid_dataloader):
    val_targets = []
    val_outputs = []
    
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in tqdm(valid_dataloader):
            input_ids = batch['input_ids'].to(device, dtype=torch.long)
            attention_mask = batch['attention_mask'].to(device, dtype=torch.long)
            token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            labels = batch['labels'].to(device, dtype=torch.float)

            outputs = model(input_ids, attention_mask, token_type_ids)

            loss = loss_fn(outputs, labels)
            val_loss += loss.item()

            val_targets.extend(labels.cpu().detach().numpy().tolist())
            val_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
            

    val_loss /= len(valid_dataloader)
    
    return val_loss, val_targets, val_outputs

In [22]:
def train(model, train_dataloader):
    model.train()
    train_loss = 0
    for batch in tqdm(train_dataloader):
        input_ids = batch['input_ids'].to(device, dtype=torch.long)
        attention_mask = batch['attention_mask'].to(device, dtype=torch.long)
        token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
        labels = batch['labels'].to(device, dtype=torch.float)

        model.zero_grad()
        outputs = model(input_ids, attention_mask, token_type_ids)

        loss = loss_fn(outputs, labels)
        train_loss += loss.item()

        loss.backward()
        optimizer.step()

    train_loss /= len(train_dataloader)
    
    return train_loss

In [23]:
def train_model(num_epochs, train_dataloader, valid_dataloader, model, optimizer, best_model_path, patience = 2):
    valid_loss_min = np.Inf

    num_not_improved = 0
    for epoch in range(1, num_epochs):
        print()
        print("#################### Epoch {}: Training Start    ####################".format(epoch))

        train_loss = train(model, train_dataloader)
        print('#################### Epoch {}: Training End      ####################'.format(epoch))

        print()
        print("#################### Epoch {}: Validation Start ####################".format(epoch))

        valid_loss, val_targets, val_outputs = valid(model, valid_dataloader)

        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch, train_loss, valid_loss))

        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min, valid_loss))

            checkpoint = {
                        'epoch': epoch,
                        'valid_loss_min': valid_loss,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }

            save_ckp(checkpoint, best_model_path)
            valid_loss_min = valid_loss
            num_not_improved = 0
        else:
            num_not_improved += 1
            if num_not_improved >= patience:
                print('Not improvement for more than:', num_not_improved)
                break
        print("#################### Epoch {}: Validation End   ####################".format(epoch))
        print()

    print("#################### Training finished     ####################")
    return model


In [24]:
try:
  trained_model, trained_optimizer, epoch, valid_loss_min = load_ckp(best_model_path, model, optimizer)
except:
  print('No saved model found. Need to be train from scratch.')
  trained_model = train_model(EPOCHS, train_dataloader, valid_dataloader, model, optimizer,  best_model_path)


No saved model found. Need to be train from scratch.

#################### Epoch 1: Training Start    ####################


100%|██████████| 72/72 [00:17<00:00,  4.19it/s]


#################### Epoch 1: Training End      ####################

#################### Epoch 1: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.19it/s]


Epoch: 1 	Training Loss: 0.408546 	Validation Loss: 0.252394
Validation loss decreased (inf --> 0.252394).  Saving model ...
#################### Epoch 1: Validation End   ####################


#################### Epoch 2: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.73it/s]


#################### Epoch 2: Training End      ####################

#################### Epoch 2: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.09it/s]


Epoch: 2 	Training Loss: 0.262432 	Validation Loss: 0.249007
Validation loss decreased (0.252394 --> 0.249007).  Saving model ...
#################### Epoch 2: Validation End   ####################


#################### Epoch 3: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.72it/s]


#################### Epoch 3: Training End      ####################

#################### Epoch 3: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.05it/s]


Epoch: 3 	Training Loss: 0.260408 	Validation Loss: 0.247833
Validation loss decreased (0.249007 --> 0.247833).  Saving model ...
#################### Epoch 3: Validation End   ####################


#################### Epoch 4: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.74it/s]


#################### Epoch 4: Training End      ####################

#################### Epoch 4: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.11it/s]


Epoch: 4 	Training Loss: 0.251557 	Validation Loss: 0.237128
Validation loss decreased (0.247833 --> 0.237128).  Saving model ...
#################### Epoch 4: Validation End   ####################


#################### Epoch 5: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.72it/s]


#################### Epoch 5: Training End      ####################

#################### Epoch 5: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.03it/s]


Epoch: 5 	Training Loss: 0.240032 	Validation Loss: 0.225052
Validation loss decreased (0.237128 --> 0.225052).  Saving model ...
#################### Epoch 5: Validation End   ####################


#################### Epoch 6: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 6: Training End      ####################

#################### Epoch 6: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.04it/s]


Epoch: 6 	Training Loss: 0.230240 	Validation Loss: 0.214599
Validation loss decreased (0.225052 --> 0.214599).  Saving model ...
#################### Epoch 6: Validation End   ####################


#################### Epoch 7: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 7: Training End      ####################

#################### Epoch 7: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.03it/s]


Epoch: 7 	Training Loss: 0.219662 	Validation Loss: 0.208566
Validation loss decreased (0.214599 --> 0.208566).  Saving model ...
#################### Epoch 7: Validation End   ####################


#################### Epoch 8: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 8: Training End      ####################

#################### Epoch 8: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.90it/s]


Epoch: 8 	Training Loss: 0.214484 	Validation Loss: 0.201679
Validation loss decreased (0.208566 --> 0.201679).  Saving model ...
#################### Epoch 8: Validation End   ####################


#################### Epoch 9: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 9: Training End      ####################

#################### Epoch 9: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.99it/s]


Epoch: 9 	Training Loss: 0.212425 	Validation Loss: 0.196290
Validation loss decreased (0.201679 --> 0.196290).  Saving model ...
#################### Epoch 9: Validation End   ####################


#################### Epoch 10: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 10: Training End      ####################

#################### Epoch 10: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.98it/s]


Epoch: 10 	Training Loss: 0.204681 	Validation Loss: 0.190468
Validation loss decreased (0.196290 --> 0.190468).  Saving model ...
#################### Epoch 10: Validation End   ####################


#################### Epoch 11: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 11: Training End      ####################

#################### Epoch 11: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.01it/s]


Epoch: 11 	Training Loss: 0.196251 	Validation Loss: 0.184708
Validation loss decreased (0.190468 --> 0.184708).  Saving model ...
#################### Epoch 11: Validation End   ####################


#################### Epoch 12: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 12: Training End      ####################

#################### Epoch 12: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.93it/s]


Epoch: 12 	Training Loss: 0.192027 	Validation Loss: 0.178103
Validation loss decreased (0.184708 --> 0.178103).  Saving model ...
#################### Epoch 12: Validation End   ####################


#################### Epoch 13: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 13: Training End      ####################

#################### Epoch 13: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.96it/s]


Epoch: 13 	Training Loss: 0.187341 	Validation Loss: 0.172820
Validation loss decreased (0.178103 --> 0.172820).  Saving model ...
#################### Epoch 13: Validation End   ####################


#################### Epoch 14: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 14: Training End      ####################

#################### Epoch 14: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.03it/s]


Epoch: 14 	Training Loss: 0.177824 	Validation Loss: 0.164850
Validation loss decreased (0.172820 --> 0.164850).  Saving model ...
#################### Epoch 14: Validation End   ####################


#################### Epoch 15: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 15: Training End      ####################

#################### Epoch 15: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.95it/s]


Epoch: 15 	Training Loss: 0.170598 	Validation Loss: 0.156147
Validation loss decreased (0.164850 --> 0.156147).  Saving model ...
#################### Epoch 15: Validation End   ####################


#################### Epoch 16: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 16: Training End      ####################

#################### Epoch 16: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.98it/s]


Epoch: 16 	Training Loss: 0.165112 	Validation Loss: 0.149309
Validation loss decreased (0.156147 --> 0.149309).  Saving model ...
#################### Epoch 16: Validation End   ####################


#################### Epoch 17: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 17: Training End      ####################

#################### Epoch 17: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.93it/s]


Epoch: 17 	Training Loss: 0.156501 	Validation Loss: 0.140116
Validation loss decreased (0.149309 --> 0.140116).  Saving model ...
#################### Epoch 17: Validation End   ####################


#################### Epoch 18: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 18: Training End      ####################

#################### Epoch 18: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.96it/s]


Epoch: 18 	Training Loss: 0.149514 	Validation Loss: 0.134071
Validation loss decreased (0.140116 --> 0.134071).  Saving model ...
#################### Epoch 18: Validation End   ####################


#################### Epoch 19: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 19: Training End      ####################

#################### Epoch 19: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.99it/s]


Epoch: 19 	Training Loss: 0.143326 	Validation Loss: 0.126117
Validation loss decreased (0.134071 --> 0.126117).  Saving model ...
#################### Epoch 19: Validation End   ####################


#################### Epoch 20: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 20: Training End      ####################

#################### Epoch 20: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.95it/s]


Epoch: 20 	Training Loss: 0.133649 	Validation Loss: 0.119712
Validation loss decreased (0.126117 --> 0.119712).  Saving model ...
#################### Epoch 20: Validation End   ####################


#################### Epoch 21: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 21: Training End      ####################

#################### Epoch 21: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.98it/s]


Epoch: 21 	Training Loss: 0.127158 	Validation Loss: 0.111155
Validation loss decreased (0.119712 --> 0.111155).  Saving model ...
#################### Epoch 21: Validation End   ####################


#################### Epoch 22: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 22: Training End      ####################

#################### Epoch 22: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.99it/s]


Epoch: 22 	Training Loss: 0.119839 	Validation Loss: 0.106673
Validation loss decreased (0.111155 --> 0.106673).  Saving model ...
#################### Epoch 22: Validation End   ####################


#################### Epoch 23: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 23: Training End      ####################

#################### Epoch 23: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.01it/s]


Epoch: 23 	Training Loss: 0.113311 	Validation Loss: 0.098429
Validation loss decreased (0.106673 --> 0.098429).  Saving model ...
#################### Epoch 23: Validation End   ####################


#################### Epoch 24: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 24: Training End      ####################

#################### Epoch 24: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.91it/s]


Epoch: 24 	Training Loss: 0.107571 	Validation Loss: 0.094424
Validation loss decreased (0.098429 --> 0.094424).  Saving model ...
#################### Epoch 24: Validation End   ####################


#################### Epoch 25: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 25: Training End      ####################

#################### Epoch 25: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.00it/s]


Epoch: 25 	Training Loss: 0.100109 	Validation Loss: 0.087072
Validation loss decreased (0.094424 --> 0.087072).  Saving model ...
#################### Epoch 25: Validation End   ####################


#################### Epoch 26: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 26: Training End      ####################

#################### Epoch 26: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.99it/s]


Epoch: 26 	Training Loss: 0.094571 	Validation Loss: 0.081680
Validation loss decreased (0.087072 --> 0.081680).  Saving model ...
#################### Epoch 26: Validation End   ####################


#################### Epoch 27: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 27: Training End      ####################

#################### Epoch 27: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.98it/s]


Epoch: 27 	Training Loss: 0.090169 	Validation Loss: 0.076391
Validation loss decreased (0.081680 --> 0.076391).  Saving model ...
#################### Epoch 27: Validation End   ####################


#################### Epoch 28: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 28: Training End      ####################

#################### Epoch 28: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.98it/s]


Epoch: 28 	Training Loss: 0.083854 	Validation Loss: 0.071107
Validation loss decreased (0.076391 --> 0.071107).  Saving model ...
#################### Epoch 28: Validation End   ####################


#################### Epoch 29: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 29: Training End      ####################

#################### Epoch 29: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.98it/s]


Epoch: 29 	Training Loss: 0.080170 	Validation Loss: 0.068807
Validation loss decreased (0.071107 --> 0.068807).  Saving model ...
#################### Epoch 29: Validation End   ####################


#################### Epoch 30: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 30: Training End      ####################

#################### Epoch 30: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.99it/s]


Epoch: 30 	Training Loss: 0.074810 	Validation Loss: 0.062474
Validation loss decreased (0.068807 --> 0.062474).  Saving model ...
#################### Epoch 30: Validation End   ####################


#################### Epoch 31: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 31: Training End      ####################

#################### Epoch 31: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.76it/s]


Epoch: 31 	Training Loss: 0.071806 	Validation Loss: 0.059937
Validation loss decreased (0.062474 --> 0.059937).  Saving model ...
#################### Epoch 31: Validation End   ####################


#################### Epoch 32: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 32: Training End      ####################

#################### Epoch 32: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.76it/s]


Epoch: 32 	Training Loss: 0.065433 	Validation Loss: 0.055590
Validation loss decreased (0.059937 --> 0.055590).  Saving model ...
#################### Epoch 32: Validation End   ####################


#################### Epoch 33: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 33: Training End      ####################

#################### Epoch 33: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.92it/s]


Epoch: 33 	Training Loss: 0.063071 	Validation Loss: 0.052192
Validation loss decreased (0.055590 --> 0.052192).  Saving model ...
#################### Epoch 33: Validation End   ####################


#################### Epoch 34: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 34: Training End      ####################

#################### Epoch 34: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.96it/s]


Epoch: 34 	Training Loss: 0.059668 	Validation Loss: 0.050226
Validation loss decreased (0.052192 --> 0.050226).  Saving model ...
#################### Epoch 34: Validation End   ####################


#################### Epoch 35: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 35: Training End      ####################

#################### Epoch 35: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.89it/s]


Epoch: 35 	Training Loss: 0.057180 	Validation Loss: 0.046963
Validation loss decreased (0.050226 --> 0.046963).  Saving model ...
#################### Epoch 35: Validation End   ####################


#################### Epoch 36: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 36: Training End      ####################

#################### Epoch 36: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.98it/s]


Epoch: 36 	Training Loss: 0.053030 	Validation Loss: 0.043653
Validation loss decreased (0.046963 --> 0.043653).  Saving model ...
#################### Epoch 36: Validation End   ####################


#################### Epoch 37: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 37: Training End      ####################

#################### Epoch 37: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.04it/s]


Epoch: 37 	Training Loss: 0.050814 	Validation Loss: 0.041780
Validation loss decreased (0.043653 --> 0.041780).  Saving model ...
#################### Epoch 37: Validation End   ####################


#################### Epoch 38: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 38: Training End      ####################

#################### Epoch 38: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.93it/s]


Epoch: 38 	Training Loss: 0.047641 	Validation Loss: 0.038975
Validation loss decreased (0.041780 --> 0.038975).  Saving model ...
#################### Epoch 38: Validation End   ####################


#################### Epoch 39: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 39: Training End      ####################

#################### Epoch 39: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.93it/s]


Epoch: 39 	Training Loss: 0.046901 	Validation Loss: 0.037947
Validation loss decreased (0.038975 --> 0.037947).  Saving model ...
#################### Epoch 39: Validation End   ####################


#################### Epoch 40: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 40: Training End      ####################

#################### Epoch 40: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.02it/s]


Epoch: 40 	Training Loss: 0.042729 	Validation Loss: 0.035572
Validation loss decreased (0.037947 --> 0.035572).  Saving model ...
#################### Epoch 40: Validation End   ####################


#################### Epoch 41: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 41: Training End      ####################

#################### Epoch 41: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.97it/s]


Epoch: 41 	Training Loss: 0.041165 	Validation Loss: 0.032711
Validation loss decreased (0.035572 --> 0.032711).  Saving model ...
#################### Epoch 41: Validation End   ####################


#################### Epoch 42: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 42: Training End      ####################

#################### Epoch 42: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.02it/s]


Epoch: 42 	Training Loss: 0.038880 	Validation Loss: 0.031726
Validation loss decreased (0.032711 --> 0.031726).  Saving model ...
#################### Epoch 42: Validation End   ####################


#################### Epoch 43: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 43: Training End      ####################

#################### Epoch 43: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.92it/s]


Epoch: 43 	Training Loss: 0.037443 	Validation Loss: 0.030554
Validation loss decreased (0.031726 --> 0.030554).  Saving model ...
#################### Epoch 43: Validation End   ####################


#################### Epoch 44: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 44: Training End      ####################

#################### Epoch 44: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.25it/s]


Epoch: 44 	Training Loss: 0.036403 	Validation Loss: 0.028488
Validation loss decreased (0.030554 --> 0.028488).  Saving model ...
#################### Epoch 44: Validation End   ####################


#################### Epoch 45: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.68it/s]


#################### Epoch 45: Training End      ####################

#################### Epoch 45: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.01it/s]


Epoch: 45 	Training Loss: 0.034090 	Validation Loss: 0.027066
Validation loss decreased (0.028488 --> 0.027066).  Saving model ...
#################### Epoch 45: Validation End   ####################


#################### Epoch 46: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 46: Training End      ####################

#################### Epoch 46: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.02it/s]


Epoch: 46 	Training Loss: 0.032257 	Validation Loss: 0.025412
Validation loss decreased (0.027066 --> 0.025412).  Saving model ...
#################### Epoch 46: Validation End   ####################


#################### Epoch 47: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 47: Training End      ####################

#################### Epoch 47: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 13.97it/s]


Epoch: 47 	Training Loss: 0.030834 	Validation Loss: 0.024937
Validation loss decreased (0.025412 --> 0.024937).  Saving model ...
#################### Epoch 47: Validation End   ####################


#################### Epoch 48: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.70it/s]


#################### Epoch 48: Training End      ####################

#################### Epoch 48: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.02it/s]


Epoch: 48 	Training Loss: 0.029009 	Validation Loss: 0.023687
Validation loss decreased (0.024937 --> 0.023687).  Saving model ...
#################### Epoch 48: Validation End   ####################


#################### Epoch 49: Training Start    ####################


100%|██████████| 72/72 [00:15<00:00,  4.69it/s]


#################### Epoch 49: Training End      ####################

#################### Epoch 49: Validation Start ####################


100%|██████████| 72/72 [00:05<00:00, 14.02it/s]


Epoch: 49 	Training Loss: 0.028650 	Validation Loss: 0.022497
Validation loss decreased (0.023687 --> 0.022497).  Saving model ...
#################### Epoch 49: Validation End   ####################

#################### Training finished     ####################


In [25]:
test_loss, test_labels , test_predictions_probs = valid(trained_model, test_dataloader)


100%|██████████| 72/72 [00:05<00:00, 14.03it/s]


In [26]:
threshold = 0.5
test_predictions = [[prob > threshold for prob in prob_list] for prob_list in test_predictions_probs ]

In [27]:
print('Accuracy:', accuracy_score(test_labels, test_predictions))
print('Precision:', precision_score(test_labels, test_predictions, average='weighted'))
print('Recall:', recall_score(test_labels, test_predictions, average='weighted'))
print('F1:', f1_score(test_labels, test_predictions, average='weighted'))

report = classification_report(test_labels, test_predictions, target_names=mlb.classes_)
print(report)

Accuracy: 0.9159369527145359
Precision: 0.9724294519092261
Recall: 0.9286608260325406
F1: 0.946926224889718
                       precision    recall  f1-score   support

        hotel-address       0.98      0.96      0.97        54
           hotel-area       1.00      0.89      0.94        27
       hotel-internet       1.00      0.91      0.95        32
           hotel-name       1.00      1.00      1.00        27
        hotel-parking       1.00      0.87      0.93        31
          hotel-phone       1.00      1.00      1.00        73
       hotel-postcode       1.00      0.96      0.98        55
     hotel-pricerange       1.00      0.82      0.90        34
            hotel-ref       1.00      0.98      0.99        44
          hotel-stars       0.00      0.00      0.00         7
           hotel-type       0.00      0.00      0.00        13
   restaurant-address       0.99      0.98      0.99       101
      restaurant-area       1.00      0.36      0.53        14
      res

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, coverage_error, label_ranking_average_precision_score, label_ranking_loss
import numpy as np
import pandas as pd

def generate_metrics_latex_table(model_name, task_number, true_labels, binary_predictions, prediction_probs, target_names):
    report = classification_report(true_labels, binary_predictions, target_names=target_names, digits=3, output_dict=True)
    df = pd.DataFrame(report).transpose()
    df['support'] = df['support'].astype(int)
    df = df.rename({'precision': r'\textbf{Precision}', 'recall': r'\textbf{Recall}', 'f1-score': r'\textbf{F1-Score}', 'support': r'\textbf{Support}'}, axis=1)

    # Generating additional metrics
    accuracy = accuracy_score(true_labels, binary_predictions)
    precision, recall, f_score, _ = precision_recall_fscore_support(true_labels, binary_predictions, average='micro')

    # Calculating multilabel-specific metrics
    coverage_err = coverage_error(true_labels, prediction_probs)
    lrap = label_ranking_average_precision_score(true_labels, prediction_probs)
    ranking_loss = label_ranking_loss(true_labels, prediction_probs)

    # Calculate best/worst/expected values where applicable
    # Best possible coverage error is the average number of true labels per instance
    best_coverage = true_labels.sum(axis=1).mean()
    # The worst case is the total number of labels
    worst_coverage = true_labels.shape[1]

    # For LRAP, the best value is 1 and the worst is 0. Expected is the baseline or random performance.
    best_lrap = 1.0
    worst_lrap = 0.0  # This is theoretical; in practice, it's unlikely to get 0

    # For ranking loss, the best value is 0. 
    best_rl = 0.0
    # The worst case needs to account for the number of possible incorrect pairings. For each instance, it's the number of true labels times the number of false labels
    worst_rl = np.mean([(sum(row) * (len(row) - sum(row))) for row in true_labels])


    # Converting to LaTeX table
    latex_table = df.to_latex(float_format="%.3f", column_format='|l|c|c|c|c|')
    # Removing some stuff from df.to_latex() output
    latex_table = latex_table.replace('\\toprule\n ', r'\hline' + '\n' + r'\textbf{Class}') \
                             .replace('\\midrule\n', '') \
                             .replace('\\bottomrule', r'\multicolumn{5}{c}{}\\') \
                             .replace('\\end{tabular}\n', '') \
                             .replace(r'\\', r'\\ \hline') \
                             .replace('\nmicro avg','\\hline\nmicro avg')
    
    # Adding overall metrics
    overall_metrics = f"""
{latex_table}
\\textbf{{Accuracy}}                    & \\multicolumn{{4}}{{c|}}{{{accuracy:.3f}}}                                 \\\\ \\hline
\\textbf{{Overall Precision}}           & \\multicolumn{{4}}{{c|}}{{{precision:.3f}}}                                \\\\ \\hline
\\textbf{{Overall Recall}}              & \\multicolumn{{4}}{{c|}}{{{recall:.3f}}}                                   \\\\ \\hline
\\textbf{{Overall F1-Score}}            & \\multicolumn{{4}}{{c|}}{{{f_score:.3f}}}                                  \\\\ \\hline
\\textbf{{Label Ranking Avg Precision}} & \\multicolumn{{4}}{{c|}}{{{lrap:.3f}}}                                    \\\\ \\hline
\\textbf{{Coverage Error}}              & \\multicolumn{{4}}{{c|}}{{{coverage_err:.3f} (worst: {worst_coverage:.3f}, best: {best_coverage:.3f})}}                             \\\\ \\hline
\\textbf{{Ranking Loss}}                & \\multicolumn{{4}}{{c|}}{{{ranking_loss:.3f} (worst: {worst_rl:.3f}, best: {best_rl:.3f})}}                             \\\\ \\hline
\\end{{tabular}}
"""

    # Final LaTeX output with caption and label
    final_latex_output = f"""
\\begin{{table}}[h]
\\centering
{overall_metrics}
\\caption{{Metrics Overview of {model_name} Model for Task {task_number}}}
\\label{{table:{model_name}_metrics_task_{task_number}}}
\\end{{table}}
    """

    # Print or write to a file
    with open('metrics.tex', 'w') as f:
        f.write(final_latex_output)


In [29]:
generate_metrics_latex_table(model_name= 'roberta', task_number = '01', true_labels = np.array(test_labels), binary_predictions = test_predictions, prediction_probs = test_predictions_probs, target_names=mlb.classes_)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
def predict(model, tokenizer, sentence):
    model.eval()
    device = 'cuda' if cuda.is_available() else 'cpu'
    sentence = parse(sentence)
    inputs = tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,
            max_length = max_length,
            return_token_type_ids=True,
            padding='max_length',
            return_attention_mask=True,
            truncation=False,
            return_tensors='pt'
        )
    

    input_ids = inputs['input_ids'].to(device, dtype=torch.long)
    attention_mask = inputs['attention_mask'].to(device, dtype=torch.long)
    token_type_ids = inputs['token_type_ids'].to(device, dtype=torch.long)

    outputs = model(input_ids, attention_mask, token_type_ids)

    outputs = torch.sigmoid(outputs).cpu().detach().numpy()
    threshold = 0.5
    outputs = test_predictions = [[prob > threshold for prob in prob_list] for prob_list in outputs ]
    
    outputs = mlb.inverse_transform(np.array(outputs))
    return sentence, outputs

In [31]:
sentence = "I need a place to dine in the center that s expensive | Restaurant-Inform | restaurant-area , restaurant-pricerange | I have several option for you ; do you prefer african , asian , or british food ? | Restaurant-Select , Restaurant-Inform | restaurant-food , restaurant-choice | any sort of food would be fine , as long as it be a bit expensive . could I get the phone number for your recommendation ? | Restaurant-Request"
final_labels =['restaurant-food']

In [32]:
sentence, labels = predict(model, tokenizer, sentence)

In [33]:
print('Sentence:', sentence)
print('Dialogue act type:', labels)

Sentence: I need a place to dine in the center that s expensive | restaurant - inform | restaurant - area , restaurant - pricerange | I have several option for you ; do you prefer african , asian , or british food ? | restaurant - Select , restaurant - inform | restaurant - food , restaurant - choice | any sort of food would be fine , as long as it be a bit expensive . could I get the phone number for your recommendation ? | restaurant - request
Dialogue act type: [('restaurant-phone',)]
