In [None]:
import sys
import os
os.chdir("../../")

from slot_extraction.utilities.slots import *
from datasets import load_dataset

# Load the "multi_woz_v22" dataset through `datasets.load_dataset`.
dataset = load_dataset("multi_woz_v22")

# Testing data: the 'test' split is passed through filter_dataset and then
# construct_slot_extraction_data (both presumably come from the star import of
# slot_extraction.utilities.slots -- TODO confirm).
test_filtered = filter_dataset(dataset['test'])
test_slot_extraction_data = construct_slot_extraction_data(test_filtered)

# Training data: the 'train' split, processed the same way. `filtered` and
# `slot_extraction_data` are the names that later cells of this notebook read.
filtered = filter_dataset(dataset['train'])
slot_extraction_data = construct_slot_extraction_data(filtered)

  from .autonotebook import tqdm as notebook_tqdm
2023-12-17 12:29:09.808629: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-17 12:29:10.792618: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-17 12:29:10.800016: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Available slots per domain

In [12]:
import pandas as pd

# Materialise whatever get_slots_from_dataset yields for the filtered training
# data, then split the slot names by the domain substring they contain.
full_list = list(get_slots_from_dataset(filtered))
list_hotel = [slot_name for slot_name in full_list if 'hotel' in slot_name]
list_restaurant = [slot_name for slot_name in full_list if 'restaurant' in slot_name]

# Size of the longer of the two per-domain lists.
max_size = max(len(list_restaurant), len(list_hotel))
# Empty placeholder frame; the comparison table is assigned to this name below.
relevant_slots = pd.DataFrame()



def find_match(h, rlist):
    """Return the first entry of ``rlist`` that names the same slot as ``h``.

    Slot identifiers are dash-separated (e.g. ``'hotel-inform-area'``); the
    slot name is the third segment of ``h``. A candidate matches when its
    final segment is exactly that slot name. The comparison is exact rather
    than a substring test so that a slot such as ``'day'`` is not paired with
    ``'...-bookday'``.

    Args:
        h: Slot identifier to look up.
        rlist: Iterable of candidate slot identifiers from the other domain.

    Returns:
        The first matching candidate, or ``None`` when there is no match or
        ``h`` has fewer than three dash-separated segments.
    """
    segments = h.split('-')
    if len(segments) < 3:
        # Malformed identifier: there is no slot-name segment to compare.
        return None
    looking_for = segments[2]
    for r in rlist:
        if r.rsplit('-', 1)[-1] == looking_for:
            return r
    return None

# Pair every hotel slot with its restaurant counterpart (or None) and every
# restaurant slot with its hotel counterpart (or None). Using a set collapses
# the duplicate pair produced when a slot exists in both domains.
slot_matches = (
    {(h, find_match(h, list_restaurant)) for h in list_hotel}
    | {(find_match(r, list_hotel), r) for r in list_restaurant}
)

# A set of string tuples iterates in a hash-dependent order that can change
# between interpreter sessions, so sort the pairs (None sorts as '') to give
# the table a stable row order on every run.
relevant_slots = pd.DataFrame(
    sorted(slot_matches, key=lambda pair: (pair[0] or '', pair[1] or '')),
    columns=['hotel', 'restaurant'])

relevant_slots

Unnamed: 0,hotel,restaurant
0,hotel-inform-type,
1,hotel-inform-bookstay,
2,hotel-inform-bookpeople,restaurant-inform-bookpeople
3,,restaurant-inform-booktime
4,hotel-inform-stars,
5,hotel-inform-pricerange,restaurant-inform-pricerange
6,hotel-inform-name,restaurant-inform-name
7,hotel-inform-area,restaurant-inform-area
8,,restaurant-inform-food
9,hotel-inform-bookday,restaurant-inform-bookday


# Training Code

In [9]:

import numpy as np
import pickle

# Search grid for the similarity approach.
# Number of rows sampled from the training frame per evaluated combination.
sample_size = 500
# n-gram sizes 1 through 5.
ngram_sizes = range(1, 6)
# Thresholds starting at 0.0 in steps of 0.05, stopping before 1.1.
sensitivities = np.arange(0, 1.1, 0.05)

# Candidate "representative" phrases per domain and slot. Each phrase is tried
# on its own by the grid search below.
# The variable name keeps its original spelling because other cells refer to it.
# Fix: 'expensiveness' and 'cheap moderate expensive' were previously fused into
# one string by a missing comma in both 'pricerange' lists.
slot_representatitives = {
    'hotel':
        {
            'bookpeople': ['number people', 'book people', 'people', 'persons', 'one two three four five six seven eight people',  '1 2 3 4 5 6 7 8 9 10 people'],
            'type': ['hotel', 'hotel accommodations', 'amenities', 'services', 'hotel guesthouse dontcare', 'hotel guesthouse', 'type of hotel'],
            'name': ['place to stay', 'Hotel', 'the hotel house home inn room guesthouse bed and breakfast'],
            'bookday': ['day of week', 'day', 'bookday', 'date', 'date of booking', 'monday tuesday wednesday thursday friday saturday sunday', 'first second third fourth fifth sixth seventh weekday'],
            'area': ['area', 'location', 'cardinal directions', 'north east south west center', 'north east south west center don\'t care'],
            'bookstay': ['1 2 3 4 5 days', 'one two three four five days', 'days of stay'],
            'pricerange': ['price', 'price range', 'range', 'expensiveness', 'cheap moderate expensive', 'cheap moderate expensive don\'t care'],
            'stars': ['star','stars of hotel', 'quality', 'prestige', 'zero one two three four five stars']
        },
    'restaurant':
        {
            'bookpeople': ['number people', 'book people', 'people', 'persons', 'one two three four five six seven eight people',  '1 2 3 4 5 6 7 8 9 10 people'],
            'name': ['food place', 'name', 'restaurant','name of restaurant', 'pizza wok cafe curry pipasha chinese', 'the restaurant house grill boat seoul brasserie pizzeria kitchen cafe'],
            'bookday': ['day of week', 'day', 'bookday', 'date', 'date of booking', 'monday tuesday wednesday thursday friday saturday sunday', 'first second third fourth fifth sixth seventh weekday'],
            'area': ['area', 'location', 'cardinal directions', 'north east south west center', 'north east south west center don\'t care'],
            'booktime': ['time of booking', 'time', 'hour', 'hour minute','0:00 1:00 2:00 3:00 4:00 5:00 6:00 7:00 8:00 9:00 10:00 11:00 12:00 am pm',
                         '0:00 1:00 2:00 3:00 4:00 5:00 6:00 7:00 8:00 9:00 10:00 11:00 12:00 13:00 14:00 15:00 16:00 17:00 18:00 19:00 20:00 21:00 22:00 23:00 am pm'],
            'pricerange': ['price', 'price range', 'range', 'expensiveness', 'cheap moderate expensive', 'cheap moderate expensive don\'t care'],
            'food': ['type of food', 'food', 'culinary style', 'cuisine', 'modern global international country creative food', 'north-american south-american european east-asian south-asian african chinese food']
        }
}

# Best combination found so far for each domain/slot; starts empty, with an
# empty 'scores' tuple.
best_slot_params = {
    domain: {
        slot_name: {'representative': '', 'ngram': 0, 'sensitivity': 0.0, 'scores': ()}
        for slot_name in domain_slots
    }
    for domain, domain_slots in slot_representatitives.items()
}

# Full score grid, filled in as
# results[domain][slot][representative][ngram_size][sensitivity].
slot_representatives_results = {
    domain: {slot_name: {} for slot_name in domain_slots}
    for domain, domain_slots in slot_representatitives.items()
}


# Grid search: evaluate every (n-gram size, sensitivity, domain, slot,
# representative) combination in isolation and keep, per slot, the combination
# with the highest F1 score.
# NOTE(review): a fresh random sample is drawn for every combination, so the
# scores of different combinations are measured on different rows; consider
# sampling once with a fixed random_state if comparable scores are needed.
for ngram_size in ngram_sizes:
    for s in sensitivities:
        for dom, slots in slot_representatitives.items():
            for slot, representatives in slots.items():
                # Named `representative` rather than `repr` so the loop does
                # not leave the builtin repr() shadowed in the kernel.
                for representative in representatives:
                    # Only the slot under test is mapped for this run.
                    slot_mappings = {dom: {slot: representative}}
                    slot_data_sample = slot_extraction_data.sample(sample_size)
                    slot_data_sample['predictions'] = slot_data_sample.apply(
                        df_extract_slots, axis=1,
                        args=[tokenizer, embedder, slot_mappings, s, ngram_size])
                    precision, recall, f1_score = get_evaluation_metrics(
                        slot_data_sample, filter_slots=[dom + '-' + slot])
                    scores = [precision, recall, f1_score]

                    # Record the scores of this combination in the full grid.
                    per_representative = slot_representatives_results[dom][slot].setdefault(representative, {})
                    per_representative.setdefault(ngram_size, {})[s] = scores

                    # Keep the combination with the best F1. The comparison now
                    # applies to every combination; previously the first
                    # appearance of each representative overwrote the current
                    # best without comparing scores.
                    best = best_slot_params[dom][slot]
                    if not best['scores'] or best['scores'][2] < f1_score:
                        best['representative'] = representative
                        best['ngram'] = ngram_size
                        best['sensitivity'] = s
                        best['scores'] = scores


display(slot_representatives_results)
display(best_slot_params)

# Persist the best combination per slot.
with open('best_similarity_params.pickle', 'wb') as best_results_f:
    pickle.dump(best_slot_params, best_results_f)

# Persist the full score grid. This file previously received a second copy of
# best_slot_params, so the per-combination scores were never saved.
with open('similarity_params_results.pickle', 'wb') as results_f:
    pickle.dump(slot_representatives_results, results_f)
 

{'hotel': {'bookpeople': {'number people': {1: {0.0: [1.0, 0.0, 0.0],
     0.05: [1.0, 0.0, 0.0],
     0.1: [1.0, 0.0, 0.0],
     0.15000000000000002: [1.0, 0.0, 0.0],
     0.2: [1.0, 0.11764705882352941, 0.21052631578947367],
     0.25: [0.5714285714285714, 0.11428571428571428, 0.19047619047619044],
     0.30000000000000004: [0.1111111111111111,
      0.038461538461538464,
      0.05714285714285715],
     0.35000000000000003: [0.375, 0.0967741935483871, 0.15384615384615383],
     0.4: [0.7142857142857143, 0.14705882352941177, 0.24390243902439024],
     0.45: [0.5, 0.14705882352941177, 0.22727272727272727],
     0.5: [0.5625, 0.23684210526315788, 0.3333333333333333],
     0.55: [0.3333333333333333, 0.21428571428571427, 0.2608695652173913],
     0.6000000000000001: [0.5789473684210527,
      0.39285714285714285,
      0.46808510638297873],
     0.65: [0.43243243243243246, 0.5161290322580645, 0.47058823529411764],
     0.7000000000000001: [0.4, 0.7894736842105263, 0.5309734513274337],
  

{'hotel': {'bookpeople': {'representative': 'number people',
   'ngram': 5,
   'sensitivity': 0.6000000000000001,
   'scores': [0.5555555555555556, 0.8108108108108109, 0.6593406593406594]},
  'type': {'representative': 'hotel',
   'ngram': 4,
   'sensitivity': 0.65,
   'scores': [1.0, 0.8529411764705882, 0.9206349206349206]},
  'name': {'representative': 'place to stay',
   'ngram': 1,
   'sensitivity': 0.65,
   'scores': [0.05128205128205128, 0.125, 0.07272727272727274]},
  'bookday': {'representative': 'monday tuesday wednesday thursday friday saturday sunday',
   'ngram': 2,
   'sensitivity': 0.65,
   'scores': [0.84375, 0.7941176470588235, 0.8181818181818182]},
  'area': {'representative': 'north east south west center',
   'ngram': 2,
   'sensitivity': 0.55,
   'scores': [0.6428571428571429, 0.6923076923076923, 0.6666666666666666]},
  'bookstay': {'representative': '1 2 3 4 5 days',
   'ngram': 1,
   'sensitivity': 0.75,
   'scores': [0.6875, 0.5238095238095238, 0.5945945945945946

The first output above shows the precision, recall and F1 scores of each slot for every combination of slot representative, n-gram size and sensitivity tested. The second output shows the best parameters found for each slot. The metrics were calculated in isolation: each iteration of the training loop evaluated a single slot, using only the ground truth corresponding to that slot. The scores were computed with the functions provided in the evaluation notebooks.

# Similarity approach evaluation

In [15]:
# Draw 500 random rows and attach the output of predict_similarity_df, applied
# row by row, as a 'predictions' column.
# NOTE(review): this samples slot_extraction_data (built from the 'train'
# split) rather than test_slot_extraction_data -- confirm which one is
# intended for the evaluation.
slot_sample = slot_extraction_data.sample(n=500)

slot_sample['predictions'] = slot_sample.apply(predict_similarity_df, axis='columns')

In [13]:
# Write the sampled rows, including their predictions, to a CSV file in the
# current working directory.
slot_sample.to_csv(path_or_buf='similarity_predictions.csv')

## Bad examples

| utterance | ground truth | predictions |
|-----------|--------------|-------------|
|I need a place to stay in the south of town.       |[('hotel-area', 'south')]| {('hotel-area', 'north')}|
|I would like a 4 star rating, but price doesn't really matter. What are the prices? | [('hotel-stars', '4'), ('hotel-pricerange', '?')]|{('hotel-name', "doesn't"), ('hotel-bookpeople', '1'), ('hotel-bookstay', '2'), ('hotel-pricerange', 'cheap')}|
|Is Monday available?| [('hotel-bookday', 'monday')] | {('hotel-bookday', 'tuesday')}|
|I need to find a restaurant that serves Indian food in the same area as the hotel|[('restaurant-food', 'indian'), ('restaurant-area', 'east')]|{('restaurant-area', 'centre')}|

## Good examples

| utterance | ground truth | predictions |
|-----------|--------------|-------------|
|I'm so hungry! Can you find me an expensive restaurant?|[('restaurant-pricerange', 'expensive')]|{('restaurant-pricerange', 'expensive')}|
|Yes try 15:30 on saturday and need a reference number thank you.|[('restaurant-booktime', '15:30'), ('restaurant-ref', '?')]|{('restaurant-bookday', 'saturday'), ('restaurant-food', 'saturday')}|
|Ok, how about an expensive british restaurant?|[('restaurant-pricerange', 'expensive'), ('restaurant-food', 'british')]|{('restaurant-pricerange', 'expensive'), ('restaurant-food', 'british')}|


## Global evaluation metrics for the similarity approach

In [16]:
# Overall precision, recall and F1 of the similarity predictions on the sample,
# printed one metric per line.
metrics = get_evaluation_metrics(slot_sample)
print(
    f"Precision: {metrics[0]}",
    f"Recall: {metrics[1]}",
    f"F1-score: {metrics[2]}",
    sep="\n",
)

Precision: 0.5873417721518988
Recall: 0.4496124031007752
F1-score: 0.5093304061470912
