In [1]:
import os
import sys
import math
import logging
from pathlib import Path

import numpy as np
import scipy as sp
import sklearn

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# import seaborn as sns
# sns.set_context("poster")
# sns.set(rc={"figure.figsize": (16, 9.)})
# sns.set_style("whitegrid")

# Send log records at INFO level and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

**PLEASE** save this file right now using the following naming convention: `NUMBER_FOR_SORTING-YOUR_INITIALS-SHORT_DESCRIPTION`, e.g. `1.0-fw-initial-data-exploration`. Use the number to order the file within the directory according to its usage.

# Setup

In [2]:
# Location of the MultiWOZ 2.1 `data.json` file that the goals are loaded from.
# NOTE(review): absolute, user-specific path -- the notebook will not run on
# another machine without editing this value.
MULTWIOZ21_PATH = "/home/mifs/ac2123/dev/crazyneuraluser/baselines/data/multiwoz21/data.json"

In [3]:
# Switch the working directory to the baselines folder for the rest of the notebook.
# NOTE(review): presumably required so that the `utils` import in the next code
# cell resolves -- confirm. The path is absolute and user-specific.
os.chdir("/home/mifs/ac2123/dev/crazyneuraluser/baselines/")

## Question: Are goal changes consistently annotated in MultiWOZ 2.1?

In [4]:
from prettyprinter import pprint

from utils import load_goals, CorpusGoalGenerator

In [5]:
# Dialogue IDs that can be used to restrict the analysis to a few examples
# (see the commented-out filter in the cell that loads `goals`).
goal_id_subsample = ['MUL2674.json']

In [6]:
# Load the goals from the MultiWOZ 2.1 data file; entries are looked up by
# dialogue ID (e.g. 'MUL2674.json').
goals = load_goals(MULTWIOZ21_PATH)
# Uncomment to keep only the goals listed in `goal_id_subsample`:
# goals = {g_id: goals[g_id] for g_id in goal_id_subsample}

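An example of a raw goal from the corpus. Note the difference between the `info`/`fail_info` and the `book`/`fail_book` constraints: `fail_info` repeats every slot of `info`, whereas `fail_book` lists only the slot whose value differs from `book`: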

In [7]:
# Show the goal annotation of the example dialogue as loaded, before any cleaning.
pprint(goals['MUL2674.json']["goal"])

{
    'taxi': {},
    'police': {},
    'hospital': {},
    'hotel': {
        'info': {
            'type': 'guesthouse',
            'stars': '4',
            'parking': 'yes'
        },
        'fail_info': {
            'type': 'hotel',
            'stars': '5',
            'parking': 'yes'
        },
        'book': {
            'pre_invalid': True,
            'people': '8',
            'day': 'monday',
            'invalid': False,
            'stay': '2'
        },
        'fail_book': {'stay': '3'}
    },
    'attraction': {
        'info': {'type': 'college'},
        'reqt': ['area', 'postcode'],
        'fail_info': {}
    },
    'train': {},
    'message': [
        'You are looking for information in Cambridge',
        "You are looking for a <span class='emphasis'>place to stay</span>. "
            "The hotel should have <span class='emphasis'>a star of 5</span> and "
            "should <span class='emphasis'>include free parking</span>",
        "The hotel should be 

The code below is implemented in `CorpusGoalGenerator.standardise_goal`:

In [8]:
# Count, per domain goal, how often a constraint set (`info` / `book`) comes
# with a `fail_*` counterpart, and how often the two hold different slot names.
# Side effect: every goal in `goals` is cleaned and back-filled in place.
fields = ["info", "book"]
total_goals = 0
fail_info_goals = 0
fail_book_goals = 0
inconsistent_goals = dict.fromkeys(fields, 0)

for goal_id, dialogue in goals.items():
    total_goals += 1
    corpus_goal = dialogue["goal"]
    CorpusGoalGenerator._clean_goal(corpus_goal)
    for domain, domain_goal in corpus_goal.items():
        for field in fields:
            fail_field = f"fail_{field}"
            # only domains that carry both the subgoal and its fail_* twin matter
            if field not in domain_goal or fail_field not in domain_goal:
                continue
            if field == "book":
                fail_book_goals += 1
            else:
                fail_info_goals += 1
            constraints = domain_goal[field]
            fail_constraints = domain_goal[fail_field]
            # neither subgoal is expected to be empty at this point
            assert constraints
            assert fail_constraints
            # the pair is inconsistent when the two subgoals name different slots
            if constraints.keys() != fail_constraints.keys():
                inconsistent_goals[field] += 1
            # back-fill: slots absent from the fail_* subgoal inherit the value
            # of the main subgoal; slots already present are left untouched
            for slot, value in constraints.items():
                fail_constraints.setdefault(slot, value)

pprint(f"There were {total_goals} goals ..")
pprint(f"There were {fail_book_goals} dialogues with booking failure")
pprint(f"Book inconsistent: {inconsistent_goals['book']}")
pprint(f"There were {fail_info_goals} dialogues with info failure")
pprint(f"Info inconsistent: {inconsistent_goals['info']}")

'There were 10438 goals ..'
'There were 1087 dialogues with booking failure'
'Book inconsistent: 1087'
'There were 4141 dialogues with info failure'
'Info inconsistent: 0'


Goal after standardisation. Note that `info`/`fail_info` and `book`/`fail_book` now follow the same convention: every slot of `book` that is missing from `fail_book` is copied over, while slots already present in `fail_book` keep their own (different) value. This ensures consistent evaluator behaviour across all subdomains.

In [9]:
# Show the same example goal again; the counting cell above cleaned and
# back-filled it in place.
pprint(goals['MUL2674.json']["goal"])

{
    'hotel': {
        'info': {
            'type': 'guesthouse',
            'stars': '4',
            'parking': 'yes'
        },
        'fail_info': {
            'type': 'hotel',
            'stars': '5',
            'parking': 'yes'
        },
        'book': {
            'people': '8',
            'day': 'monday',
            'stay': '2'
        },
        'fail_book': {
            'stay': '3',
            'people': '8',
            'day': 'monday'
        }
    },
    'attraction': {
        'info': {'type': 'college'},
        'reqt': ['area', 'postcode']
    }
}
