In [1]:
import os
import re
import ast
import json
import nltk
import string
import openai
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm
from langchain import PromptTemplate

from src.DST.evaluate_utils import remapping
from src.DST.dst import SLOTS_DESCRIPTIONS, SLOTS_REVERSE_REMAPPING
from src.config import CONFIG

from dataclasses import dataclass, field
from typing import Optional
from transformers import TrainingArguments
from src.DST.evaluate_utils import unpack_belief_states, fix_typos, nested_fix, remapping


pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 500)



@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to utilize.
    """
    model_name_or_path: Optional[str] = field(
        default="openai/gpt-3.5-turbo",
        metadata={"help": "The path of the HuggingFace model."}
    )
    use_int8: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to use int8 model or not."}
    )
    use_deepspeed: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to use deepspeed model or not."}
    )
    

@dataclass
class DataArguments:
    """
    Arguments pertaining to the data loading and preprocessing pipeline.
    """
    dataset_name: Optional[str] = field(
        default=None,
        metadata={"help": "Train dataset path"}
    )
    dataset_names: Optional[str] = field(
        default=None,
        metadata={"help": "Train dataset paths"}
    )
    root_data_path: Optional[str] = field(
        default="./data", metadata={"help": "The path to the data directory."},
    )
    mwoz_path: Optional[str] = field(
        default="/home/willy/instructod/MultiWOZ_2.1/",
        metadata={"help": "MWOZ path"}
    )
    dialog_history_limit_dst: Optional[int] = field(
        default=0,
        metadata={"help": "Lenght of dialogue history for dst"}
    )
    dialog_history_limit_dst_recorrect: Optional[int] = field(
        default=0,
        metadata={"help": "Lenght of dialogue history for dst update"}
    )
    dialog_history_limit_rg: Optional[int] = field(
        default=20,
        metadata={"help": "Lenght of dialogue history for response generation"}
    )
    dialog_history_limit_e2e: Optional[int] = field(
        default=20,
        metadata={"help": "Lenght of dialogue history for e2e"}
    )
    single_domain_only: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to keep only the single domain sample or not"}
    )
    with_slot_description: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to use slot description or not for DST"}
    )
    with_req_inf_differentiation: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to differentiate between require and inform slot for DST"}
    )
    with_all_slots: Optional[bool] = field(
        default=True,
        metadata={"help": "Whether to use all slots or not"}
    )
    debug_mode: Optional[bool] = field(
        default=False,
        metadata={"help": "debug mode to only try 20 samples"}
    )
    start_idx: Optional[int] = field(
        default=0,
        metadata={"help": "Starting index to restart the prediction if needed"}
    )
    save_path: Optional[str] = field(
        default="results/",
        metadata={"help": "save path"}
    )
    save_every: Optional[int] = field(
        default=5,
        metadata={"help": "every step to save in case api fail"}
    )
    db_format_type: Optional[str] = field(
        default="1",
        metadata={"help": "1 is more precise, 2 is more concise for db integration"},
    )
    with_slot_domain_diff: Optional[bool] = field(
        default=False,
        metadata={"help": "differentiation between slot and domain"}
    )
    

@dataclass
class PromptingArguments(TrainingArguments):
    """
    Arguments pertraining to the prompting pipeline.
    """
    output_dir: Optional[str] = field(
        default="./out",
        metadata={"help": "Output directory"},
    )
    task: Optional[str] = field(
        default="dst",
        metadata={"help": "Task to perform"}
    )
    max_requests_per_minute: Optional[int] = field(
        default=20,
        metadata={"help": "Max number of requests for OpenAI API."}
    )
    openai_api_key_name: Optional[str] = field(
        default="OPENAI_API_KEY",
        metadata={"help": "OpenAI API key name."}
    )

class PromptConstructor():
    def __init__(self, 
                 config):
        self.config = config
        self.instructions = config["INSTRUCTIONS"]
        self.prompt_templates = config["PROMPT_TEMPLATES"]
        self.examples = config["EXAMPLES"]
        
    def _get_slots_from_domains(self, domains, ontology, with_slot_description, with_all_slots, with_slot_domain_diff):
        
        if with_all_slots:
            domains = ["restaurant", "train", "attraction", "hotel", "taxi"]
        
        slots = []
        for slot in list(ontology.keys()):
            splitted_slot = slot.split("-")
            if splitted_slot[0] in domains:
                if with_slot_domain_diff:
                    if splitted_slot[-1] not in slots:
                        slots.append(splitted_slot[-1])
                else:
                    slots.append(splitted_slot[0] + "-" + splitted_slot[-1])
        
        slots_info = []
        added_slots = []
        if with_slot_description:
            for slot in slots:
                splitted_slot = slot.split("-")
                if with_slot_domain_diff:
                    if slot in added_slots:
                        continue
                    slots_info.append(f"name: {slot}, description: {SLOTS_DESCRIPTIONS[slot.lower()]}")
                    added_slots.append(slot)
                else:
                    slots_info.append(f"name: {slot}, description: {SLOTS_DESCRIPTIONS[splitted_slot[1].lower()]}")

                    
            slots = slots_info
        
        slots_prompt = "\n".join(slots)
        if with_slot_domain_diff:
            return slots_prompt + f"\n\nDOMAINS: {', '.join(domains)}"
        else:
            return slots_prompt
            
                
                
#         if with_all_slots:
#             domains = "all"
        
#         if with_slot_description:
#             with_req_inf_differentiation = False #Slot description is the discriminator

#         if domains == "all":
#             if with_req_inf_differentiation:
#                 req_slots = ", ".join(self.config["multiwoz21"]["all_requestable_slots"])
#                 inf_slots = ", ".join(self.config["multiwoz21"]["all_informable_slots"])
#             else:
#                 slots = set(self.config["multiwoz21"]["all_requestable_slots"] + 
#                             self.config["multiwoz21"]["all_informable_slots"])
#                 slots = ", ".join(slots)
#         elif not isinstance(domains, list):
#             raise ValueError("""Provided domain should be either 'all' or list of valid domain names:
#                                 - for multiwoz2.1 and 2.4: taxi, restaurant, hotel, train, attraction""")
#         else:
#             req_slots = ""
#             inf_slots = ""
#             domain_req_slots = []
#             domain_inf_slots = []
#             for domain in domains:
#                 domain_req_slots += self.config["multiwoz21"]["requestable_slots"][domain]
#                 domain_inf_slots += self.config["multiwoz21"]["informable_slots"][domain]
#             if with_req_inf_differentiation:
#                 domain_req_slots = set(domain_req_slots)
#                 domain_inf_slots = set(domain_inf_slots)
#                 req_slots += ", ".join(domain_req_slots)
#                 inf_slots += ", ".join(domain_inf_slots)
#             else:
#                 slots = set(domain_req_slots + domain_inf_slots)
#                 slots = ", ".join(slots)

#         if with_req_inf_differentiation:
#             slots_info = f"Requestable slots: {req_slots}\nInformable slots: {inf_slots}"
#         else:
#             slots_info = f"{slots}"

#         if with_slot_description:
#             slots = slots.split(", ")
#             slots_info = ""
#             for slot in slots:
#                 if slot not in self.config["multiwoz21"]["all_informable_slots"]:
#                     continue
#                 slots_info += f"name: {slot}, description: {SLOTS_DESCRIPTIONS[slot]}\n"
#             slots_info = slots_info[:-2]
        
#         return slots_info
    
    
    def _build_prompt(self, mode="", example="", dialogue_context="", ontology="", slots="", dialogue_acts="", belief_states="", database=""):
        prompt = ""
        if mode == "dst":
            instruction = self.instructions["instruction_with_slots"]
            template_variables = self.prompt_templates["template_with_slots"]
            template = PromptTemplate(input_variables= template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     slots=slots,
                                     example=example,
                                     dialogue_context=dialogue_context)
            
        elif mode == "dst_recorrect":
            instruction = self.instructions["instruction_with_slots_recorrect"]
            template_variables = self.prompt_templates["template_with_slots_recorrect"]
            template = PromptTemplate(input_variables= template_variables["input_variables"],
                                      template = template_variables["template"])            
            prompt = template.format(instruction=instruction,
                                    slots=slots,
                                    dialogue_context=dialogue_context,
                                    belief_states=belief_states)
            
        elif mode == "database_query":
            instruction = self.instructions["instruction_query_database"]
            template_variables = self.prompt_templates["template_query_database"]
            template = PromptTemplate(input_variables= template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     belief_states=belief_states)
            
        elif mode == "response_generation":
            example = self.config["EXAMPLES"]["response_generation"]
            
            instruction = self.instructions["instruction_response_generation"]
            template_variables = self.prompt_templates["template_response_generation"]
            template = PromptTemplate(input_variables = template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     example=example,
                                     dialogue_context=dialogue_context)
        elif mode == "e2e":
            instruction = self.instructions["instruction_e2e"]
            template_variables = self.prompt_templates["template_e2e"]
            template = PromptTemplate(input_variables = template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     database=database,
                                     dialogue_context=dialogue_context)

        else:
            raise ValueError("'mode' should be one of: [dst, dst_recorrect, database_query, response_generation, e2e]")
        
        return prompt


class MWOZ_Dataset(PromptConstructor):
    def __init__(self,
                 config,
                 data_args):
        PromptConstructor.__init__(self, config)
        self.dataset = {"id":[],
                        "dialogue_id":[],
                        "dialogue_context":[],
                        "turn":[],
                        "prompt_dst":[],
                        "prompt_dst_update":[],
                        "prompt_rg":[],
                        "prompt_e2e":[],
                        "domains":[],
                        "turn_domain":[],
                        "gold_turn_bs":[],
                        "gold_bs":[],
                        "gold_act":[],
                        "gold_response":[],
                        "gold_database_result":[],
                        }
        
        print("Loading data...")
        self.all_data, self.testfiles, self.system_acts, self.ontology = self._get_mwoz_data(data_args.mwoz_path)
        print("Loading databases...")
        self.dbs_lexicalized = self._get_dbs_lexicalized(data_args.mwoz_path, data_args.db_format_type)
        self.idx = 0
        self.dialog_history_limit_dst = data_args.dialog_history_limit_dst
        self.dialog_history_limit_rg = data_args.dialog_history_limit_rg
        self.dialog_history_limit_e2e = data_args.dialog_history_limit_e2e
        self.single_domain_only = data_args.single_domain_only
        self.with_slot_description = data_args.with_slot_description
        self.with_slot_domain_diff = data_args.with_slot_domain_diff
        self.with_all_slots = data_args.with_all_slots
        self.all_domains = ["restaurant", "taxi", "hotel", "train", "attraction"]

        print("Processing mwoz...")
        for sample in tqdm(self.all_data):
            if sample in self.testfiles:
                dialogue_log = self.all_data[sample]["log"]
                self._process_dialogue_log(sample=sample,
                                           dialogue_log=dialogue_log)

        self.dataset = pd.DataFrame(self.dataset)
        if self.single_domain_only:
            for index, row in tqdm(self.dataset.iterrows()):
                if "sng" not in row["dialogue_id"].lower():
                    self.dataset.drop(index, inplace=True)

        for index, row in self.dataset.iterrows():
            if row["turn_domain"] == "":
                self.dataset.loc[index, 'turn_domain'] = row["domains"][0]

                    
    def _get_mwoz_data(self, mwoz_path):
        data_path = os.path.join(mwoz_path, "data.json")
        testListFile_path = os.path.join(mwoz_path, "testListFile.txt")
        system_acts_path = os.path.join(mwoz_path, "system_acts.json")
        ontology_path = os.path.join(mwoz_path, "ontology.json")

        with open(data_path, "r") as f:
            all_data = json.load(f)
            
        with open(testListFile_path, "r") as f:
            testfiles = f.read()
        testfiles = testfiles.split("\n")
        
        with open(system_acts_path, "r") as f:
            system_acts = json.load(f)
            
        with open(ontology_path, "r") as f:
            ontology = json.load(f)
            
        return all_data, testfiles, system_acts, ontology
    
    def _get_dbs_lexicalized(self, mwoz_path, format_type):
        domains = ["restaurant", "hotel", "train", "attraction"]
        keep_data = {"restaurant":["address", "area", "food", "name", "pricerange", "phone", "postcode"],
                    "attraction":["name", "area", "address", "type", "postcode"],
                    "hotel":["name", "address", "area", "phone", "postcode", "pricerange", "stars"],
                    "train":["departure", "destination"]}
        dbs_lexicalized = {}
        for domain in domains:
            db_path = os.path.join(mwoz_path, f"{domain}_db.json")
            with open(db_path, "r") as f:
                db_data = json.load(f)

            db_lexicalized = []
            if format_type == "1":
                for row in db_data:
                    row_keep = []
                    for key in keep_data[domain]:
                        if key in row:
                            row_keep.append(f"{key}: {row[key]}")
                    db_lexicalized.append(", ".join(row_keep))
            
            elif format_type == "2":
                #more concise db to fit in context length limit
                db_lexicalized.append(", ".join(keep_data[domain]))
                for row in db_data:
                    row_keep = []
                    for key in keep_data[domain]:
                        if key in row:
                            row_keep.append(f"{row[key]}")
                    db_lexicalized.append(", ".join(row_keep))
                    # db_lexicalized.append(", ".join([f"{row[key]}" for key in keep[domain]]))
            dbs_lexicalized[domain] = "\n".join(set(db_lexicalized))

        return dbs_lexicalized
    
    def _process_dialogue_log(self, sample, dialogue_log):

        dialog_history_memory_dst = []
        dialog_history_memory_rg = []
        dialog_history_memory_e2e = []
        dialog_history_dst = ""
        dialog_history_rg = ""
        dialog_history_e2e = ""
        turn_domain = ""
        domains = self._get_domains_from_log(dialogue_log)
        slots = self._get_slots_from_domains(domains=domains, 
                                             ontology=self.ontology,
                                             with_slot_description=self.with_slot_description,
                                             with_slot_domain_diff=self.with_slot_domain_diff,
                                             with_all_slots=self.with_all_slots) # or all
        if self.dialog_history_limit_dst == 0:
            example = self.examples["dst_dh0"]
        else:
            example = self.examples["dst_dh-1"]

        for turn_nb, turn in enumerate(dialogue_log):

            if turn_nb % 2 == 0:
                speaker = "USER"
            else:
                speaker = "SYSTEM"
            
            utterance = f"""{speaker}: {turn["text"]}\n"""
            dialog_act = turn["dialog_act"]
            cur_system_act = self.system_acts[sample.split(".")[0]][str((turn_nb//2)+1)]
            
            dialogue_context_dst = dialog_history_dst + utterance
            prompt_dst = self._build_prompt(mode="dst",
                                            slots=slots,
                                            example=example,
                                            dialogue_context=dialogue_context_dst)
            
            lexicalized_act = self._lexicalize_act(cur_system_act)
            dialogue_context_rg = dialog_history_rg + utterance + f"ACT:{lexicalized_act}\nSYSTEM:"
            prompt_rg = self._build_prompt(mode="response_generation",
                                            dialogue_context=dialogue_context_rg)
            
            dialogue_context_e2e = dialog_history_e2e + utterance + "SYSTEM:"
    
            turn_domain = self._get_domain_from_turn(turn_domain, cur_system_act)
            if turn_domain and turn_domain != "taxi":
                database = self.dbs_lexicalized[turn_domain]
            else:
                database = ""
            prompt_e2e = self._build_prompt(mode="e2e",
                                            database=database,
                                            dialogue_context=dialogue_context_e2e).replace("\n\n\n", "\n")

            dialog_history_dst, dialog_history_memory_dst = self._update_dialogue_memory(utterance, 
                                                                                         dialogue_log, 
                                                                                         self.dialog_history_limit_dst, 
                                                                                         dialog_history_memory_dst)
            dialog_history_rg, dialog_history_memory_rg = self._update_dialogue_memory(utterance, 
                                                                                       dialogue_log, 
                                                                                       self.dialog_history_limit_rg,
                                                                                       dialog_history_memory_rg)
            dialog_history_e2e, dialog_history_memory_e2e = self._update_dialogue_memory(utterance, 
                                                                                         dialogue_log, 
                                                                                         self.dialog_history_limit_e2e, 
                                                                                         dialog_history_memory_e2e) 
                
            metadata = turn["metadata"]
            bspn = {}
            if metadata:
                for domain in domains:
                    for k, v in metadata[domain].items():
                        for slot, value in v.items():
                            if isinstance(value, str) and value not in ["", "not mentioned", "none"]:
                                bspn[domain+"-"+slot] = value
            self.idx += 1
            if turn_nb % 2 == 0:
                self.dataset["gold_turn_bs"].append(dialog_act)
                self.dataset["dialogue_context"].append(dialogue_context_dst)
                self.dataset["gold_database_result"].append(None) 
                self.dataset["turn"].append(turn_nb//2)
                self.dataset["domains"].append(domains)
                self.dataset["id"].append(self.idx//2)
                self.dataset["dialogue_id"].append(sample)
                self.dataset["prompt_dst"].append(prompt_dst)
                self.dataset["prompt_dst_update"].append(prompt_dst)
                self.dataset["prompt_rg"].append(prompt_rg)
                self.dataset["prompt_e2e"].append(prompt_e2e)
                self.dataset["turn_domain"].append(turn_domain)
            else:
                self.dataset["gold_response"].append(utterance)
                self.dataset["gold_bs"].append(bspn)
                self.dataset["gold_act"].append(dialog_act)

    def _update_dialogue_memory(self, utterance, dialogue_log, dialog_history_limit, dialog_history_memory):
        if dialog_history_limit != 0:
            if dialog_history_limit == -1:
                dialog_history_limit = len(dialogue_log)
            if len(dialog_history_memory) >= dialog_history_limit:
                dialog_history_memory.pop(0)
            dialog_history_memory.append(utterance)

        dialog_history = "".join(dialog_history_memory)
        return dialog_history, dialog_history_memory
    
    def _lexicalize_act(self, act):
        if act == "No Annotation":
            return "None"
        
        lexicalized_acts = []
        lexicalize_mapping = {"leave": "leave time",
                              "arrive":"arrival time",
                              "departure":"departure place",
                              "post":"postcode",
                              "addr":"address"}

        for act, slot_values in act.items():


            if "request" in act.lower():
                requests = []
                for (slot, value) in slot_values:
                    slot = slot.lower()
                    if slot in lexicalize_mapping:
                        slot = lexicalize_mapping[slot]
                    if slot == "none":
                        break
                    else:
                        requests.append(slot)
                if requests:
                    lexicalized_act = "Request the user about " + ", ".join(requests) + "."
                    lexicalized_acts.append(lexicalized_act)

            elif "recommend" in act.lower():
                recommends = []
                for (slot, value) in slot_values:
                    slot, value = slot.lower(), value.lower()
                    if slot in lexicalize_mapping:
                        slot = lexicalize_mapping[slot]
                    if slot == "none":
                        break
                    else:
                        recommends.append(value)
                if recommends:
                    lexicalized_act = "Recommend the user for " + ", ".join(recommends) + "."
                    lexicalized_acts.append(lexicalized_act)

            elif "inform" in act.lower():
                informs = []
                for (slot, value) in slot_values:
                    slot, value = slot.lower(), value.lower()
                    if slot in lexicalize_mapping:
                        slot = lexicalize_mapping[slot]
                    if slot == "none":
                        break
                    else:
                        informs.append(f"the {slot} is {value}")
                if informs:
                    lexicalized_act = "Inform the user that " + ", ".join(informs) + "."  
                    lexicalized_acts.append(lexicalized_act)

            else:
                pass
        if lexicalized_acts:
            return " ".join(lexicalized_acts)
        else:
            return "None"
        
    def _get_domain_from_turn(self, domain, act):
        for k in act:
            turn_domain = k.lower().split("-")[0]
            if turn_domain in self.all_domains:
                return turn_domain
        return domain
            

    def _get_domains_from_log(self, dialogue_log):
        domains = []
        for log in dialogue_log:
            for domain_act in log["dialog_act"]:
                domain = domain_act.split("-")[0].lower()
                if domain in self.all_domains and domain not in domains:
                    domains.append(domain)
        return domains
                
                
def evaluate_dst(results_df, vocal=True, save_path=None):
    global_turns = 0    
    global_jga = 0
    results_single_domain = {"taxi":{"turns":0, "correct_turns_jga":0, "correct_slots":0, "total_slots":0, "slot_f1":0},
                            "restaurant":{"turns":0, "correct_turns_jga":0, "correct_slots":0, "total_slots":0, "slot_f1":0},
                            "hotel":{"turns":0, "correct_turns_jga":0, "correct_slots":0, "total_slots":0, "slot_f1":0},
                            "train":{"turns":0, "correct_turns_jga":0, "correct_slots":0, "total_slots":0, "slot_f1":0},
                            "attraction":{"turns":0, "correct_turns_jga":0, "correct_slots":0, "total_slots":0, "slot_f1":0},
                            "all":{"global_turns":0, "global_f1":0}}
    
    for _, row in results_df.iterrows():
        unpacked_gold = unpack_belief_states(row["gold_bs"], "gold")
        unpacked_pred = unpack_belief_states(row["preds"], "pred")
        domains = row["domains"]
        if isinstance(domains, str):
            domains = ast.literal_eval(domains)

        if set(unpacked_gold)==set(unpacked_pred):
            global_jga += 1
            if len(domains) == 1:
                results_single_domain[domains[0]]["correct_turns_jga"] += 1

        gold_values = [gold.split("-")[1] for gold in unpacked_gold]
        pred_values = [pred.split("-")[1] for pred in unpacked_pred]
        F1, recall, precision = compute_prf(gold_values, pred_values)
        if len(domains) == 1:
            results_single_domain[domains[0]]["slot_f1"] += F1
            results_single_domain[domains[0]]["turns"] += 1
        results_single_domain["all"]["global_f1"] += F1
        results_single_domain["all"]["global_turns"] += 1
        global_turns += 1

    total_single_domain_jga = 0
    total_single_domain_turns = 0
    for domain in results_single_domain:
        if domain == "all":
            continue
        domain_slot_f1 = results_single_domain[domain]["slot_f1"]
        domain_jga = results_single_domain[domain]["correct_turns_jga"]
        domain_turns = results_single_domain[domain]["turns"]
        total_single_domain_jga += domain_jga
        total_single_domain_turns += domain_turns
        results_single_domain[domain]["JGA"] = domain_jga/domain_turns
        results_single_domain[domain]["SLOT-F1"] = domain_slot_f1/domain_turns

        if vocal:
            print(f"""For {domain}, JGA: {results_single_domain[domain]["JGA"]} - SLOT-F1: {results_single_domain[domain]["SLOT-F1"]}""")
    jga_single_domain_average = total_single_domain_jga/total_single_domain_turns
    jga_average = global_jga/global_turns    
    slot_f1_average = results_single_domain["all"]["global_f1"] / results_single_domain["all"]["global_turns"]
    if vocal:
        print(f"""Average JGA in single domain samples only: {jga_single_domain_average}""")
        print(f"""Average JGA overall: {jga_average}""")
        print(f"""Average Slot F1 Overall: {slot_f1_average}""")

    results = results_single_domain
    results["JGA_single_domain_average"] = jga_single_domain_average
    results["JGA_average"] = jga_average

    return results


def completion(prompt, model):            
    completion = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )
    response = completion.choices[0].message.content.strip()
    return response    

2023-06-21 14:03:10.587187: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-21 14:03:11.432214: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-06-21 14:03:11.432312: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


In [2]:
model_args = ModelArguments()
data_args = DataArguments()
data_args.single_domain_only = True
data_args.dialog_history_limit_dst = 0
data_args.dialog_history_limit_e2e = -1
data_args.dialog_history_limit_rg = -1
data_args.with_slot_domain_diff = False
data_args.with_all_slots = True
data_args.with_slot_description = False

In [3]:
#load mwoz21
mwoz = MWOZ_Dataset(CONFIG, data_args)
dataset = mwoz.dataset

Loading data...
Loading databases...
Processing mwoz...


100%|██████████| 10438/10438 [00:02<00:00, 4062.08it/s]
7372it [00:08, 914.61it/s] 


In [7]:
len(dataset)

1053

In [5]:
#dataset format for multiwoz2.1
dataset.head(2)

Unnamed: 0,id,dialogue_id,dialogue_context,turn,prompt_dst,prompt_dst_update,prompt_rg,prompt_e2e,domains,turn_domain,gold_turn_bs,gold_bs,gold_act,gold_response,gold_database_result
0,0,SNG0073.json,USER: I would like a taxi from Saint John's co...,0,Generate the belief state of the very last dia...,Generate the belief state of the very last dia...,"In a task oriented dialogue setting, generate ...",Generate the answer of the SYSTEM in the follo...,[taxi],taxi,"{'Taxi-Inform': [['Dest', 'pizza hut fen ditto...",[taxi] dest pizza hut fenditton depart saint j...,"{'Taxi-Request': [['Leave', '?'], ['Arrive', '...",SYSTEM: What time do you want to leave and wha...,
1,1,SNG0073.json,USER: I want to leave after 17:15.\n,1,Generate the belief state of the very last dia...,Generate the belief state of the very last dia...,"In a task oriented dialogue setting, generate ...",Generate the answer of the SYSTEM in the follo...,[taxi],taxi,"{'Taxi-Inform': [['Leave', '17:15']]}",[taxi] leave 17:15 dest pizza hut fenditton de...,"{'Taxi-Inform': [['Car', 'blue honda'], ['Phon...",SYSTEM: \nBooking completed! your taxi will be...,


In [26]:
rg_results_gpt4 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-4_rg_full_output.csv")
rg_results_gpt3 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-3.5-turbo_rg_full_output_latestSave.csv")

In [27]:
rg_results_gpt4.head(0)

Unnamed: 0.1,Unnamed: 0,id,dialogue_id,dialogue_context,turn,prompt_dst,prompt_dst_update,prompt_rg,prompt_e2e,domains,turn_domain,gold_turn_bs,gold_bs,gold_act,gold_response,gold_database_result,preds


In [13]:
for idx, row in rg_results_gpt4.iterrows():
    print(row["gold_response"])
    print(row["preds"])
    print("=============")

SYSTEM: What time do you want to leave and what time do you want to arrive by?

When would you like to leave from Saint John's College and what time do you want to arrive at Pizza Hut Fen Ditton?
SYSTEM: 
Booking completed! your taxi will be blue honda Contact number is 07218068540

A blue Honda taxi will be available for you. You can contact the driver at 07218068540.
SYSTEM: You are welcome.  Is there anything else I can help you with today?

You're welcome! If you have any more questions or need further assistance, feel free to ask. Enjoy your ride!
SYSTEM: you too! thank you

You too! Have a great day! If you need any assistance in the future, feel free to ask. Goodbye!
SYSTEM: I don't seem to be finding anything called Nusha.  What type of food does the restaurant serve?

What type of food are you looking for at Nusha?
SYSTEM: Could you double check that you've spelled the name correctly? The closest I can find is Nandos.

I found a restaurant called Nandos, is that the one you me

In [66]:
import string
translator = str.maketrans("", "", string.punctuation)

idx = 4
gold = rg_results_gpt4['gold_response'].tolist()[idx].replace("SYSTEM: ", "").replace("\n", "").lower().translate(translator)
pred = rg_results_gpt4['preds'].tolist()[idx].lower().translate(translator)
# pred = rg_results_gpt4['preds'].tolist()[idx].lower()
print(gold)
print(pred)
print("=====")
print(nltk.word_tokenize(gold.lower()))
print(nltk.word_tokenize(pred.lower()))

i dont seem to be finding anything called nusha  what type of food does the restaurant serve
what type of food are you looking for at nusha
=====
['i', 'dont', 'seem', 'to', 'be', 'finding', 'anything', 'called', 'nusha', 'what', 'type', 'of', 'food', 'does', 'the', 'restaurant', 'serve']
['what', 'type', 'of', 'food', 'are', 'you', 'looking', 'for', 'at', 'nusha']


In [67]:
# sentence_bleu(nltk.word_tokenize(gold), nltk.word_tokenize(pred), weights=(1, 0, 0, 0))
sentence_bleu([nltk.word_tokenize(gold)], nltk.word_tokenize(pred))

0.13793247918215468

In [82]:
bleu_scores = []
for i in tqdm(range(len(rg_results_gpt4))):
    gold = rg_results_gpt4['gold_response'].tolist()[i].replace("SYSTEM: ", "").replace("\n", "").lower().translate(translator)
    try:
        pred = rg_results_gpt4['preds'].tolist()[i].lower().translate(translator)
    except:
        print(pred)
    bleu_score = sentence_bleu([nltk.word_tokenize(gold)], nltk.word_tokenize(pred), weights=(0, 0, 0, 1))
    bleu_scores.append(bleu_score)
    

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|██████████| 7372/7372 [00:05<00:00, 1446.46it/s]


In [83]:
#GPT4
sum(bleu_scores) / len(bleu_scores)

0.046774353549273895

In [81]:
#GPT3
sum(bleu_scores) / len(bleu_scores)

0.045344365404115794

In [85]:
rg_results_gpt4.head(2)

Unnamed: 0.1,Unnamed: 0,id,dialogue_id,dialogue_context,turn,prompt_dst,prompt_dst_update,prompt_rg,prompt_e2e,domains,turn_domain,gold_turn_bs,gold_bs,gold_act,gold_response,gold_database_result,preds
0,0,0,SNG0073.json,USER: I would like a taxi from Saint John's co...,0,Generate the belief state of the very last dia...,Generate the belief state of the very last dia...,"In a task oriented dialogue setting, generate ...",Generate the answer of the SYSTEM in the follo...,['taxi'],taxi,"{'Taxi-Inform': [['Dest', 'pizza hut fen ditto...",[taxi] dest pizza hut fenditton depart saint j...,"{'Taxi-Request': [['Leave', '?'], ['Arrive', '...",SYSTEM: What time do you want to leave and wha...,,When would you like to leave from Saint John's...
1,1,1,SNG0073.json,USER: I want to leave after 17:15.\n,1,Generate the belief state of the very last dia...,Generate the belief state of the very last dia...,"In a task oriented dialogue setting, generate ...",Generate the answer of the SYSTEM in the follo...,['taxi'],taxi,"{'Taxi-Inform': [['Leave', '17:15']]}",[taxi] leave 17:15 dest pizza hut fenditton de...,"{'Taxi-Inform': [['Car', 'blue honda'], ['Phon...",SYSTEM: \nBooking completed! your taxi will be...,,A blue Honda taxi will be available for you. Y...


In [11]:
# from word2number import w2n

# rg_results_gpt4 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-4_rg_full_output.csv")
# rg_results_gpt3 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-3.5-turbo_rg_full_output_latestSave.csv")
# rg_results_gpt3.dropna(subset=['preds'], inplace=True)

# def add_delexicalize_response(df_results):
#     import ast
#     delex_preds = []

#     for idx, row in df_results.iterrows():   
#         if isinstance(row["gold_act"], str):
#             gold_act = ast.literal_eval(row["gold_act"])
#         else:
#             gold_act = row["gold_act"]
#         pred = row["preds"].lower()
#         delex_pred = pred
#         for k, v in gold_act.items():
#             if "inform" in k.lower() or "recommend" in k.lower():
#                 for slot_values in v:
#                     placeholder = "[" + slot_values[0].lower() + "_value]"
#                     delex_pred = delex_pred.replace(slot_values[1].lower(), placeholder)
#                     if slot_values[1].lower() != "two 2":
#                         try:
#                             converted_nb = w2n.word_to_num(slot_values[1].lower())
#                             delex_pred = delex_pred.replace(str(converted_nb), placeholder)
#                         except:
#                             pass

#             else:
#                 continue
#         delex_preds.append(delex_pred)
        
#     df_results["delexicalized_preds"] = delex_preds
#     return df_results
                

    

In [4]:
# rg_results_gpt4 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-4_rg_full_output.csv")
# rg_results_gpt3 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-3.5-turbo_rg_full_output_latestSave.csv")

In [6]:
# rg_results_gpt4 = add_delexicalize_response(rg_results_gpt4)
# rg_results_gpt3 = add_delexicalize_response(rg_results_gpt3)

In [21]:
# for idx, row in rg_results_gpt4.iterrows():
#     print(row["gold_turn_bs"])
#     print(row["gold_act"])
#     print(row["delexicalized_preds"])
#     print("---------")

#     if idx > 10:
#         break

{'Taxi-Inform': [['Dest', 'pizza hut fen ditton'], ['Depart', "saint john 's college"]]}
{'Taxi-Request': [['Leave', '?'], ['Arrive', '?']]}
when would you like to leave from saint john's college and what time do you want to arrive at pizza hut fen ditton?
---------
{'Taxi-Inform': [['Leave', '17:15']]}
{'Taxi-Inform': [['Car', 'blue honda'], ['Phone', '07218068540']]}
a [car_value] taxi will be available for you. you can contact the driver at [phone_value].
---------
{'general-thank': [['none', 'none']]}
{'general-reqmore': [['none', 'none']]}
you're welcome! if you have any more questions or need further assistance, feel free to ask. enjoy your ride!
---------
{'general-bye': [['none', 'none']]}
{'general-bye': [['none', 'none']]}
you too! have a great day! if you need any assistance in the future, feel free to ask. goodbye!
---------
{'Restaurant-Inform': [['none', 'none']], 'Attraction-Inform': [['Name', 'Nusha']]}
{'Restaurant-Request': [['Food', '?']], 'Restaurant-NoOffer': [['Na

In [5]:
rg_results_gpt4 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-4_rg_full_output.csv")
rg_results_gpt3 = pd.read_csv("/home/willy/instructod/src/RG/results/gpt-3.5-turbo_rg_full_output_latestSave.csv")
e2e_results = pd.read_csv("/home/willy/instructod/src/e2e/results/gpt-3.5-turbo_e2e_full_output.csv")
e2e_multi_results = pd.read_csv("/home/willy/instructod/src/e2e/results/gpt-3.5-turbo_e2e_agents_full_output.csv")

In [19]:
rg_results_gpt3 = rg_results_gpt3[rg_results_gpt3["dialogue_id"].str.contains("SNG")]
rg_results_gpt3 = rg_results_gpt3.dropna(subset=["turn_domain"])
rg_results_gpt3 = rg_results_gpt3.dropna(subset=["preds"])

rg_results_gpt4 = rg_results_gpt4[rg_results_gpt4["dialogue_id"].str.contains("SNG")]
rg_results_gpt4 = rg_results_gpt4.dropna(subset=["turn_domain"])
rg_results_gpt4 = rg_results_gpt4.dropna(subset=["preds"])

# e2e_results = e2e_results[e2e_results["dialogue_id"].str.contains("SNG")]
# e2e_results = e2e_results.dropna(subset=["turn_domain"])
# e2e_results = e2e_results.dropna(subset=["preds"])

pptod_e2e = json.load(open("/home/willy/instructod/pptod/E2E_TOD/inference_result/base/full_training/inference_result_e2e_evaluation_inform_89.2_success_79.4_bleu_18.62_combine_score_102.92.json", "r"))
pptod_e2e = pd.DataFrame([dict(item, **{"id":idx}) for idx, item in enumerate(pptod_e2e)])
pptod_e2e = pptod_e2e.rename(columns={"resp_gen":"preds"})
pptod_e2e = pptod_e2e[pptod_e2e["dial_id"].str.contains("sng")]
pptod_e2e = pptod_e2e.rename(columns={"dial_id":"dialogue_id", "turn_num":"turn"})
#Change dialogue id from sng0023 to SNG0023.json
pptod_e2e['dialogue_id'] = pptod_e2e['dialogue_id'].apply(lambda x: x.upper() + ".json")
pptod_e2e['turn_domain'] = pptod_e2e['turn_domain'].apply(lambda x: x[0][1:-1])
pptod_e2e = pd.merge(dataset, pptod_e2e[["dialogue_id", "turn", "id", "preds"]], on=["dialogue_id", "turn"], how="right")


e2e_agent_results = pd.read_csv("/home/willy/instructod/src/e2e/results/gpt-3.5+4-turbo_e2e_agents_full_output.csv")
e2e_agent_results = e2e_agent_results.drop(columns=["gold_response", "turn_domain", "Unnamed: 0"])
# e2e_agent_results = pd.merge(dataset, e2e_agent_results[["id", "preds", "preds_e2e_dialog_acts"]], on=["id"], how="right")
e2e_agent_results = pd.merge(dataset, e2e_agent_results, on=["id"], how="right")
e2e_agent_results = e2e_agent_results.dropna(subset=["turn_domain"])
e2e_agent_results = e2e_agent_results.dropna(subset=["preds"])

e2e_results = pd.read_csv("/home/willy/instructod/src/e2e/results/gpt-3.5-turbo_e2e_full_output.csv")
e2e_results = pd.merge(dataset, e2e_results[["id", "preds"]], on=["id"], how="right")
e2e_results = e2e_results.dropna(subset=["turn_domain"])
e2e_results = e2e_results.dropna(subset=["preds"])

In [32]:
e2e_agent_results.keys()

Index(['id', 'dialogue_id', 'dialogue_context', 'turn', 'prompt_dst',
       'prompt_dst_update', 'prompt_rg', 'prompt_e2e', 'domains',
       'turn_domain', 'gold_turn_bs', 'gold_bs', 'gold_act', 'gold_response',
       'gold_database_result', 'preds', 'prompts_e2e_query_db',
       'preds_e2e_query_db', 'preds_e2e_dialog_acts', 'prompts_e2e_rg'],
      dtype='object')

In [44]:
# e2e_agent_results = e2e_agent_results.rename(columns={"preds":"instructod_preds"})
# df = pd.merge(e2e_agent_results, e2e_results[["preds", "dialogue_id", "turn"]], on=["dialogue_id", "turn"], how="right")
# df = df.rename(columns={"preds":"naive_preds"})
# df = pd.merge(df, pptod_e2e[["preds", "dialogue_id", "turn"]], on=["dialogue_id", "turn"], how="right")
# df = df.rename(columns={"preds":"pptod_preds"})
# df = pd.merge(df, rg_results_gpt4[["preds", "dialogue_id", "turn"]], on=["dialogue_id", "turn"], how="right")
# df = df.rename(columns={"preds":"rg_preds"})
# df = df[[c for c in df if c not in ['instructod_preds']] + ['instructod_preds']]
# df = df[[c for c in df if c not in ['gold_response']] + ['gold_response']]

# df.to_csv("/home/willy/instructod/src/e2e/results/instructod_results.csv")



In [42]:
df

Unnamed: 0,id,dialogue_id,dialogue_context,turn,prompt_dst,prompt_dst_update,prompt_rg,prompt_e2e,domains,turn_domain,gold_turn_bs,gold_bs,gold_act,gold_database_result,prompts_e2e_query_db,preds_e2e_query_db,preds_e2e_dialog_acts,prompts_e2e_rg,naive_preds,pptod_preds,rg_preds,instructod_preds,gold_response
0,0.0,SNG0073.json,USER: I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.\n,0.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I would like a taxi from S...",[taxi],taxi,"{'Taxi-Inform': [['Dest', 'pizza hut fen ditton'], ['Depart', 'saint john 's college']]}","{'taxi-destination': 'pizza hut fenditton', 'taxi-departure': 'saint johns college'}","{'Taxi-Request': [['Leave', '?'], ['Arrive', '?']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",What time would you like the taxi?,i can help with that . what time would you like to leave ?,When would you like to leave from Saint John's College and what time do you want to arrive at Pizza Hut Fen Ditton?,"Sure, I can help you with that. Would you like me to confirm the booking for you?",SYSTEM: What time do you want to leave and what time do you want to arrive by?\n
1,1.0,SNG0073.json,USER: I want to leave after 17:15.\n,1.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I would like a taxi from S...",[taxi],taxi,"{'Taxi-Inform': [['Leave', '17:15']]}","{'taxi-leaveAt': '17:15', 'taxi-destination': 'pizza hut fenditton', 'taxi-departure': 'saint johns college'}","{'Taxi-Inform': [['Car', 'blue honda'], ['Phone', '07218068540']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",A taxi will be available for you at 17:30 to take you to Pizza Hut Fen Ditton. Estimated time of arrival is 17:45. Shall I book it for you?,i have booked you a [value_car] . the contact number is [value_phone] .,A blue Honda taxi will be available for you. You can contact the driver at 07218068540.,"Sure, I can arrange a taxi for you to leave after 17:15. What time would you like to arrive at Pizza Hut Fen Ditton?",SYSTEM: \nBooking completed! your taxi will be blue honda Contact number is 07218068540\n
2,2.0,SNG0073.json,USER: Thank you for all the help! I appreciate it.\n,2.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I would like a taxi from S...",[taxi],taxi,"{'general-thank': [['none', 'none']]}","{'taxi-leaveAt': '17:15', 'taxi-destination': 'pizza hut fenditton', 'taxi-departure': 'saint johns college'}","{'general-reqmore': [['none', 'none']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",You're welcome! Have a pleasant trip.,you are welcome . is there anything else i can help you with today ?,"You're welcome! If you have any more questions or need further assistance, feel free to ask. Enjoy your ride!",You're welcome! Have a great day!,SYSTEM: You are welcome. Is there anything else I can help you with today?\n
3,3.0,SNG0073.json,"USER: No, I am all set. Have a nice day. Bye.\n",3.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I would like a taxi from S...",[taxi],taxi,"{'general-bye': [['none', 'none']]}","{'taxi-leaveAt': '17:15', 'taxi-destination': 'pizza hut fenditton', 'taxi-departure': 'saint johns college'}","{'general-bye': [['none', 'none']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...","Thank you, have a great day! Goodbye.",thank you for using our service today .,"You too! Have a great day! If you need any assistance in the future, feel free to ask. Goodbye!",Thank you for using our service. Have a great day!,SYSTEM: you too! thank you\n
4,79.0,SNG01608.json,USER: Are there any Portuguese restaurants in Cambridge?\n,0.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\naddress: Newmarket Road Fen Ditt...",[restaurant],restaurant,"{'Restaurant-Inform': [['Food', 'portuguese']]}",{'restaurant-food': 'portugese'},"{'Restaurant-Inform': [['Food', 'Portuguese'], ['Choice', 'two']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",List of Portuguese restaurants in Cambridge,none.,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...","Yes, there is a Portuguese restaurant in Cambridge called Nandos. It is located at Cambridge Leisure Park Clifton Way.","yes , there are [value_choice] [value_food] restaurant -s . [value_name] and [value_name] .","Yes, there are two Portuguese restaurants in Cambridge.","I'm sorry, I couldn't find any Portuguese restaurants in Cambridge. Would you like me to search for other types of restaurants in the area?","SYSTEM: Yes there is a Portuguese restaurant in Cambridge with two different locations, would you like the addresses?\n"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1031,7337.0,SNG02319.json,"USER: Thank you, good bye.\n",4.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I'd like to book a taxi to...",[taxi],taxi,"{'general-bye': [['none', 'none']]}","{'taxi-leaveAt': '03:00', 'taxi-destination': 'scott polar museum', 'taxi-departure': 'london kings cross train station'}","{'general-greet': [['none', 'none']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",Your taxi is booked and will arrive at London Kings Cross train station at 3:00 PM. The contact number for the driver is 07089138985 and the car type is a grey Lexus. Have a safe journey to the Scott Polar Museum! Goodbye.,you are welcome . have a great day .,"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!","No problem, have a great day!","SYSTEM: Thank you for using Cambridge TownInfo centre, have a nice day.\n"
1032,7338.0,SNG0061.json,USER: I would like to book a taxi to leave after 11:00 going to Arbury Lodge Guesthouse.\n,0.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I would like to book a tax...",[taxi],taxi,"{'Taxi-Inform': [['Leave', '11:00'], ['Dest', 'arbury lodge guesthouse']]}","{'taxi-leaveAt': '11:00', 'taxi-destination': 'arbury lodge guesthouse'}","{'Taxi-Request': [['Depart', '?']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...","Sure, what date would you like to book the taxi for?",i can help with that . where are you departing from ?,Please provide the address from where you would like to depart for Arbury Lodge Guesthouse after 11:00.,"Sure, I can help you with that. Can you please confirm the date and time of your departure?",SYSTEM: Where are you departing from?\n
1033,7339.0,SNG0061.json,USER: from the gardenia\n,1.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I would like to book a tax...",[taxi],taxi,"{'Taxi-Inform': [['Depart', 'the gardenia']]}","{'taxi-leaveAt': '11:00', 'taxi-destination': 'arbury lodge guesthouse', 'taxi-departure': 'the gardenia'}","{'Taxi-Inform': [['Car', 'red skoda'], ['Phone', '07278222346']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",What date would you like to book the taxi for?,i have booked you a [value_car] . the contact number is [value_phone] .,A red Skoda taxi will pick you up from the Gardenia. The driver's phone number is 07278222346.,"Great, I can confirm a taxi booking for you to leave from Gardenia to Arbury Lodge Guesthouse after 11:00. Is that okay?","SYSTEM: All right, a red skoda will pick you up. The contact number is 07278222346.\n\n"
1034,7340.0,SNG0061.json,USER: Great! Thank you for your help! \n,2.0,"Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","Generate the dialogue state of the following dialogue between a USER and a task-oriented dialogue SYSTEM. The results should be in a single python dictionary following this format: {""domain1-slot1"":value1, ""domain2-slot2"":""value2""}. Use the provided domain and slots, and nothing else:\n\nSLOTS:\nattraction-area\nattraction-name\nattraction-type\nhotel-day\nhotel-people\nhotel-stay\nhotel-area\nhotel-internet\nhotel-name\nhotel-parking\nhotel-pricerange\nhotel-stars\nhotel-type\nrestaurant-da...","In a task oriented dialogue setting, generate a SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not answer with anything other than what is provided in the dialogue act:\n\nYou can follow this example:\nUSER: How much does the banana cost?\nACT: Inform the user that the price is 10$, the promotion is 80%, and the choice is 5, and request the amount that user wants.\nSYSTEM: There are 5 to...","Generate the answer of the SYSTEM in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You can either request more details to the user that is available in the knowledge base to complete the goal, or simply answer the user's request. Do not provide multiple choice for the user to choose, just recommend one, and generate nothing other that the SYSTEM reply. Use the following knowledge base to interact with the user:\n\nUSER: I would like to book a tax...",[taxi],taxi,"{'general-thank': [['none', 'none']]}","{'taxi-leaveAt': '11:00', 'taxi-destination': 'arbury lodge guesthouse', 'taxi-departure': 'the gardenia'}","{'general-reqmore': [['none', 'none']]}",,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",You're welcome. Have a safe trip!,you are welcome . is there anything else i can help you with today ?,"You're welcome! If you need any further assistance, feel free to ask. Have a great trip!",You're welcome! Is there anything else I can assist you with today?,SYSTEM: You're welcome! What else can I do for you? \n


In [15]:
e2e_agent_results.head(10)

Unnamed: 0.1,Unnamed: 0,id,gold_response,preds,prompts_e2e_query_db,preds_e2e_query_db,preds_e2e_dialog_acts,prompts_e2e_rg,turn_domain
0,0,0,SYSTEM: What time do you want to leave and what time do you want to arrive by?\n,"Sure, I can help you with that. Would you like me to confirm the booking for you?","Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",taxi
1,1,1,SYSTEM: \nBooking completed! your taxi will be blue honda Contact number is 07218068540\n,"Sure, I can arrange a taxi for you to leave after 17:15. What time would you like to arrive at Pizza Hut Fen Ditton?","Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",taxi
2,2,2,SYSTEM: You are welcome. Is there anything else I can help you with today?\n,You're welcome! Have a great day!,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",taxi
3,3,3,SYSTEM: you too! thank you\n,Thank you for using our service. Have a great day!,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",none,none,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",taxi
4,4,79,"SYSTEM: Yes there is a Portuguese restaurant in Cambridge with two different locations, would you like the addresses?\n","I'm sorry, I couldn't find any Portuguese restaurants in Cambridge. Would you like me to search for other types of restaurants in the area?","Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",List of Portuguese restaurants in Cambridge,none.,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",restaurant
5,5,80,"SYSTEM: I have two Turkish restuarants, both in the centre and both expensive. May I recommend anatolia?\n","I'm sorry, but there are no Portuguese or Turkish restaurants in our system. Would you like me to search for other types of restaurants in Cambridge?","Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...","Address and price range information for Portuguese restaurants in Cambridge, or information on Turkish restaurants in Cambridge.",There are no Portuguese or Turkish restaurants in the dataframe.,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",restaurant
6,6,81,SYSTEM: I am sorry. I mistook that price range. The Anatolia is in the moderate range. Would that work for you?\n,How about the Portuguese restaurant at 123 Main Street? It has a moderate price range and good reviews. Would you like me to make a reservation for you?,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",Moderately priced Portuguese restaurant in Cambridge or moderately priced Turkish restaurant in Cambridge,none.,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",restaurant
7,7,82,SYSTEM: \nBooking was successful. The table will be reserved for 15 minutes.\nReference number is : 96ZYHLN1.\n,"I apologize, but I am unable to make a reservation for Anatolia Restaurant at this time. However, I can suggest other moderately priced Turkish restaurants in the area. Would you like me to provide some options for you?","Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",Reservation information for Anatolia restaurant for 1 person at 14:00 on Monday.,I cannot provide reservation information for Anatolia Restaurant as it is not present in the dataframe.,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",restaurant
8,8,83,SYSTEM: You are more than welcome!\n\n,Thank you for using our service. Have a great day!,"Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",Reservation information for Anatolia restaurant for 1 person at 14:00 on Monday.,none.,"In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",restaurant
9,9,84,SYSTEM: I can help you with that. Do you have any special area you would like to stay? Or possibly a star request for the guesthouse?\n,"I found two guesthouses with moderate prices that might interest you. The first one is ""a and b guest house"" located at 124 Tenison Road and the second one is ""acorn guest house"" located at 154 Chesterton Road. Which one would you like me to book for you?","Suppose you have access to a database with the all necessary information, what do you need to query to the databse in order to reply to the user in the following conversation?\n\nYou can follow these examples:USER: I need fruits.\nSYSTEM: Do you have any preferences?\nUSER: Yes, apples if possible. How expensive and how many are there?\n\nNeed: Price and Count information for Apples\n\nUSER: Cool, that asnwers my question.\nSYSTEM: I am happy to help. Anything else needed?\nUSER: I'm done, t...",Guesthouse information with moderate prices,"The first two guesthouses with moderate prices are ""a and b guest house"" located at 124 Tenison Road and ""acorn guest house"" located at 154 Chesterton Road.","In a task oriented dialogue setting, generate a natural and helpful SYSTEM response to the USER query in the conversation provided in CONTEXT. You should follow the information provided in ACT to generate this answer. Do not mention that you are referring to a dataframe and don't overload the user with too many choices. You have the ability to perform confirm bookings for users:\n\nCONTEXT:\nYou can follow this example:USER: I need a place to fish\nSYSTEM: Any preference in the type of fish?...",hotel


In [102]:
def delexicalize_dbs(data_args, ontology_path):
    domains = ["restaurant", "hotel", "train", "attraction"]
    keep_data = {"restaurant":["address", "name", "food", "area", "pricerange", "phone", "postcode"],
                "attraction":["name", "area", "address", "type", "postcode", "entrance fee"],
                "hotel":["name", "address", "area", "phone", "postcode", "pricerange", "stars", "internet", "parking", "type"],
                "train":["departure", "destination", "arriveBy", "day", "leaveAt", "price", "trainID", "duration"]}
    dbs = {}
    for domain in domains:
        db_path = os.path.join(data_args.mwoz_path, f"{domain}_db.json")
        with open(db_path, "r") as f:
            db_data = json.load(f)
        db = {}
        for d in db_data: 
            for k, v in d.items():
                if k in keep_data[domain]:
                    if k in db:
                        if v not in db[k]:
                            db[k].append(v.lower())
                    else:
                        db[k] = [v.lower()]
        dbs[domain] = db

    with open(ontology_path, "r") as f:
        db_data = json.load(f)
    taxi_slots = ["departure", "destination", "arriveBy", "leaveAt"]
    book_slots = {"restaurant":["time", "day", "people"],
                  "hotel":["day", "people", "stay"],
                  "train":["people"]}

    dbs["taxi"] = {}
    for slot in taxi_slots:
        dbs["taxi"][slot] = db_data[f"taxi-semi-{slot}"]

    for domain, slots in book_slots.items():
        for slot in slots:
            if slot == "people":
                dbs[domain][slot] = [value+" people" for value in db_data[f"{domain}-book-{slot}"]] + [value+" person" for value in db_data[f"{domain}-book-{slot}"]]
            else:
                dbs[domain][slot] = db_data[f"{domain}-book-{slot}"]

    for domain in domains:
        if domain == "train":
            continue
        reordered = {k:v for k, v in dbs[domain].items() if k == "name"}
        for k, v in dbs[domain].items():
            if k != "name":
                reordered[k] = v
        dbs[domain] = reordered
    return dbs

# def delexicalize(df, delex_dbs):
#     delex_preds = []
#     for idx, row in df.iterrows():
#         pred = row["preds"]
#         domain = row["turn_domain"]
#         for k, values in delex_dbs[domain].items():
#             for v in values:
#                 if v in pred.lower():
#                     pred = pred.lower().replace(v, f"[{k.lower()}_value]")
#         delex_preds.append(pred)
#     df["delexicalized_preds"] = delex_preds
#     return df

VALUES_FIX = {#"fen ditton":"fenditton",
              "john's":"johns", "catherine's":"catherines",
              "the bridge guest":"bridge guest", "the rajmahal": "rajmahal", "the bedouin":"bedouin",
              "ian hong":"lan hong", "pizza express":"pizza hut",
              "express by holiday inn cambridge":"inn cambridge", "alpha-milton":"alpha-milton guest house", "el shaddai":"el shaddai guesthouse",
              "bringham new street":"birmingham new street",
              "king's lynn":"kings lynn",
              "nightclub":"night club", "concert hall":"concerthall", "guest house guest house":"guest house",
              "kettle's yard":"kettles yard",
              "3 00":"03:00", "9:30":"09:30", "2:30":"02:30", "1515hrs":"15:15", "9:15":"09:15", "109:30":"19:30", "9:45":"09:45", "7:15 p.m.":"07:15", "5:15":"05:15",
              "009:15":"09:15", "009:30":"09:30", "109:30":"19:30", "109:15":"19:15", "102:30":"12:30",
              "after ":"", " nights":"",
              "town centre":"centre",
             
              "free":"yes"}

def delexicalize(df, dbs, delex_column="preds"):
    delex_preds = []
    phone_pattern = (r"\d{5} \d{6}|\d{11}")
    postcode_pattern = (r"[a-z]{2}\d{1} \d{1}[a-z]{2}")
    reference_pattern = (r'reference number is (.*?)\.|reference number is (.*?) ')
    for idx, row in tqdm(df.iterrows()):
        pred = row[delex_column].lower().replace(",", "")
        domain = row["turn_domain"]
        # domain = row["turn_domain"][0][1:-1]
        if domain == "general":
            delex_preds.append(pred)
            continue
            
        for value_fix in VALUES_FIX:
            pred = pred.replace(value_fix, VALUES_FIX[value_fix])
        pred = re.sub(phone_pattern, "[value_phone]", pred)
        if re.search(postcode_pattern, pred):
            postcode = re.search(postcode_pattern, pred).group(0)
            if postcode.replace(" ", "") in dbs[domain]["postcode"]:
                pred = pred.replace(postcode, "[value_postcode]")
        if re.search(reference_pattern, pred):
            reference = re.search(reference_pattern, pred).group(0)
            delex_ref = " ".join(reference.split(" ")[:-1]) + " [value_reference]."
            pred = pred.replace(reference, delex_ref)
        
        for k, values in dbs[domain].items():
            for v in values:
                if v in pred:
                    pred = pred.replace(v, f"[value_{k.lower()}]")
        delex_preds.append(pred)
    df[f"delexicalized_{delex_column}"] = delex_preds
    return df

In [110]:
ontology_path = data_args.mwoz_path + "ontology.json"
delex_dbs = delexicalize_dbs(data_args, ontology_path)
df_updated_delex_e2e = delexicalize(e2e_results, delex_dbs)
df_updated_delex_e2e = delexicalize(df_updated_delex_e2e, delex_dbs, delex_column="gold_response")

df_updated_delex_e2e_agents = delexicalize(e2e_agent_results, delex_dbs)
df_updated_delex_e2e_agents = delexicalize(df_updated_delex_e2e_agents, delex_dbs, delex_column="gold_response")

df_updated_delex_rg = delexicalize(rg_results_gpt3, delex_dbs)
df_updated_delex_rg = delexicalize(df_updated_delex_rg, delex_dbs, delex_column="gold_response")


1053it [00:00, 5330.43it/s]
1053it [00:00, 6152.64it/s]
1022it [00:00, 4684.37it/s]
1022it [00:00, 4902.93it/s]
1032it [00:00, 5380.83it/s]
1032it [00:00, 6109.80it/s]


In [109]:
df_updated_delex_e2e["delexicalized_gold_response"]

0                                                system: what time do you want to leave and what time do you want to arrive by?\n
1                                     system: \nbooking completed! your taxi will be blue honda contact number is [value_phone]\n
2                                                   system: you are welcome.  is there anything else i can help you with today?\n
3                                                                                                    system: you too! thank you\n
79      system: yes there is a [value_food] restaurant in cambridge  with two different locations would you like the addresses?\n
                                                                  ...                                                            
7337                                            system: thank you for using c[value_departure] towninfo centre have a nice day.\n
7338                                                                                      

In [95]:
# df = pd.merge(df_updated_delex_e2e[["id", "dialogue_context", "preds", "delexicalized_preds"]], df_updated_delex_e2e_agents[["id", "preds", "delexicalized_preds"]], on=["id"], how="right")
# df = pd.merge(df, df_updated_delex_rg[["id", "preds", "delexicalized_preds"]], on=["id"], how="right")
# df = df.rename(columns={"preds_x":"preds_e2e",
#                    "delexicalized_preds_x":"delexicalized_preds_e2e",
#                    "preds_y":"preds_e2e_agents",
#                    "delexicalized_preds_y":"delexicalized_preds_e2e_agents",
#                    "preds":"preds_rg",
#                    "delexicalized_preds":"delexicalized_preds_rg"})
# df.to_csv("HE_correctness.csv")

In [94]:
len(df)

1032

In [60]:
import ast

list_fails_e2e = []

total_result = {}
cur_request_slots = {}
cur_inform_slots = {}

prev_dialogue_id = df_updated_delex["dialogue_id"][0]
for idx, row in df_updated_delex.iterrows():
    cur_dialogue_id = row["dialogue_id"]
    if cur_dialogue_id not in total_result:
        total_result[cur_dialogue_id] = {}

    if cur_dialogue_id != prev_dialogue_id:
        ##compute everything and reset variables, we are switching samples
        success = True
        if cur_request_slots:
            # print(prev_dialogue_id)
            # print(cur_request_slots)
            pass
        for k, v in cur_request_slots.items():
            if cur_request_slots:
                if v[1] == 0:
                    success = False
        if success:
            total_result[prev_dialogue_id]["success"] = 1
        else:
            total_result[prev_dialogue_id]["success"] = 0
            list_fails_e2e.append(prev_dialogue_id)
            # print(row["turn_domain"])
        
        if cur_inform_slots:
            inform_score = 0
            for k, v in cur_inform_slots.items():
                inform_score += v[1]/v[0]
            total_result[prev_dialogue_id]["inform"] = inform_score/len(cur_inform_slots)
        else:
             total_result[prev_dialogue_id]["inform"] = 1
                
        cur_request_slots = {}
        cur_inform_slots = {}
    
    delex_pred = row["delexicalized_preds"]

    #success
    if isinstance(row["gold_turn_bs"], str):
        gold_turn_bs = ast.literal_eval(row["gold_turn_bs"])
    else:
        gold_turn_bs = row["gold_turn_bs"]
    for k, v in gold_turn_bs.items():
        if "request" in k.lower():
            for slot_values in v:
                # print(slot_values[0].lower())
                if slot_values[0].lower() == "ticket":
                    slot = "price"
                elif slot_values[0].lower() == "time":
                    slot = "duration"
                elif slot_values[0].lower() == "fee":
                    slot = "entrance fee"
                elif slot_values[0].lower() == "ref":
                    slot = "reference"
                elif slot_values[0].lower() == "pricerange":
                    slot = "price"
                else:
                    slot = slot_values[0].lower()
                # request_slot = slot_values[0].lower() + "_value"
                request_slot = "value_" + slot
                if request_slot in cur_request_slots:
                    cur_request_slots[request_slot][0] += 1
                else:
                    cur_request_slots[request_slot] = [1, 0]
    
    #match
    if isinstance(row["gold_act"], str):
        gold_act = ast.literal_eval(row["gold_act"])
    else:
        gold_act = row["gold_act"]
    for k, v in gold_act.items():
        if "inform" in k.lower():
            for slot_values in v:
                inform_slot = slot_values[0].lower() + "_value"
                if inform_slot in cur_inform_slots:
                    cur_inform_slots[inform_slot][0] += 1
                else:
                    cur_inform_slots[inform_slot] = [1, 0]

                if inform_slot in delex_pred:
                    cur_inform_slots[inform_slot][1] += 1

    
    for slot in cur_request_slots:
        if slot in delex_pred:
            cur_request_slots[slot][1] += 1

    prev_dialogue_id = cur_dialogue_id
    

In [61]:
set1 = set(list_fails_e2e) - set(list_fails_e2e_agents)

In [62]:
set2 = set(list_fails_e2e_agents) - set(list_fails_e2e)

In [63]:
set2

{'SNG01391.json',
 'SNG01767.json',
 'SNG0296.json',
 'SNG0515.json',
 'SNG0527.json',
 'SNG0568.json',
 'SNG0601.json',
 'SNG0616.json',
 'SNG0690.json',
 'SNG0715.json',
 'SNG0721.json',
 'SNG0742.json',
 'SNG0767.json',
 'SNG0781.json',
 'SNG0797.json',
 'SNG0799.json',
 'SNG0832.json',
 'SNG0840.json',
 'SNG0892.json',
 'SNG0897.json',
 'SNG0940.json',
 'SNG0941.json',
 'SNG0954.json',
 'SNG1147.json'}

In [64]:
dialogue_id = "sng0456"
for idx, row in pptod_e2e[pptod_e2e["dialogue_id"] == dialogue_id].iterrows():
    print(row["resp"])
    print("----")

In [68]:
dialogue_id = "SNG0799.json"
print("domain", list(df_updated_delex[df_updated_delex["dialogue_id"] == dialogue_id]["turn_domain"])[0])
for idx, row in df_updated_delex[df_updated_delex["dialogue_id"] == dialogue_id].iterrows():
    # print(row["dialogue_context"])
    print(row["gold_turn_bs"])
    # print(row["gold_act"])
    print(row["preds"])
    print(row["delexicalized_preds"])
    print("--------")

domain hotel
{'Hotel-Inform': [['Price', 'expensive'], ['Internet', 'yes']]}
How many people will be staying with you?
how many people will be staying with you?
--------
{'Hotel-Inform': [['Type', 'guesthouse']]}
I'm sorry but none of the expensive options offer guesthouses. However, I have found that the Gonville Hotel in the city centre does offer free wifi and is an expensive hotel. Would you like me to book a room there for you?
i'm sorry but [value_internet]ne of the [value_pricerange] options offer [value_type]s. however i have found that the [value_name] in the city [value_area] does offer [value_internet] wifi and is an [value_pricerange] [value_type]. would you like me to book a room there for you?
--------
{'Hotel-Inform': [['Type', 'guesthouse'], ['Price', 'cheap']]}
Yes, we have several options for guesthouses in the cheap price range that offer free wifi. Would you like me to provide you with a list of options?
[value_internet] we have several options for [value_type]s in 

In [66]:
total_count = 0
total_multi = 0
total_single = 0
correct_multi_success = 0
correct_single_success = 0
correct_multi_match = 0
correct_single_match = 0
results = {}
L = len(total_result)
for k, v in total_result.items():
    if not v:
        continue
    if "MUL" in k:
        total_multi += 1
        correct_multi_success += v["success"]
        correct_multi_match += v["inform"]
    else:
        total_single += 1
        correct_single_success += v["success"]
        correct_single_match += v["inform"]
results["success_total"] = (correct_single_success+correct_multi_success) / L
results["success_single"] = correct_single_success / total_single
results["success_multi"] = correct_multi_success / total_multi if total_multi != 0 else 0
results["match_total"] = (correct_single_match+correct_multi_match) / L
results["match_single"] = correct_single_match / total_single
results["match_multi"] = correct_multi_match / total_multi if total_multi != 0 else 0

In [67]:
#Only 1 
results

{'success_total': 0.8161434977578476,
 'success_single': 0.8198198198198198,
 'success_multi': 0,
 'match_total': 0.026905829596412557,
 'match_single': 0.02702702702702703,
 'match_multi': 0}

## BLEU Delex

In [120]:
pptod_e2e = json.load(open("/home/willy/instructod/pptod/E2E_TOD/inference_result/base/full_training/inference_result_e2e_evaluation_inform_89.2_success_79.4_bleu_18.62_combine_score_102.92.json", "r"))
pptod_e2e = pd.DataFrame([dict(item, **{"id":idx}) for idx, item in enumerate(pptod_e2e)])
pptod_e2e = pptod_e2e.rename(columns={"resp_gen":"preds"})
pptod_e2e = pptod_e2e[pptod_e2e["dial_id"].str.contains("sng")]
pptod_e2e = pptod_e2e.rename(columns={"dial_id":"dialogue_id", "turn_num":"turn"})
#Change dialogue id from sng0023 to SNG0023.json
pptod_e2e['dialogue_id'] = pptod_e2e['dialogue_id'].apply(lambda x: x.upper() + ".json")
pptod_e2e['turn_domain'] = pptod_e2e['turn_domain'].apply(lambda x: x[0][1:-1])
pptod_e2e = pd.merge(dataset, pptod_e2e[["dialogue_id", "turn", "id", "preds", "resp"]], on=["dialogue_id", "turn"], how="right")

In [122]:
len(pptod_e2e)

1053

In [139]:
pptod_e2e.keys()

Index(['id_x', 'dialogue_id', 'dialogue_context', 'turn', 'prompt_dst',
       'prompt_dst_update', 'prompt_rg', 'prompt_e2e', 'domains',
       'turn_domain', 'gold_turn_bs', 'gold_bs', 'gold_act', 'gold_response',
       'gold_database_result', 'id_y', 'preds', 'resp'],
      dtype='object')

In [123]:
ontology_path = data_args.mwoz_path + "ontology.json"
delex_dbs = delexicalize_dbs(data_args, ontology_path)
df_updated_delex_e2e = delexicalize(e2e_results, delex_dbs)
# df_updated_delex_e2e = delexicalize(df_updated_delex_e2e, delex_dbs, delex_column="gold_response")

df_updated_delex_e2e_agents = delexicalize(e2e_agent_results, delex_dbs)
# df_updated_delex_e2e_agents = delexicalize(df_updated_delex_e2e_agents, delex_dbs, delex_column="gold_response")

df_updated_delex_rg = delexicalize(rg_results_gpt3, delex_dbs)
# df_updated_delex_rg = delexicalize(df_updated_delex_rg, delex_dbs, delex_column="gold_response")


1053it [00:00, 3845.95it/s]
1022it [00:00, 4632.59it/s]
1032it [00:00, 5437.75it/s]


In [128]:
# preds = list(pptod_e2e["preds"])
# golds = list(pptod_e2e["resp"])

In [160]:
df = pd.merge(pptod_e2e[["turn", "dialogue_id", "resp"]], df_updated_delex_rg,  on=["dialogue_id", "turn"], how="right")
df = df.dropna(subset=["preds"])
len(df)

1032

In [161]:
preds = list(df["delexicalized_preds"])
golds = list(df["resp"])

In [162]:
bleu_scores = []
for pred, gold in zip(preds, golds):
    bleu_score = nltk.translate.bleu_score.sentence_bleu([nltk.word_tokenize(gold)], nltk.word_tokenize(pred), weights=(0.25, 0.25, 0.25, 0.25))
    bleu_scores.append(bleu_score)

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [163]:
sum(bleu_scores) / len(bleu_scores)

0.07787803524296565