In [24]:
import os
import ast
import json
import string
import pandas as pd

from tqdm import tqdm
from langchain import PromptTemplate

from src.DST.evaluate_utils import remapping
from src.DST.dst import SLOTS_DESCRIPTIONS
from src.config import CONFIG

from dataclasses import dataclass, field
from typing import Optional
from transformers import TrainingArguments
from src.DST.evaluate_utils import unpack_belief_states, fix_typos, nested_fix, remapping


pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 500)



@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to utilize.
    """
    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={"help": "The path of the HuggingFace model."}
    )
    use_int8: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to use int8 model or not."}
    )
    use_deepspeed: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to use deepspeed model or not."}
    )
    

@dataclass
class DataArguments:
    """
    Arguments pertaining to the data loading and preprocessing pipeline.
    """
    dataset_name: Optional[str] = field(
        default=None,
        metadata={"help": "Train dataset path"}
    )
    dataset_names: Optional[str] = field(
        default=None,
        metadata={"help": "Train dataset paths"}
    )
    root_data_path: Optional[str] = field(
        default="./data", metadata={"help": "The path to the data directory."},
    )
    mwoz_path: Optional[str] = field(
        default="/home/willy/instructod/MultiWOZ_2.1/",
        metadata={"help": "MWOZ path"}
    )
    dialog_history_limit_dst: Optional[int] = field(
        default=0,
        metadata={"help": "Lenght of dialogue history for dst"}
    )
    dialog_history_limit_dst_recorrect: Optional[int] = field(
        default=0,
        metadata={"help": "Lenght of dialogue history for dst update"}
    )
    dialog_history_limit_rg: Optional[int] = field(
        default=20,
        metadata={"help": "Lenght of dialogue history for response generation"}
    )
    dialog_history_limit_e2e: Optional[int] = field(
        default=20,
        metadata={"help": "Lenght of dialogue history for e2e"}
    )
    single_domain_only: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to keep only the single domain sample or not"}
    )
    with_slot_description: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to use slot description or not for DST"}
    )
    with_req_inf_differentiation: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to differentiate between require and inform slot for DST"}
    )
    with_all_slots: Optional[bool] = field(
        default=True,
        metadata={"help": "Whether to use all slots or not"}
    )
    debug_mode: Optional[bool] = field(
        default=False,
        metadata={"help": "debug mode to only try 20 samples"}
    )
    start_idx: Optional[int] = field(
        default=0,
        metadata={"help": "Starting index to restart the prediction if needed"}
    )
    save_path: Optional[str] = field(
        default="results/",
        metadata={"help": "save path"}
    )
    save_every: Optional[int] = field(
        default=5,
        metadata={"help": "every step to save in case api fail"}
    )
    db_format_type: Optional[str] = field(
        default="1",
        metadata={"help": "1 is more precise, 2 is more concise for db integration"},
    )

@dataclass
class PromptingArguments(TrainingArguments):
    """
    Arguments pertraining to the prompting pipeline.
    """
    output_dir: Optional[str] = field(
        default="./out",
        metadata={"help": "Output directory"},
    )
    task: Optional[str] = field(
        default="dst",
        metadata={"help": "Task to perform"}
    )
    max_requests_per_minute: Optional[int] = field(
        default=20,
        metadata={"help": "Max number of requests for OpenAI API."}
    )
    openai_api_key_name: Optional[str] = field(
        default="OPENAI_API_KEY",
        metadata={"help": "OpenAI API key name."}
    )


data_args = DataArguments()
data_args.dialog_history_limit_e2e = -1
data_args.dialog_history_limit_rg = -1


In [25]:
class PromptConstructor():
    def __init__(self, 
                 config):
        self.config = config
        self.instructions = config["INSTRUCTIONS"]
        self.prompt_templates = config["PROMPT_TEMPLATES"]
        
    def _get_slots_from_domains(self, domains, with_slot_description, with_req_inf_differentiation, with_all_slots):
        # slot_description = self.config["slot_descrpition"]
        if with_all_slots:
            domains = "all"

        if with_slot_description:
            with_req_inf_differentiation = False #Slot description is the discriminator

        if domains == "all":
            if with_req_inf_differentiation:
                req_slots = ", ".join(self.config["multiwoz21"]["all_requestable_slots"])
                inf_slots = ", ".join(self.config["multiwoz21"]["all_informable_slots"])
            else:
                slots = set(self.config["multiwoz21"]["all_requestable_slots"] + 
                            self.config["multiwoz21"]["all_informable_slots"])
                slots = ", ".join(slots)
        elif not isinstance(domains, list):
            raise ValueError("""Provided domain should be either 'all' or list of valid domain names:
                                - for multiwoz2.1 and 2.4: taxi, restaurant, hotel, train, attraction 
                                - for SGD: To-do""")
        else:
            req_slots = ""
            inf_slots = ""
            domain_req_slots = []
            domain_inf_slots = []
            for domain in domains:
                domain_req_slots += self.config["multiwoz21"]["requestable_slots"][domain]
                domain_inf_slots += self.config["multiwoz21"]["informable_slots"][domain]
            if with_req_inf_differentiation:
                domain_req_slots = set(domain_req_slots)
                domain_inf_slots = set(domain_inf_slots)
                req_slots += ", ".join(domain_req_slots)
                inf_slots += ", ".join(domain_inf_slots)
            else:
                slots = set(domain_req_slots + domain_inf_slots)
                slots = ", ".join(slots)

        if with_req_inf_differentiation:
            slots_info = f"Requestable slots: {req_slots}\nInformable slots: {inf_slots}"
        else:
            slots_info = f"{slots}"

        if with_slot_description:
            slots = slots.split(", ")
            slots_info = ""
            for slot in slots:
                if slot not in self.config["multiwoz21"]["all_informable_slots"]:
                    continue
                slots_info += f"name: {slot}, description: {SLOTS_DESCRIPTIONS[slot]}\n"
            slots_info = slots_info[:-2]
        
        return slots_info
    
    
    def _build_prompt(self, mode="", dialogue_context="", ontology="", slots="", dialogue_acts="", belief_states="", database=""):
        prompt = ""
        if mode == "dst":
            instruction = self.instructions["instruction_with_slots"]
            template_variables = self.prompt_templates["template_with_slots"]
            template = PromptTemplate(input_variables= template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     slots=slots,
                                     dialogue_context=dialogue_context)
            
        elif mode == "dst_recorrect":
            instruction = self.instructions["instruction_with_slots_recorrect"]
            template_variables = self.prompt_templates["template_with_slots_recorrect"]
            template = PromptTemplate(input_variables= template_variables["input_variables"],
                                      template = template_variables["template"])            
            prompt = template.format(instruction=instruction,
                                    slots=slots,
                                    dialogue_context=dialogue_context,
                                    belief_states=belief_states)
            
        elif mode == "database_query":
            instruction = self.instructions["instruction_query_database"]
            template_variables = self.prompt_templates["template_query_database"]
            template = PromptTemplate(input_variables= template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     belief_states=belief_states)
            
        elif mode == "response_generation":
            example = self.config["EXAMPLES"]["response_generation"]
            
            instruction = self.instructions["instruction_response_generation"]
            template_variables = self.prompt_templates["template_response_generation"]
            template = PromptTemplate(input_variables = template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     example=example,
                                     dialogue_context=dialogue_context)
        elif mode == "e2e":
            instruction = self.instructions["instruction_e2e"]
            template_variables = self.prompt_templates["template_e2e"]
            template = PromptTemplate(input_variables = template_variables["input_variables"],
                                      template = template_variables["template"])
            prompt = template.format(instruction=instruction,
                                     database=database,
                                     dialogue_context=dialogue_context)

        else:
            raise ValueError("'mode' should be one of: [dst, dst_recorrect, database_query, response_generation, e2e]")
        
        return prompt


class MWOZ_Dataset(PromptConstructor):
    def __init__(self,
                 config,
                 data_args):
        PromptConstructor.__init__(self, config)
        self.dataset = {"id":[],
                        "dialogue_id":[],
                        "dialogue_context":[],
                        "turn":[],
                        "prompt_dst":[],
                        "prompt_dst_update":[],
                        "prompt_rg":[],
                        "prompt_e2e":[],
                        "domains":[],
                        "turn_domain":[],
                        "gold_turn_bs":[],
                        "gold_bs":[],
                        "gold_act":[],
                        "gold_response":[],
                        "gold_database_result":[],
                        }
        
        print("Loading data...")
        self.all_data, self.testfiles, self.system_acts = self._get_mwoz_data(data_args.mwoz_path)
        print("Loading databases...")
        self.dbs_lexicalized = self._get_dbs_lexicalized(data_args.mwoz_path)
        self.idx = 0
        self.dialog_history_limit_dst = data_args.dialog_history_limit_dst
        self.dialog_history_limit_rg = data_args.dialog_history_limit_rg
        self.dialog_history_limit_e2e = data_args.dialog_history_limit_e2e
        self.single_domain_only = data_args.single_domain_only
        self.with_slot_description = data_args.with_slot_description
        self.with_req_inf_differentiation = data_args.with_req_inf_differentiation
        self.with_all_slots = data_args.with_all_slots
        self.all_domains = ["restaurant", "taxi", "hotel", "train", "attraction"]

        print("Processing mwoz...")
        for sample in tqdm(self.all_data):
            if sample in self.testfiles:
                dialogue_log = self.all_data[sample]["log"]
                self._process_dialogue_log(sample=sample,
                                           dialogue_log=dialogue_log)

        self.dataset = pd.DataFrame(self.dataset)
        if self.single_domain_only:
            for index, row in tqdm(self.dataset.iterrows()):
                if len(row["domains"]) != 1:
                    self.dataset.drop(index, inplace=True)

                    
    def _get_mwoz_data(self, mwoz_path):
        data_path = os.path.join(mwoz_path, "data.json")
        testListFile_path = os.path.join(mwoz_path, "testListFile.txt")
        system_acts_path = os.path.join(mwoz_path, "system_acts.json")

        with open(data_path, "r") as f:
            all_data = json.load(f)
            
        with open(testListFile_path, "r") as f:
            testfiles = f.read()
        testfiles = testfiles.split("\n")
        
        with open(system_acts_path, "r") as f:
            system_acts = json.load(f)
            
        return all_data, testfiles, system_acts
    
    def _get_dbs_lexicalized(self, mwoz_path):
        domains = ["restaurant", "hotel", "train", "attraction"]
        keep_data = {"restaurant":["address", "area", "food", "name", "pricerange", "phone", "postcode"],
                    "attraction":["name", "area", "address", "type", "postcode"],
                    "hotel":["name", "address", "area", "phone", "postcode", "pricerange", "stars"],
                    "train":["departure", "destination"]}
        dbs_lexicalized = {}
        for domain in domains:
            db_path = os.path.join(mwoz_path, f"{domain}_db.json")
            with open(db_path, "r") as f:
                db_data = json.load(f)

            db_lexicalized = []
            for row in db_data:
                row_keep = []
                for key in keep_data[domain]:
                        if key in row:
                            row_keep.append(f"{key}: {row[key]}")
                db_lexicalized.append(", ".join(row_keep))
            dbs_lexicalized[domain] = "\n".join(set(db_lexicalized))

        return dbs_lexicalized
    
    def _process_dialogue_log(self, sample, dialogue_log):

        dialog_history_memory_dst = []
        dialog_history_memory_rg = []
        dialog_history_memory_e2e = []
        dialog_history_dst = ""
        dialog_history_rg = ""
        dialog_history_e2e = ""
        turn_domain = ""
        domains = self._get_domains_from_log(dialogue_log)
        slots = self._get_slots_from_domains(domains, 
                                             self.with_slot_description,
                                             self.with_req_inf_differentiation,
                                             self.with_all_slots) # or all

        for turn_nb, turn in enumerate(dialogue_log):

            if turn_nb % 2 == 0:
                speaker = "USER"
            else:
                speaker = "SYSTEM"
            
            utterance = f"""{speaker}: {turn["text"]}\n"""
            dialog_act = turn["dialog_act"]
            
            dialogue_context_dst = dialog_history_dst + utterance
            prompt_dst = self._build_prompt(mode="dst",
                                            slots=slots,
                                            dialogue_context=dialogue_context_dst)
            
            lexicalized_act = self._lexicalize_act(dialog_act)
            dialogue_context_rg = dialog_history_rg + utterance + f"ACT:{lexicalized_act}\nSYSTEM:"
            prompt_rg = self._build_prompt(mode="response_generation",
                                            dialogue_context=dialogue_context_rg)
            
            dialogue_context_e2e = dialog_history_e2e + utterance + "SYSTEM:"
            # need to have utterance level domain here
            cur_system_act = self.system_acts[sample.split(".")[0]][str((turn_nb//2)+1)]
            turn_domain = self._get_domain_from_turn(turn_domain, cur_system_act)
            if turn_domain and turn_domain != "taxi":
                database = self.dbs_lexicalized[turn_domain]
            else:
                database = ""
            prompt_e2e = self._build_prompt(mode="e2e",
                                            database=database,
                                            dialogue_context=dialogue_context_e2e)

            dialog_history_dst, dialog_history_memory_dst = self._update_dialogue_memory(utterance, 
                                                                                         dialogue_log, 
                                                                                         self.dialog_history_limit_dst, 
                                                                                         dialog_history_memory_dst)
            dialog_history_rg, dialog_history_memory_rg = self._update_dialogue_memory(utterance, 
                                                                                       dialogue_log, 
                                                                                       self.dialog_history_limit_rg,
                                                                                       dialog_history_memory_rg)
            dialog_history_e2e, dialog_history_memory_e2e = self._update_dialogue_memory(utterance, 
                                                                                         dialogue_log, 
                                                                                         self.dialog_history_limit_e2e, 
                                                                                         dialog_history_memory_e2e) 
                
            metadata = turn["metadata"]
            bspn_dict = {}
            if metadata:
                for domain in metadata:
                    slot_values = metadata[domain]["semi"]
                    for slot in slot_values:
                        value = slot_values[slot]
                        if value and value not in ["not mentioned", "none"]:
                            if domain in bspn_dict:
                                bspn_dict[domain].append(remapping(slot))
                                bspn_dict[domain].append(remapping(value))
                            else:
                                bspn_dict[domain] = [remapping(slot), remapping(value)]
                bspn = " ".join([f"[{domain}] {' '.join(bspn_dict[domain])}" for domain in bspn_dict])

            self.idx += 1
            if turn_nb % 2 == 0:
                self.dataset["gold_turn_bs"].append(dialog_act)
                self.dataset["dialogue_context"].append(dialogue_context_dst)
                self.dataset["gold_database_result"].append(None) 
                self.dataset["turn"].append(turn_nb//2)
                self.dataset["domains"].append(domains)
                self.dataset["id"].append(self.idx//2)
                self.dataset["dialogue_id"].append(sample)
                self.dataset["prompt_dst"].append(prompt_dst)
                self.dataset["prompt_dst_update"].append(prompt_dst)
                self.dataset["prompt_rg"].append(prompt_rg)
                self.dataset["prompt_e2e"].append(prompt_e2e)
                self.dataset["turn_domain"].append(turn_domain)
            else:
                self.dataset["gold_response"].append(utterance)
                self.dataset["gold_bs"].append(bspn)
                self.dataset["gold_act"].append(dialog_act)

    def _update_dialogue_memory(self, utterance, dialogue_log, dialog_history_limit, dialog_history_memory):
        if dialog_history_limit != 0:
            if dialog_history_limit == -1:
                dialog_history_limit = len(dialogue_log)
            if len(dialog_history_memory) >= dialog_history_limit:
                dialog_history_memory.pop(0)
            dialog_history_memory.append(utterance)

        dialog_history = "".join(dialog_history_memory)
        return dialog_history, dialog_history_memory
    
    def _lexicalize_act(self, act):
        lexicalized_acts = []
        lexicalize_mapping = {"leave": "leave time",
                              "arrive":"arrival time",
                              "departure":"departure place",
                              "post":"postcode",
                              "addr":"address"}

        for act, slot_values in act.items():


            if "request" in act.lower():
                requests = []
                for (slot, value) in slot_values:
                    slot = slot.lower()
                    if slot in lexicalize_mapping:
                        slot = lexicalize_mapping[slot]
                    if slot == "none":
                        break
                    else:
                        requests.append(slot)
                if requests:
                    lexicalized_act = "Request the user about " + ", ".join(requests) + "."
                    lexicalized_acts.append(lexicalized_act)

            elif "recommend" in act.lower():
                recommends = []
                for (slot, value) in slot_values:
                    slot, value = slot.lower(), value.lower()
                    if slot in lexicalize_mapping:
                        slot = lexicalize_mapping[slot]
                    if slot == "none":
                        break
                    else:
                        recommends.append(value)
                if recommends:
                    lexicalized_act = "Recommend the user for " + ", ".join(recommends) + "."
                    lexicalized_acts.append(lexicalized_act)

            elif "inform" in act.lower():
                informs = []
                for (slot, value) in slot_values:
                    slot, value = slot.lower(), value.lower()
                    if slot in lexicalize_mapping:
                        slot = lexicalize_mapping[slot]
                    if slot == "none":
                        break
                    else:
                        informs.append(f"the {slot} is {value}")
                if informs:
                    lexicalized_act = "Inform the user that " + ", ".join(informs) + "."  
                    lexicalized_acts.append(lexicalized_act)

            else:
                pass
        if lexicalized_acts:
            return " ".join(lexicalized_acts)
        else:
            return "None"
        
    def _get_domain_from_turn(self, domain, act):
        for k in act:
            turn_domain = k.lower().split("-")[0]
            if turn_domain in self.all_domains:
                return turn_domain
        return domain
            

    def _get_domains_from_log(self, dialogue_log):
        domains = []
        for log in dialogue_log:
            for domain_act in log["dialog_act"]:
                domain = domain_act.split("-")[0].lower()
                if domain in self.all_domains and domain not in domains:
                    domains.append(domain)
        return domains
                

In [26]:
mwoz = MWOZ_Dataset(CONFIG, data_args)
dataset = mwoz.dataset

Loading data...
Loading databases...
Processing mwoz...


100%|██████████| 10438/10438 [00:03<00:00, 3346.69it/s]


In [27]:
df_results = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-3.5-turbo_0-end_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_dialogHistoryLimit0_prompt3.csv")
df_results = df_results.rename(columns={'gold_bs':'gold_turn_bs'})
df_results = df_results.merge(dataset[['id', 'turn_domain', 'gold_bs']], on='id', how='left')
df_results.shape

(1059, 16)

In [28]:
import copy

def add_running_accumulated_bs_column(df, mode = 'preds', new_column_suffix=''):

    running_bs_list = []
    new_turn_domains = []
    turn_domains = df['turn_domain']
    dialogue_ids = df['dialogue_id']
    column_name = 'preds' if mode == 'preds' else 'gold_turn_bs'
    if 'gold' in mode:
        mode = 'gold'
    items = df[column_name]
    for i, item in enumerate(items):
        
        # bug correction, take next turn domain when it's not available
        turn_domain = turn_domains[i] if turn_domains[i] != '' else turn_domains[i+1]

        if i == 0:
            running_bs = {}
            running_bs[turn_domain] = {}
        elif dialogue_ids[i] == dialogue_ids[i-1]:
            running_bs = copy.deepcopy(running_bs_list[i-1])
        else:
            running_bs = {}
            running_bs[turn_domain] = {}
            
        if mode == 'preds':
            unpacked_item = unpack_belief_states(item, 'pred')
            if unpacked_item != ['none-none']:
                item_dict = ast.literal_eval(item) 
                if turn_domain not in list(running_bs.keys()):
                    running_bs[turn_domain] = {}
                for item_slot in item_dict.keys():
                    running_bs[turn_domain][item_slot] = item_dict[item_slot]
        elif mode == 'gold':
            unpacked_item = unpack_belief_states(item, 'gold')
            if unpacked_item != ['none-none']:
                item_dict = ast.literal_eval(item) if type(item) != type({}) else item
                item_dict = {items[0]:items[1] for items in list(item_dict.values())[0]}
                print(item_dict)
                if turn_domain not in list(running_bs.keys()):
                    running_bs[turn_domain] = {}
                for item_slot in item_dict.keys():
                    print(item_dict[item_slot])
                    running_bs[turn_domain][item_slot] = item_dict[item_slot]

        running_bs_list.append(running_bs)
        new_turn_domains.append(turn_domain)

    df[mode+'_bs'+new_column_suffix] = running_bs_list
    df['turn_domain'] = new_turn_domains

In [29]:
add_running_accumulated_bs_column(df_results, mode = 'preds')
add_running_accumulated_bs_column(df_results, mode = 'golds', new_column_suffix='_new')

No belief state: as there is no mention of any slot in the last turn of the conversation, the belief state will be empty. here's the belief state in json format:

{}
No belief state: as there is no user input in the last turn, the belief state remains the same as the previous turn. without any context of the previous turn, it is impossible to generate the belief state.
No belief state: based on the last turn of the conversation, there is no new ?ation provided by the user to update the belief state. therefore, the belief state remains the same as the previous turn. without additional context, it is not possible to generate the belief state.
No belief state: as there is no context provided, i cannot generate the belief state of the last dialogue turn. please provide me with the previous conversation turns so that i can generate the belief state.
No belief state: as there is no context provided, i cannot generate the belief state for the last dialogue turn. please provide me with the pre

In [30]:
df_results[['dialogue_id', 'turn_domain', 'preds', 'preds_bs', 'gold_bs', 'gold_bs_new']].iloc[390:400]

Unnamed: 0,dialogue_id,turn_domain,preds,preds_bs,gold_bs,gold_bs_new
390,SNG0459.json,restaurant,{'time': '19:00'},"{'restaurant': {'area': 'centre', 'pricerange': 'moderate', 'people': '4', 'time': '19:00', 'day': 'Wednesday'}}",[restaurant] price moderate name yipee noodle bar area centre,"{'restaurant': {'Price': 'moderate', 'Area': 'centre', 'Time': '19:00', 'Day': 'wednesday', 'People': '4'}}"
391,SNG0459.json,restaurant,"{'food': None, 'address': None, 'reference': None, 'name': None, 'area': None, 'day': None, 'postcode': None, 'time': None, 'phone': None, 'people': None, 'pricerange': None}","{'restaurant': {'area': 'centre', 'pricerange': 'moderate', 'people': '4', 'time': '19:00', 'day': 'Wednesday'}}",[restaurant] price moderate name yipee noodle bar area centre,"{'restaurant': {'Price': 'moderate', 'Area': 'centre', 'Time': '19:00', 'Day': 'wednesday', 'People': '4', 'none': 'none'}}"
392,SNG0459.json,restaurant,"{'food': None, 'address': None, 'reference': None, 'name': None, 'area': None, 'day': None, 'postcode': None, 'time': None, 'phone': None, 'people': None, 'pricerange': None}","{'restaurant': {'area': 'centre', 'pricerange': 'moderate', 'people': '4', 'time': '19:00', 'day': 'Wednesday'}}",[restaurant] price moderate name yipee noodle bar area centre,"{'restaurant': {'Price': 'moderate', 'Area': 'centre', 'Time': '19:00', 'Day': 'wednesday', 'People': '4', 'none': 'none'}}"
393,SNG0897.json,hotel,{'pricerange': 'moderate'},{'hotel': {'pricerange': 'moderate'}},[hotel] price moderate,{'hotel': {'Price': 'moderate'}}
394,SNG0897.json,hotel,"{'area': None, 'pricerange': 'moderate', 'internet': 'free'}","{'hotel': {'pricerange': 'moderate', 'area': None, 'internet': 'free'}}",[hotel] area dontcare price moderate internet yes,"{'hotel': {'Price': 'moderate', 'Internet': 'yes', 'Area': 'do nt care'}}"
395,SNG0897.json,hotel,"{'stars': '4', 'parking': 'free'}","{'hotel': {'pricerange': 'moderate', 'area': None, 'internet': 'free', 'stars': '4', 'parking': 'free'}}",[hotel] area dontcare parking yes price moderate stars 4 internet yes,"{'hotel': {'Price': 'moderate', 'Internet': 'yes', 'Area': 'do nt care', 'Parking': 'yes', 'Stars': '4'}}"
396,SNG0897.json,hotel,"{'phone': 'requested', 'area': 'requested'}","{'hotel': {'pricerange': 'moderate', 'area': 'requested', 'internet': 'free', 'stars': '4', 'parking': 'free', 'phone': 'requested'}}",[hotel] area dontcare parking yes price moderate stars 4 internet yes,"{'hotel': {'Price': 'moderate', 'Internet': 'yes', 'Area': '?', 'Parking': 'yes', 'Stars': '4', 'Phone': '?'}}"
397,SNG0897.json,hotel,"{'stay': None, 'address': None, 'parking': None, 'reference': None, 'name': None, 'area': None, 'internet': None, 'postcode': None, 'stars': None, 'day': None, 'phone': None, 'people': None, 'pricerange': None, 'type': None}","{'hotel': {'pricerange': 'moderate', 'area': 'requested', 'internet': 'free', 'stars': '4', 'parking': 'free', 'phone': 'requested'}}",[hotel] area dontcare parking yes price moderate stars 4 internet yes,"{'hotel': {'Price': 'moderate', 'Internet': 'yes', 'Area': '?', 'Parking': 'yes', 'Stars': '4', 'Phone': '?', 'none': 'none'}}"
398,SNG01943.json,hotel,"{'pricerange': 'expensive', 'type': 'guesthouse'}","{'hotel': {'pricerange': 'expensive', 'type': 'guesthouse'}}",[hotel] price expensive type guesthouse,"{'hotel': {'Type': 'guesthouse', 'Price': 'expensive'}}"
399,SNG01943.json,hotel,{'parking': 'free'},"{'hotel': {'pricerange': 'expensive', 'type': 'guesthouse', 'parking': 'free'}}",[hotel] parking yes price expensive type guesthouse,"{'hotel': {'Type': 'guesthouse', 'Price': 'expensive'}}"


## Multidomain

In [75]:
df_results_multidomain_gpt4 = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-4_0-end_debugFalse_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_withAllSlotsTrue_dialogHistoryLimit0_prompt3.csv")
df_results_multidomain_gpt4 = df_results_multidomain_gpt4.merge(dataset[['id', 'turn_domain', 'gold_bs', 'gold_turn_bs']], on='id', how='left')

df_results_multidomain = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-3.5-turbo_0-end_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_dialogHistoryLimit0_prompt3.csv")
# df_results_multidomain = df_results_multidomain.merge(dataset[['id', 'dialogue_id', 'turn_domain', 'gold_bs', 'gold_turn_bs']], on='id', how='left')
df_results_multidomain = df_results_multidomain.merge(dataset[['id', 'turn_domain', 'gold_bs', 'gold_turn_bs']], on='id', how='left')


test_df = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-3.5-turbo_0-end_recorrect_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_withAllSlotsTrue_dialogHistoryLimit0_prompt3.csv")
df_results = dataset.merge(test_df[['id', 'correct_preds']], on='id', how='right')
df_results_multidomain = df_results.rename(columns={"correct_preds":"preds"})

In [76]:
add_running_accumulated_bs_column(df_results_multidomain, mode = 'golds', new_column_suffix='_new')
add_running_accumulated_bs_column(df_results_multidomain, mode = 'preds')

{'Dest': 'pizza hut fen ditton', 'Depart': "saint john 's college"}
pizza hut fen ditton
saint john 's college
{'Leave': '17:15'}
17:15
{'none': 'none'}
none
{'none': 'none'}
none
{'Food': 'portuguese'}
portuguese
{'Food': 'turkish', 'Price': 'moderate'}
turkish
moderate
{'Price': 'moderate'}
moderate
{'Time': '14:00', 'Day': 'monday', 'People': '1'}
14:00
monday
1
{'none': 'none'}
none
{'Type': 'guesthouse', 'Price': 'moderate'}
guesthouse
moderate
{'Stars': '3'}
3
{'Stay': '2', 'Day': 'saturday', 'People': '4'}
2
saturday
4
{'Stay': '1'}
1
{'none': 'none'}
none
{'Name': 'city centre north b and b'}
city centre north b and b
{'Stay': '5', 'Day': 'friday', 'People': '1'}
5
friday
1
{'none': 'none'}
none
{'Food': 'british'}
british
{'Area': 'west'}
west
{'Name': 'graffiti'}
graffiti
{'none': 'none'}
none
{'Dest': 'birmingham new street', 'Depart': 'cambridge'}
birmingham new street
cambridge
{'Day': 'friday', 'Leave': '08:30'}
friday
08:30
{'Time': '?', 'Ticket': '?'}
?
?
{'none': 'none

In [77]:
add_running_accumulated_bs_column(df_results_multidomain_gpt4, mode = 'preds')
add_running_accumulated_bs_column(df_results_multidomain_gpt4, mode = 'golds', new_column_suffix='_new')

No belief state: since there is no ?ation about any slots in the given context, the belief state would be empty. here is the json format for the empty belief state:

{}
No belief state: {}

since there is no ?ation about any slots in the last dialogue turn, the belief state is an empty dictionary.
No belief state: {}

since there is no ?ation about any slots in the last dialogue turn, the belief state is an empty dictionary.
No belief state: {}

since there is no ?ation about any slots in the last dialogue turn, the belief state is an empty dictionary.
No belief state: {}

since there is no ?ation about any slots in the given dialogue turn, the belief state is an empty dictionary.
No belief state: {}

since the user is just saying goodbye and not providing any ?ation related to the slots, the belief state is empty.
No belief state: {}

since there is no specific ?ation about any slots in the last dialogue turn, the belief state is an empty json object.
No belief state: {}

since the la

In [78]:
df_results_multidomain[['dialogue_id', 'turn_domain', 'preds', 'preds_bs', 'gold_bs', 'gold_turn_bs']].iloc[390:400]

Unnamed: 0,dialogue_id,turn_domain,preds,preds_bs,gold_bs,gold_turn_bs
390,SNG0459.json,restaurant,{'time': '19:00'},"{'restaurant': {'pricerange': 'modest', 'area': 'center', 'people': '4', 'time': '19:00', 'day': 'Wednesday', 'type': 'table reservation'}}",[restaurant] price moderate name yipee noodle bar area centre,"{'Restaurant-Inform': [['Time', '19:00']]}"
391,SNG0459.json,restaurant,"{'stars': 'none', 'departure': 'none', 'arriveby': 'none', 'price': 'none', 'address': 'none', 'destination': 'none', 'day': 'none', 'stay': 'none', 'department': 'none', 'name': 'none', 'phone': 'none', 'reference': 'none', 'internet': 'none', 'area': 'none', 'car': 'none', 'parking': 'none', 'pricerange': 'none', 'postcode': 'none', 'leaveat': 'none', 'time': 'none', 'food': 'none', 'people': 'none', 'id': 'none', 'type': 'none'}","{'restaurant': {'pricerange': 'none', 'area': 'none', 'people': 'none', 'time': 'none', 'day': 'none', 'type': 'none', 'stars': 'none', 'departure': 'none', 'arriveby': 'none', 'price': 'none', 'address': 'none', 'destination': 'none', 'stay': 'none', 'department': 'none', 'name': 'none', 'phone': 'none', 'reference': 'none', 'internet': 'none', 'car': 'none', 'parking': 'none', 'postcode': 'none', 'leaveat': 'none', 'food': 'none', 'id': 'none'}}",[restaurant] price moderate name yipee noodle bar area centre,"{'general-thank': [['none', 'none']]}"
392,SNG0459.json,restaurant,{'end': 'true'},"{'restaurant': {'pricerange': 'none', 'area': 'none', 'people': 'none', 'time': 'none', 'day': 'none', 'type': 'none', 'stars': 'none', 'departure': 'none', 'arriveby': 'none', 'price': 'none', 'address': 'none', 'destination': 'none', 'stay': 'none', 'department': 'none', 'name': 'none', 'phone': 'none', 'reference': 'none', 'internet': 'none', 'car': 'none', 'parking': 'none', 'postcode': 'none', 'leaveat': 'none', 'food': 'none', 'id': 'none', 'end': 'true'}}",[restaurant] price moderate name yipee noodle bar area centre,"{'general-thank': [['none', 'none']]}"
393,SNG0897.json,hotel,"{'pricerange': 'moderate', 'stay': 'hotel'}","{'hotel': {'pricerange': 'moderate', 'stay': 'hotel'}}",[hotel] price moderate,"{'Hotel-Inform': [['Price', 'moderate']]}"
394,SNG0897.json,hotel,"{'pricerange': 'moderate', 'internet': 'yes'}","{'hotel': {'pricerange': 'moderate', 'stay': 'hotel', 'internet': 'yes'}}",[hotel] area dontcare price moderate internet yes,"{'Hotel-Inform': [['Internet', 'yes'], ['Area', 'do nt care']], 'Hotel-Request': [['Area', '?']]}"
395,SNG0897.json,hotel,"{'stars': '4', 'car': 'free parking'}","{'hotel': {'pricerange': 'moderate', 'stay': 'hotel', 'internet': 'yes', 'stars': '4', 'car': 'free parking'}}",[hotel] area dontcare parking yes price moderate stars 4 internet yes,"{'Hotel-Inform': [['Parking', 'yes'], ['Stars', '4']]}"
396,SNG0897.json,hotel,"{'phone': 'value', 'area': 'value'}","{'hotel': {'pricerange': 'moderate', 'stay': 'hotel', 'internet': 'yes', 'stars': '4', 'car': 'free parking', 'phone': 'value', 'area': 'value'}}",[hotel] area dontcare parking yes price moderate stars 4 internet yes,"{'Hotel-Request': [['Area', '?'], ['Phone', '?']]}"
397,SNG0897.json,hotel,"{'departure': 'none', 'arriveby': 'none', 'price': 'none', 'address': 'none', 'destination': 'none', 'day': 'none', 'stay': 'none', 'department': 'none', 'name': 'none', 'phone': 'none', 'reference': 'none', 'internet': 'none', 'area': 'none', 'car': 'none', 'parking': 'none', 'pricerange': 'none', 'postcode': 'none', 'leaveat': 'none', 'time': 'none', 'food': 'none', 'people': 'none', 'id': 'none', 'type': 'none', 'stars': 'none'}","{'hotel': {'pricerange': 'none', 'stay': 'none', 'internet': 'none', 'stars': 'none', 'car': 'none', 'phone': 'none', 'area': 'none', 'departure': 'none', 'arriveby': 'none', 'price': 'none', 'address': 'none', 'destination': 'none', 'day': 'none', 'department': 'none', 'name': 'none', 'reference': 'none', 'parking': 'none', 'postcode': 'none', 'leaveat': 'none', 'time': 'none', 'food': 'none', 'people': 'none', 'id': 'none', 'type': 'none'}}",[hotel] area dontcare parking yes price moderate stars 4 internet yes,"{'general-bye': [['none', 'none']]}"
398,SNG01943.json,hotel,"{'pricerange': 'expensive', 'hoteltype': 'guesthouse'}","{'hotel': {'pricerange': 'expensive', 'hoteltype': 'guesthouse'}}",[hotel] price expensive type guesthouse,"{'Hotel-Inform': [['Type', 'guesthouse'], ['Price', 'expensive']]}"
399,SNG01943.json,hotel,{'parking': 'free parking'},"{'hotel': {'pricerange': 'expensive', 'hoteltype': 'guesthouse', 'parking': 'free parking'}}",[hotel] parking yes price expensive type guesthouse,{}


In [79]:
import string

def drop_empty_keys(dictionary):
    if isinstance(dictionary, dict):
        return {key: drop_empty_keys(value) for key, value in dictionary.items() if value}
    else:
        return dictionary

def change_keys(dictionary, key_mapping):
    if isinstance(dictionary, dict):
        new_dict = {}
        for key, value in dictionary.items():
            if key == "type" or key == "car" or value == "?":
                continue             
            new_key = key_mapping(key)
            new_dict[new_key] = change_keys(value, key_mapping)
        return new_dict
    else:
        return dictionary

def nested_fix(d, fix):
    if not d or isinstance(d, bool):
        return ""
    elif isinstance(d, dict):  # if dict, apply to each key
        return {k.lower(): nested_fix(v, fix) for k, v in d.items()}
    elif isinstance(d, list):  # if list, apply to each element
        return [nested_fix(elem, fix) for elem in d]
    else:
        return fix(str(d))


def full_fix(d):
    d = nested_fix(d, fix_typos)
    d = drop_empty_keys(d)
    d = change_keys(d, remapping)
    return d

In [80]:
idx = 3

pred_bs = df_results_multidomain["preds_bs"][idx]
print(pred_bs)

gold_bs = df_results_multidomain["gold_bs_new"][idx]
print(gold_bs)
print("======")

pred_bs = drop_empty_keys(pred_bs)
pred_bs = change_keys(pred_bs, remapping)
pred_bs = nested_fix(pred_bs, fix_typos)
print(pred_bs)

gold_bs = drop_empty_keys(gold_bs)
gold_bs = change_keys(gold_bs, remapping)
gold_bs = nested_fix(gold_bs, fix_typos)
print(gold_bs)

{'taxi': {'departure': 'none', 'destination': 'none', 'time': 'none', 'stars': 'none', 'arriveby': 'none', 'price': 'none', 'address': 'none', 'day': 'none', 'stay': 'none', 'department': 'none', 'name': 'none', 'phone': 'none', 'reference': 'none', 'internet': 'none', 'area': 'none', 'car': 'none', 'parking': 'none', 'pricerange': 'none', 'postcode': 'none', 'leaveat': 'none', 'food': 'none', 'people': 'none', 'id': 'none', 'type': 'none', 'end': 'true'}}
{'taxi': {'Dest': 'pizza hut fen ditton', 'Depart': "saint john 's college", 'Leave': '17:15', 'none': 'none'}}
{'taxi': {'depart': '', 'dest': '', 'time': '', 'stars': '', 'arrive': '', 'price': '', 'addr': '', 'day': '', 'stay': '', 'department': '', 'name': '', 'phone': '', 'ref': '', 'internet': '', 'area': '', 'parking': '', 'post': '', 'leave': '', 'food': '', 'people': '', 'id': '', 'end': 'true'}}
{'taxi': {'dest': 'pizza hut fen ditton', 'depart': "saint john's college", 'leave': '17:15', '': ''}}


In [81]:
def JGA(df_result):
    L = len(df_result)
    bf_match = 0
    correct_slots = 0
    total_slots = 0
    total_F1 = 0
    results_per_domain = {"taxi":{"total_bf_match":0,
                                  "total_f1":0,
                                  "total_samples":0},
                          "attraction":{"total_bf_match":0,
                                        "total_f1":0,
                                        "total_samples":0},
                          "hotel":{"total_bf_match":0,
                                   "total_f1":0,
                                   "total_samples":0},
                          "restaurant":{"total_bf_match":0,
                                        "total_f1":0,
                                        "total_samples":0},
                          "train":{"total_bf_match":0,
                                   "total_f1":0,
                                   "total_samples":0},
                          "":{"total_bf_match":0,
                              "total_f1":0,
                              "total_samples":0}}
    for idx in range(L):
        turn_domain = df_result["turn_domain"][idx]

        pred_bs = df_result["preds_bs"][idx]
        # print(pred_bs)

        gold_bs = df_result["gold_bs_new"][idx]
        # print(gold_bs)
        # print("======")
        corrected_pred_bs = full_fix(pred_bs)
        corrected_gold_bs = full_fix(gold_bs)

        # slot-f1
        # turn_correct = 0
        turn_TP = 0
        turn_FN = 0
        turn_FP = 0
        turn_total = 0
        for k, v in corrected_gold_bs.items():
            if isinstance(v, dict):
                for k1, v1 in v.items():
                    turn_total += 1
                    try:
                        #if correct in pred
                        if corrected_pred_bs[k][k1] == v1:
                            # turn_correct += 1
                            turn_TP += 1
                        else:
                            turn_FN += 1
                    except:
                        turn_FN += 1

        for k, v in corrected_pred_bs.items():
            if isinstance(v, dict):
                for k1, v1 in v.items():
                    turn_total += 1
                    try:
                        #if correct in pred
                        if corrected_gold_bs[k][k1] != v1:
                            # turn_correct += 1
                            turn_FP += 1
                    except:
                        turn_FP += 1

        total_slots += turn_total
        # correct_slots += turn_correct
        turn_precision = turn_TP / float(turn_TP+turn_FP) if (turn_TP+turn_FP)!=0 else 0
        turn_recall = turn_TP / float(turn_TP+turn_FN) if (turn_TP+turn_FN)!=0 else 0
        turn_F1 = 2 * turn_precision * turn_recall / float(turn_precision + turn_recall) if (turn_precision+turn_recall)!=0 else 0

        total_F1 += turn_F1

        results_per_domain[turn_domain]["total_f1"] += turn_F1
        results_per_domain[turn_domain]["total_samples"] += 1

        if corrected_pred_bs == corrected_gold_bs:
            bf_match += 1
            results_per_domain[turn_domain]["total_bf_match"] += 1
            if turn_domain == "attraction":
                print("JGA 1")
                print(turn_F1)
                print(gold_bs)
                print(pred_bs)
                print("gold", corrected_gold_bs)
                print("pred", corrected_pred_bs)
                print("---------")
        else:
            if turn_domain == "attraction":
                print("JGA 0")
                print(turn_F1)
                print(gold_bs)
                print(pred_bs)
                print("gold", corrected_gold_bs)
                print("pred", corrected_pred_bs)
                print("---------")

    print(f"Total JGA: {bf_match/L}")
    print(f"Total F1: {total_F1/L}")
    print("----")
    for k, v in results_per_domain.items():
        if k == "":
            continue
        print(f"Domain: {k}")
        print(f"F1: {v['total_f1']/v['total_samples']}")
        print(f"JGA: {v['total_bf_match']/v['total_samples']}")            
            
    return results_per_domain

In [82]:
JGA(df_results_multidomain)

JGA 0
0.6666666666666666
{'attraction': {'Area': 'east', 'Type': 'entertainment'}}
{'attraction': {'area': 'east', 'department': 'entertainment'}}
gold {'attraction': {'area': 'east'}}
pred {'attraction': {'area': 'east', 'department': 'entertainment'}}
---------
JGA 0
0.4
{'attraction': {'Area': 'east', 'Type': 'entertainment', 'Addr': '?', 'Post': '?'}}
{'attraction': {'area': 'east', 'department': 'entertainment', 'address': '123 Main St', 'postcode': '12345'}}
gold {'attraction': {'area': 'east'}}
pred {'attraction': {'area': 'east', 'department': 'entertainment', 'addr': '123 main st', 'post': '12345'}}
---------
JGA 0
0.2857142857142857
{'attraction': {'Area': 'east', 'Type': 'entertainment', 'Addr': '?', 'Post': '?', 'Fee': '?'}}
{'attraction': {'area': 'east', 'department': 'entertainment', 'address': '123 Main St', 'postcode': '12345', 'task': 'ask_info', 'requested_slot': 'price'}}
gold {'attraction': {'area': 'east'}}
pred {'attraction': {'area': 'east', 'department': 'enter

{'taxi': {'total_bf_match': 24,
  'total_f1': 91.55634920634922,
  'total_samples': 189},
 'attraction': {'total_bf_match': 6,
  'total_f1': 12.538095238095236,
  'total_samples': 40},
 'hotel': {'total_bf_match': 38,
  'total_f1': 190.4312642912645,
  'total_samples': 373},
 'restaurant': {'total_bf_match': 42,
  'total_f1': 167.7045165945166,
  'total_samples': 291},
 'train': {'total_bf_match': 33,
  'total_f1': 91.72229437229437,
  'total_samples': 160},
 '': {'total_bf_match': 1, 'total_f1': 2.333333333333333, 'total_samples': 6}}

In [17]:
JGA(df_results_multidomain_gpt4)

Total JGA: 0.43909348441926344
Total F1: 0.7866465870715139
----
Domain: taxi
F1: 0.6719072814310918
JGA: 0.3386243386243386
Domain: attraction
F1: 0.5833333333333333
JGA: 0.45
Domain: hotel
F1: 0.8116499675480917
JGA: 0.4155495978552279
Domain: restaurant
F1: 0.8619293840943326
JGA: 0.5326460481099656
Domain: train
F1: 0.7823002344877344
JGA: 0.43125


{'taxi': {'total_bf_match': 64,
  'total_f1': 126.99047619047636,
  'total_samples': 189},
 'attraction': {'total_bf_match': 18,
  'total_f1': 23.333333333333332,
  'total_samples': 40},
 'hotel': {'total_bf_match': 155,
  'total_f1': 302.7454378954382,
  'total_samples': 373},
 'restaurant': {'total_bf_match': 155,
  'total_f1': 250.8214507714508,
  'total_samples': 291},
 'train': {'total_bf_match': 69,
  'total_f1': 125.16803751803751,
  'total_samples': 160},
 '': {'total_bf_match': 4, 'total_f1': 4.0, 'total_samples': 6}}

In [18]:
JGA(df_results)

Total JGA: 0.45325779036827196
Total F1: 0.7713877927758938
----
Domain: taxi
F1: 0.7902536323171248
JGA: 0.5608465608465608
Domain: attraction
F1: 0.5
JGA: 0.575
Domain: hotel
F1: 0.7714772864906916
JGA: 0.3914209115281501
Domain: restaurant
F1: 0.81867487228312
JGA: 0.5257731958762887
Domain: train
F1: 0.7346645021645022
JGA: 0.3


{'taxi': {'total_bf_match': 106,
  'total_f1': 149.35793650793659,
  'total_samples': 189},
 'attraction': {'total_bf_match': 23, 'total_f1': 20.0, 'total_samples': 40},
 'hotel': {'total_bf_match': 146,
  'total_f1': 287.76102786102797,
  'total_samples': 373},
 'restaurant': {'total_bf_match': 153,
  'total_f1': 238.23438783438792,
  'total_samples': 291},
 'train': {'total_bf_match': 48,
  'total_f1': 117.54632034632036,
  'total_samples': 160},
 '': {'total_bf_match': 4, 'total_f1': 4.0, 'total_samples': 6}}

In [19]:
for i in range(10):
    id = df_results["id"][i]
    pred = df_results["preds"][i]
    unpacked_pred = unpack_belief_states(pred, "pred")
    row = dataset.loc[dataset["id"] == id]
    print("unpacked pred: ", unpacked_pred)
    print("turn domain: ", row["turn_domain"].item())
    print("gold belief state: ", row["gold_bs"].item())
    print("-------")

unpacked pred:  ["depart-saint john's college", 'dest-pizza hut fen ditton']
turn domain:  taxi
gold belief state:  [taxi] dest pizza hut fenditton depart saint johns college
-------
unpacked pred:  ['leave-17:15']
turn domain:  taxi
gold belief state:  [taxi] leave 17:15 dest pizza hut fenditton depart saint johns college
-------
No belief state: as there is no mention of any slot in the last turn of the conversation, the belief state will be empty. here's the belief state in json format:

{}
unpacked pred:  ['none-none']
turn domain:  taxi
gold belief state:  [taxi] leave 17:15 dest pizza hut fenditton depart saint johns college
-------
unpacked pred:  ['none-none']
turn domain:  taxi
gold belief state:  [taxi] leave 17:15 dest pizza hut fenditton depart saint johns college
-------
unpacked pred:  ['food-portuguese', 'area-cambridge']
turn domain:  restaurant
gold belief state:  [restaurant] food portugese
-------
unpacked pred:  ['price-moderate']
turn domain:  restaurant
gold belie

In [66]:
df_domain_gpt3 = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-3.5-turbo_0-end_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_withAllSlotsTrue_dialogHistoryLimit0_prompt3.csv")
df_domain_gpt4 = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-4_0-end_debugFalse_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_withAllSlotsTrue_dialogHistoryLimit0_prompt3.csv")
df_test = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-3.5-turbo_0-end_recorrect_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_withAllSlotsTrue_dialogHistoryLimit0_prompt3.csv")

In [63]:
for idx in range(100):
    print(df_domain_gpt3["preds"][idx])
    print("===")
    print(df_domain_gpt4["preds"][idx])
    print("------------------")

{'departure': 'Saint John\'s college', 'destination': 'Pizza Hut Fen Ditton'}
===
{
  "car": "taxi",
  "departure": "Saint John's college",
  "destination": "Pizza Hut Fen Ditton"
}
------------------
{'leaveat': 'after 17:15'}
===
{"leaveat": "after 17:15"}
------------------
As there is no specific information provided in the context, the belief state cannot be generated.
===
{
}
------------------
{'stars': None, 'departure': None, 'arriveby': None, 'price': None, 'address': None, 'destination': None, 'day': None, 'stay': None, 'department': None, 'name': None, 'phone': None, 'reference': None, 'internet': None, 'area': None, 'car': None, 'parking': None, 'pricerange': None, 'postcode': None, 'leaveat': None, 'time': None, 'food': None, 'people': None, 'id': None, 'type': None}
===
{
}
------------------
{'food': 'Portuguese', 'area': 'Cambridge'}
===
{
  "area": "Cambridge",
  "food": "Portuguese"
}
------------------
{'pricerange':'moderate', 'food':'turkish'}
===
{"pricerange": "

In [67]:
df_test.head(2)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id,dialogue_id,dialogue_context,turn,prompt,domains,gold_bs,gold_act,gold_response,gold_database_result,model_used,preds,completion_info,prompt_recorred,correct_preds
0,0,0,0,SNG0073.json,USER: I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.\n,0,"Generate the belief state of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}. Use the slot from SLOTS to generate the belief state:\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department, name, phone, reference, internet, area, car, parking, pricerange, postcode, leaveat, time, food, p...",['taxi'],"{'Taxi-Inform': [['Dest', 'pizza hut fen ditton'], ['Depart', ""saint john 's college""]]}","{'Taxi-Request': [['Leave', '?'], ['Arrive', '?']]}",SYSTEM: What time do you want to leave and what time do you want to arrive by?\n,,gpt-3.5-turbo,"{'departure': 'Saint John\'s college', 'destination': 'Pizza Hut Fen Ditton'}","{\n ""choices"": [\n {\n ""finish_reason"": ""stop"",\n ""index"": 0,\n ""message"": {\n ""content"": ""{'departure': 'Saint John\\'s college', 'destination': 'Pizza Hut Fen Ditton'}"",\n ""role"": ""assistant""\n }\n }\n ],\n ""created"": 1684858883,\n ""id"": ""chatcmpl-7JP3DVlUtpy1NseVlyiqr7H9UGW1J"",\n ""model"": ""gpt-3.5-turbo-0301"",\n ""object"": ""chat.completion"",\n ""usage"": {\n ""completion_tokens"": 20,\n ""prompt_tokens"": 204,\n ""total_tokens"": 224\n }\n}","Using the following SLOTS provided with their description, another faulty system already generated the WRONG BELIEF STATES of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You should generate the new and CORRECTED BELIEF STATES. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}.\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department...","{'departure': 'Saint John\'s college', 'destination': 'Pizza Hut Fen Ditton'}"
1,1,1,1,SNG0073.json,USER: I want to leave after 17:15.\n,1,"Generate the belief state of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}. Use the slot from SLOTS to generate the belief state:\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department, name, phone, reference, internet, area, car, parking, pricerange, postcode, leaveat, time, food, p...",['taxi'],"{'Taxi-Inform': [['Leave', '17:15']]}","{'Taxi-Inform': [['Car', 'blue honda'], ['Phone', '07218068540']]}",SYSTEM: \nBooking completed! your taxi will be blue honda Contact number is 07218068540\n,,gpt-3.5-turbo,{'leaveat': 'after 17:15'},"{\n ""choices"": [\n {\n ""finish_reason"": ""stop"",\n ""index"": 0,\n ""message"": {\n ""content"": ""{'time': 'after 17:15'}"",\n ""role"": ""assistant""\n }\n }\n ],\n ""created"": 1684858887,\n ""id"": ""chatcmpl-7JP3HsXQq9sGPSz0STkOYYNRxvcwP"",\n ""model"": ""gpt-3.5-turbo-0301"",\n ""object"": ""chat.completion"",\n ""usage"": {\n ""completion_tokens"": 10,\n ""prompt_tokens"": 186,\n ""total_tokens"": 196\n }\n}","Using the following SLOTS provided with their description, another faulty system already generated the WRONG BELIEF STATES of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You should generate the new and CORRECTED BELIEF STATES. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}.\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department...",{'time': 'after 17:15'}


In [None]:
df_results = df_results.rename(columns={'gold_bs':'gold_turn_bs'})
# df_results = df_results.merge(dataset[['id', 'turn_domain', 'gold_bs']], on='id', how='left')
df_results = dataset.merge(df_results[['id', 'preds']], on='id', how='right')
df_results = df_results.drop(["preds"], axis=1)
df_results = df_results.rename(columns={"correct_preds":"preds"})

In [71]:
test_df = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-3.5-turbo_0-end_recorrect_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_withAllSlotsTrue_dialogHistoryLimit0_prompt3.csv")

In [72]:
test_df = pd.read_csv("/home/willy/instructod/src/DST/results_single/gpt-3.5-turbo_0-end_recorrect_singleDomainOnlyTrue_withSlotDescriptionFalse_withSlotDifferentiationFalse_withAllSlotsTrue_dialogHistoryLimit0_prompt3.csv")
df_results = dataset.merge(test_df[['id', 'correct_preds']], on='id', how='right')
df_results = df_results.rename(columns={"correct_preds":"preds"})

In [74]:
test_df.head(2)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id,dialogue_id,dialogue_context,turn,prompt,domains,gold_bs,gold_act,gold_response,gold_database_result,model_used,preds,completion_info,prompt_recorred,correct_preds
0,0,0,0,SNG0073.json,USER: I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.\n,0,"Generate the belief state of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}. Use the slot from SLOTS to generate the belief state:\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department, name, phone, reference, internet, area, car, parking, pricerange, postcode, leaveat, time, food, p...",['taxi'],"{'Taxi-Inform': [['Dest', 'pizza hut fen ditton'], ['Depart', ""saint john 's college""]]}","{'Taxi-Request': [['Leave', '?'], ['Arrive', '?']]}",SYSTEM: What time do you want to leave and what time do you want to arrive by?\n,,gpt-3.5-turbo,"{'departure': 'Saint John\'s college', 'destination': 'Pizza Hut Fen Ditton'}","{\n ""choices"": [\n {\n ""finish_reason"": ""stop"",\n ""index"": 0,\n ""message"": {\n ""content"": ""{'departure': 'Saint John\\'s college', 'destination': 'Pizza Hut Fen Ditton'}"",\n ""role"": ""assistant""\n }\n }\n ],\n ""created"": 1684858883,\n ""id"": ""chatcmpl-7JP3DVlUtpy1NseVlyiqr7H9UGW1J"",\n ""model"": ""gpt-3.5-turbo-0301"",\n ""object"": ""chat.completion"",\n ""usage"": {\n ""completion_tokens"": 20,\n ""prompt_tokens"": 204,\n ""total_tokens"": 224\n }\n}","Using the following SLOTS provided with their description, another faulty system already generated the WRONG BELIEF STATES of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You should generate the new and CORRECTED BELIEF STATES. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}.\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department...","{'departure': 'Saint John\'s college', 'destination': 'Pizza Hut Fen Ditton'}"
1,1,1,1,SNG0073.json,USER: I want to leave after 17:15.\n,1,"Generate the belief state of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}. Use the slot from SLOTS to generate the belief state:\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department, name, phone, reference, internet, area, car, parking, pricerange, postcode, leaveat, time, food, p...",['taxi'],"{'Taxi-Inform': [['Leave', '17:15']]}","{'Taxi-Inform': [['Car', 'blue honda'], ['Phone', '07218068540']]}",SYSTEM: \nBooking completed! your taxi will be blue honda Contact number is 07218068540\n,,gpt-3.5-turbo,{'leaveat': 'after 17:15'},"{\n ""choices"": [\n {\n ""finish_reason"": ""stop"",\n ""index"": 0,\n ""message"": {\n ""content"": ""{'time': 'after 17:15'}"",\n ""role"": ""assistant""\n }\n }\n ],\n ""created"": 1684858887,\n ""id"": ""chatcmpl-7JP3HsXQq9sGPSz0STkOYYNRxvcwP"",\n ""model"": ""gpt-3.5-turbo-0301"",\n ""object"": ""chat.completion"",\n ""usage"": {\n ""completion_tokens"": 10,\n ""prompt_tokens"": 186,\n ""total_tokens"": 196\n }\n}","Using the following SLOTS provided with their description, another faulty system already generated the WRONG BELIEF STATES of the last dialogue turn in the following conversation between a USER and a SYSTEM in a task-oriented dialogue setting. You should generate the new and CORRECTED BELIEF STATES. The results should be in json format following this format: {'slot1':'value1', 'slot2':'value2', etc...}.\n\nSLOTS:\nstars, departure, arriveby, price, address, destination, day, stay, department...",{'time': 'after 17:15'}
