In [2]:
import os
import json 
import copy
import numpy as np
import pandas as pd

import seaborn as sns
from collections import defaultdict, Counter
import matplotlib.pyplot as plt
from refpydst.evaluate_metrics import evaluate
from refpydst.utils.dialogue_state import update_dialogue_state
from refpydst.prompt_formats.python.completion_parser import parse_python_completion, iterative_parsing, my_parsing
from refpydst.normalization.data_ontology_normalizer import DataOntologyNormalizer
from refpydst.db.ontology import Ontology
import refpydst.prompt_formats.python.demo as python_demo
from refpydst.prompt_formats.python.completion_parser import *


# Disable pandas' display truncation: show every column, every row, and the
# full contents of each cell when a frame is rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None



In [4]:
# Load the training split that the normalizer counts labels from.
# The encoding is pinned so decoding does not depend on the machine's locale.
with open('../data/mw21_5p_train_v1.json', 'r', encoding='utf-8') as f:
    train_data = json.load(f)

# Module-level normalizer; collect_stats() reads it as a global when it
# re-parses completions.
normalizer = DataOntologyNormalizer(
    Ontology.create_ontology(),
    # count labels from the train set
    supervised_set=train_data,
    # make use of existing surface form knowledge encoded in ontology.json, released with each dataset
    # see README.json within https://github.com/smartyfh/MultiWOZ2.4/raw/main/data/MULTIWOZ2.4.zip
    counts_from_ontology_file="../src/refpydst/db/multiwoz/2.4/ontology.json",
)

mapping supervised_set surface forms...: 100%|██████████| 2731/2731 [00:05<00:00, 510.25it/s] 
reading surface forms from ontology.json: 100%|██████████| 31/31 [00:03<00:00,  9.50it/s]


In [5]:
def sorted_dict(dict_a, by_key=True):
    """Return a new dict with the items of ``dict_a`` in sorted order.

    Args:
        dict_a: Mapping whose items are to be ordered.
        by_key: When true (default) order by key, ascending. Otherwise order
            by value, descending.

    Returns:
        A new ``dict`` holding the same items in the requested order.
    """
    if not by_key:
        ranked = sorted(dict_a.items(), key=lambda pair: pair[1], reverse=True)
        return dict(ranked)
    return dict(sorted(dict_a.items(), key=lambda pair: pair[0]))

In [6]:
def _score_running_log(logs, parsing_method):
    """Replay the turns of one running log and accumulate evaluation totals.

    Args:
        logs: Turn records loaded from a ``running_log.json`` file, in the
            order they were logged.
        parsing_method: One of ``'og'``, ``'og_modified'`` or ``'iterative'``
            (already validated by the caller).

    Returns:
        Tuple ``(n_correct, total_acc, total_f1)``: the number of turns counted
        as jointly correct and the summed per-turn slot accuracy and slot F1
        returned by ``evaluate``.
    """
    n_correct = 0
    total_acc, total_f1 = 0, 0
    prior_pred, prior_id = None, None

    for data_item in logs:
        if data_item.get('completion') is None:
            # NOTE(review): turns without a completion are counted as correct
            # without calling evaluate(), so they add nothing to the slot
            # accuracy / F1 totals while still counting in the denominator.
            # Confirm this is the intended treatment.
            n_correct += 1
            prior_id = data_item['ID']
            prior_pred = data_item['pred']
            continue

        if data_item['ID'] != prior_id:
            # The ID changed since the last processed turn: restart from the
            # logged prior context instead of the previous turn's prediction.
            prior_pred = data_item['pred_prior_context']

        if parsing_method == 'og':
            # Trust the prediction stored in the log.
            pred = data_item['pred']
        elif parsing_method == 'og_modified':
            pred_delta = normalizer.normalize(my_parsing(data_item['completion'], prior_pred))
            pred = update_dialogue_state(prior_pred, pred_delta)
        else:  # 'iterative'
            pred_delta = normalizer.normalize(iterative_parsing(data_item['completion'], prior_pred))
            pred = update_dialogue_state(prior_pred, pred_delta)

        this_jga, this_acc, this_f1 = evaluate(pred, data_item['slot_values'])
        total_acc += this_acc
        total_f1 += this_f1
        if this_jga:
            n_correct += 1

        prior_id = data_item['ID']
        prior_pred = pred

    return n_correct, total_acc, total_f1


def collect_stats(experiment_folder_path, parsing_method='og_modified'):
    """Score every ``running_log.json`` found under a folder.

    Walks ``experiment_folder_path`` recursively. For each directory that
    contains a ``running_log.json`` the logged turns are re-scored and one row
    of metrics is produced. Completions are re-parsed with the module-level
    ``normalizer`` unless ``parsing_method`` is ``'og'``.

    Args:
        experiment_folder_path: Root directory to search.
        parsing_method: ``'og'`` uses the prediction stored in the log,
            ``'og_modified'`` re-parses each completion with ``my_parsing``,
            and ``'iterative'`` re-parses it with ``iterative_parsing``.

    Returns:
        A DataFrame with columns ``name``, ``jga``, ``right``, ``wrong``,
        ``slot_acc`` and ``slot_f1``, sorted by ``jga`` descending with a fresh
        integer index. It is empty (but has those columns) when no log is
        found. A run whose log is empty gets NaN metrics and zero counts.

    Raises:
        ValueError: If ``parsing_method`` is not one of the supported values.
    """
    valid_methods = ('og', 'og_modified', 'iterative')
    if parsing_method not in valid_methods:
        # Fail up front: otherwise the per-turn prediction would be undefined
        # (or silently carried over from the previous turn).
        raise ValueError(
            f'unknown parsing_method {parsing_method!r}; expected one of {valid_methods}')

    columns = ['name', 'jga', 'right', 'wrong', 'slot_acc', 'slot_f1']
    records = []
    for path, _dirs, files in os.walk(experiment_folder_path):
        if 'running_log.json' not in files:
            continue

        # Experiment name: the parent directory name plus whatever follows
        # '/split_v1' in the path.
        exp_name = path.split('/')[-2] + path.split('/split_v1')[-1]
        with open(os.path.join(path, 'running_log.json'), 'r') as f:
            logs = json.load(f)

        n_total = len(logs)
        n_correct, total_acc, total_f1 = _score_running_log(logs, parsing_method)
        if n_total:
            jga = n_correct / n_total
            slot_acc = total_acc / n_total
            slot_f1 = total_f1 / n_total
        else:
            # An empty log has no defined rates; keep the run visible in the
            # table instead of dividing by zero.
            jga = slot_acc = slot_f1 = float('nan')

        records.append({
            'name': exp_name, 'jga': jga, 'right': n_correct, 'wrong': n_total - n_correct,
            'slot_acc': slot_acc, 'slot_f1': slot_f1})

    # Build the frame once; fixing the columns keeps the sort valid even when
    # no run was found.
    stats = pd.DataFrame(records, columns=columns)
    return stats.sort_values(by='jga', ascending=False).reset_index(drop=True)

In [10]:
# Run configuration: which model family's runs to score and how to parse them.
model = 'llama'
parsing_method = 'og_modified'

# GPT runs live in the plain "table4" folder; any other model reads "table4_llama".
if model == 'gpt':
    sufix = ''
else:
    sufix = '_llama'

# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
experiment_folder_path = ''.join(['/home/haesungpyun/my_refpydst/outputs/runs/', 'table4', sufix])

In [24]:
import warnings

# Silence all Python warnings for the rest of the session so they do not
# flood the output while the logs are scored.
warnings.filterwarnings(action='ignore')

# Score every run found under the configured experiment folder.
stats = collect_stats(experiment_folder_path, parsing_method=parsing_method)

----------------- MY PARSING ERROR -----------------
SyntaxError('unterminated string literal (detected at line 1)', ('<string>', 1, 19, 'agent.state.print(")', 1, 19))
got exception when execute statement: agent.state.print(")
 previous state: {'hotel-area': 'south', 'hotel-parking': 'yes', 'hotel-pricerange': 'expensive', 'hotel-type': 'hotel'}
current state: {}

----------------- MY PARSING ERROR -----------------
NameError("name 'hotel' is not defined")
got exception when execute statement: agent.state.taxi = agent.find_taxi(depart_time="02:30", leave_from=hotel.name, destination=restaurant.name)
 previous state: {'hotel-area': 'south', 'hotel-book day': 'friday', 'hotel-book people': '3', 'hotel-book stay': '5', 'hotel-parking': 'yes', 'hotel-pricerange': 'expensive', 'hotel-type': 'hotel', 'restaurant-area': 'south', 'restaurant-food': 'mexican'}
current state: {}

----------------- MY PARSING ERROR -----------------
NameError("name 'taxi' is not defined")
got exception when exec



----------------- MY PARSING ERROR -----------------
NameError("name 'restaurant' is not defined")
got exception when execute statement: agent.state.taxi = agent.find_taxi(destination=restaurant.name, leave_from="sheep's green and lammas land park fen causeway", arrive_by_time=restaurant.book_time)
 previous state: {'attraction-name': 'sheeps green and lammas land park fen causeway', 'restaurant-area': 'centre', 'restaurant-book day': 'sunday', 'restaurant-book people': '8', 'restaurant-book time': '12:45', 'restaurant-food': 'italian', 'restaurant-name': 'cafe uno', 'restaurant-pricerange': 'expensive'}
current state: {}

----------------- MY PARSING ERROR -----------------
NameError("name 'hotel' is not defined")
got exception when execute statement: agent.state.train = agent.find_train(day=hotel.book_day, depart_time="09:45", leave_from="cambridge")
 previous state: {'hotel-area': 'south', 'hotel-book day': 'sunday', 'hotel-book people': '5', 'hotel-book stay': '5', 'hotel-internet'



----------------- MY PARSING ERROR -----------------
NameError("name 'hotel' is not defined")
got exception when execute statement: agent.state.taxi = agent.find_taxi(depart_time="05:15", leave_from=hotel.name, destination=attraction.name)
 previous state: {'attraction-area': 'centre', 'attraction-name': 'castle galleries', 'attraction-type': 'museum', 'hotel-book day': 'sunday', 'hotel-book people': '3', 'hotel-book stay': '4', 'hotel-name': 'acorn guest house', 'hotel-parking': 'yes', 'hotel-stars': '4', 'hotel-type': 'guest house'}
current state: {}

----------------- MY PARSING ERROR -----------------
NameError("name 'attraction' is not defined")
got exception when execute statement: agent.state.attraction = agent.find_attraction(name=attraction.name, price_range="free")
 previous state: {'attraction-name': 'gallery at 12 a high street'}
current state: {}

----------------- MY PARSING ERROR -----------------
NameError("name 'train' is not defined")
got exception when execute statem



----------------- MY PARSING ERROR -----------------
AttributeError("type object 'BeliefState' has no attribute 'from_dict'")
got exception when execute statement: agent.state.agent.state = BeliefState.from_dict({"hotel": {"name": "nirala"}})
 previous state: {'taxi-arriveby': '15:30', 'taxi-departure': 'j restaurant', 'taxi-destination': 'high street', 'taxi-leaveat': '12:00'}
current state: {}

----------------- MY PARSING ERROR -----------------
SyntaxError('unterminated string literal (detected at line 1)', ('<string>', 1, 19, 'agent.state.print(")', 1, 19))
got exception when execute statement: agent.state.print(")
 previous state: {'taxi-arriveby': '15:30', 'taxi-departure': 'j restaurant', 'taxi-destination': 'high street', 'taxi-leaveat': '12:00'}
current state: {}

----------------- MY PARSING ERROR -----------------
NameError("name 'restaurant' is not defined")
got exception when execute statement: agent.state.restaurant = restaurant._replace(area=Area.west, price_range=Price

In [25]:
# Save the full metrics table as tab-separated text, without the index column.
stats.to_csv(
    f'../stats{sufix}_{parsing_method}_parsing.csv',
    sep='\t',
    index=False,
)


In [30]:
# Display the per-experiment metrics returned by collect_stats (sorted by JGA, highest first).
stats

Unnamed: 0,name,jga,right,wrong,slot_acc,slot_f1
0,smapling_exp_topk_bm_5_fs_5_0620_1337,0.665515,963,484,0.700069,0.654518
1,mixed_retriever_fs_bm_sample_topk_70B_0710_0628,0.564616,817,630,0.973209,0.919344
2,bm25_10_all_sim_70B_0710_0037,0.563234,815,632,0.971988,0.918429
3,fine_tuned_sbert_70B_0710_0434,0.559088,809,638,0.972472,0.914667
4,mixed_retriever_fs_bm_topk_70B_0710_0822,0.54803,793,654,0.973025,0.91726
5,mixed_retriever_fs_bm_sum_topk_70B_0710_1026,0.544575,788,659,0.972495,0.916222
6,bm25_10_all_sim_div_70B_0710_0241,0.536973,777,670,0.968878,0.906766
7,fine_tuned_sbert_topk_70B_0709_0937,0.534209,773,674,0.971666,0.908936
8,pretrained_sbert_70B_0710_1413,0.524534,759,688,0.969247,0.908131
9,mixed_retriever_fs_bm_topk,0.498963,722,725,0.965999,0.893415


In [26]:
# Keep only runs whose name contains '70B' and whose log holds more than
# 1000 turns (right + wrong equals the number of logged turns).
stats_70b = stats.loc[
    stats['name'].str.contains('70B') & (stats['right'] + stats['wrong'] > 1000)
]

In [27]:
# Renumber the filtered rows from 0. Rebinding (rather than inplace=True)
# avoids mutating a slice of `stats` and keeps the cell safe to re-run.
stats_70b = stats_70b.reset_index(drop=True)

In [28]:
# Save the 70B subset as tab-separated text, without the index column.
stats_70b.to_csv(
    f'../stats{sufix}_70B_{parsing_method}_parsing.csv',
    sep='\t',
    index=False,
)

In [29]:
# Display the metrics for the 70B runs only.
stats_70b

Unnamed: 0,name,jga,right,wrong,slot_acc,slot_f1
0,mixed_retriever_fs_bm_sample_topk_70B_0710_0628,0.564616,817,630,0.973209,0.919344
1,bm25_10_all_sim_70B_0710_0037,0.563234,815,632,0.971988,0.918429
2,fine_tuned_sbert_70B_0710_0434,0.559088,809,638,0.972472,0.914667
3,mixed_retriever_fs_bm_topk_70B_0710_0822,0.54803,793,654,0.973025,0.91726
4,mixed_retriever_fs_bm_sum_topk_70B_0710_1026,0.544575,788,659,0.972495,0.916222
5,bm25_10_all_sim_div_70B_0710_0241,0.536973,777,670,0.968878,0.906766
6,fine_tuned_sbert_topk_70B_0709_0937,0.534209,773,674,0.971666,0.908936
7,pretrained_sbert_70B_0710_1413,0.524534,759,688,0.969247,0.908131
8,pretrained_sbert_topk_70B_0710_1220,0.484451,701,746,0.966459,0.903008
