In [3]:
import os
import json 
import copy
import numpy as np
import pandas as pd

from my_funcs import (
    default_transformation, read_json_from_data_dir,
    normalize, embed_query_retrieve_examples,
    make_two_type_msg, get_python_chat_prompt, 
    parse_python_completion, update_dialogue_state, 
    compute_acc, calculate_token_f1, evaluate,
    DataOntologyNormalizer, Ontology,
    copy, defaultdict, random,
    tiktoken, SentenceTransformer
)

import copy

from refpydst.prompt_formats.python.completion_parser import *

# Lift pandas' display limits so wide frames, long frames and long cell values
# are rendered in full (None = no limit; a number such as 1000 or 199 caps it).
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Read the training examples; they are handed to the normalizer as its supervised set.
with open('../data/mw21_0p_train.json', 'r') as train_file:
    train_data = json.load(train_file)

# The normalizer is built from three sources:
#   * the ontology returned by Ontology.create_ontology(),
#   * label counts taken from the training set loaded above,
#   * surface forms already encoded in ontology.json, released with each dataset
#     (see README.json within https://github.com/smartyfh/MultiWOZ2.4/raw/main/data/MULTIWOZ2.4.zip).
normalizer = DataOntologyNormalizer(
    Ontology.create_ontology(),
    supervised_set=train_data,
    counts_from_ontology_file="../src/refpydst/db/multiwoz/2.4/ontology.json",
)

reading surface forms from ontology.json: 100%|██████████| 31/31 [00:02<00:00, 10.54it/s]


In [17]:
def unroll_or(gold, pred):
    """Resolve "a|b" gold alternatives to the one the prediction picked.

    For every gold slot whose value contains '|', the value is split on '|'.
    If the prediction for that slot equals one of the alternatives, the gold
    value is replaced by that alternative (and the prediction is re-assigned
    to it). Slots without a matching prediction are left untouched.

    Both dictionaries are modified in place and also returned as
    ``(gold, pred)``.
    """
    for slot, gold_val in gold.items():
        if '|' not in gold_val:
            continue
        # First alternative (in written order) that the prediction agrees with.
        chosen = next(
            (alt for alt in gold_val.split('|') if pred.get(slot) == alt),
            None,
        )
        if chosen is not None:
            pred[slot] = chosen
            gold[slot] = chosen
    return gold, pred

def sort_dict(dict_a, by_key=True):
    """Return a new dict with the entries of ``dict_a`` in sorted order.

    With ``by_key`` truthy the entries are ordered by key, ascending.
    Otherwise they are ordered by value, descending.
    """
    entries = list(dict_a.items())
    if by_key:
        entries.sort(key=lambda entry: entry[0])
    else:
        entries.sort(key=lambda entry: entry[1], reverse=True)
    return dict(entries)

# ['ID', 'turn_id', 'domains', 'dialog', 'slot_values', 'turn_slot_values', 'last_slot_values', 'prompt', 'pred', 'pred_delta_slot_values', 'pred_prior_context', 'completion', 'all_completions', 'num_solutions', 'prompt_counts', 'examples', 'iter_parse_pred_delta', 'rights', 'error']
# def sort_data_item(data_item):
#     return_data_item = {}

#     return_data_item['ID'] = data_item['ID']
#     return_data_item['turn_id'] = data_item['turn_id']
#     return_data_item['domains'] = data_item['domains']
#     return_data_item['dialog'] = data_item['dialog']
#     return_data_item['slot_values'] = sort_dict(data_item['slot_values'])
#     return_data_item['pred'] = sort_dict(data_item['pred'])

#     return_data_item['turn_slot_values'] = sort_dict(data_item['turn_slot_values'])
#     return_data_item['iter_parse_pred_delta'] = sort_dict(data_item['iter_parse_pred_delta'])
#     return_data_item['completion'] = data_item['completion']
#     return_data_item['all_completions'] = data_item['all_completions']

#     return_data_item['last_slot_values'] = sort_dict(data_item['last_slot_values'])
#     return_data_item['pred_prior_context'] = sort_dict(data_item['pred_prior_context'])

#     return_data_item['pred_delta_slot_values'] = sort_dict(data_item['pred_delta_slot_values'])

#     return_data_item['prompt'] = data_item['prompt']
#     return_data_item['num_solutions'] = data_item['num_solutions']
#     return_data_item['prompt_counts'] = data_item['prompt_counts']
#     return_data_item['examples'] = data_item['examples']
#     return_data_item['rights'] = data_item['rights']
#     return_data_item['error'] = data_item['error']
#     return return_data_item

def sort_data_item(data_item, parsing_func='iterative_parsing'):
    """Return a slimmed-down copy of a logged turn with its fields in a fixed order.

    Only the fields listed below are copied; slot-value dictionaries are
    re-ordered by key through ``sort_dict``. Other fields of ``data_item``
    (e.g. 'prompt', 'examples', 'all_completions') are not carried over.

    The 'rights' tuple is appended when ``data_item`` has one, so that later
    cells reading ``item['rights']`` from the sorted records keep working.

    Args:
        data_item: one turn record; must contain every field in the layout
            below (a missing field raises KeyError).
        parsing_func: accepted for backward compatibility; not used.

    Returns:
        A new dict; ``data_item`` itself is not modified.
    """
    # (field, holds_slot_values) in the order the fields appear in the output.
    field_layout = (
        ('ID', False),
        ('turn_id', False),
        ('domains', False),
        ('dialog', False),
        ('slot_values', True),
        ('pred', True),
        ('turn_slot_values', True),
        ('iter_parse_pred_delta', True),
        ('completion', False),
        ('error', False),
        ('last_slot_values', True),
        ('pred_prior_context', True),
        ('pred_delta_slot_values', True),
    )

    return_data_item = {}
    for field, holds_slot_values in field_layout:
        value = data_item[field]
        return_data_item[field] = sort_dict(value) if holds_slot_values else value

    # Optional: only present once the scoring loop has annotated the turn.
    if 'rights' in data_item:
        return_data_item['rights'] = data_item['rights']

    return return_data_item

In [18]:
def categorize_errors(slot, val, tmp, visited, gold, pred, pred_prev, prefix='delta', mode='hall'):
    """Classify one mismatching (slot, value) pair and record it on ``tmp['error']``.

    Errors are appended as ``(name, details)`` tuples where ``name`` is
    ``prefix`` followed by the category. Note that ``prefix`` is concatenated
    verbatim, so callers that want ``delta_miss_total`` must pass ``'delta_'``.

    mode == 'miss' (``slot``/``val`` is a gold pair the prediction lacks):
        * ``miss_confuse``  - ``val`` was predicted, but under other slot(s);
          details are ``(slot, val, confused_slot, val)``, one error per slot.
        * ``miss_dontcare`` - gold is 'dontcare' and the slot has no prediction.
        * ``miss_delete``   - gold is '[DELETE]' and the slot has no prediction.
        * ``hall_val``      - the slot was predicted with a different value;
          details are ``(slot, val, slot, predicted_value)``.
        * ``miss_total``    - the slot was not predicted at all.

    mode == 'hall' (``slot``/``val`` is a predicted pair gold lacks):
        * ``hall_val``       - gold has the slot with another value;
          details are ``(slot, gold_value, slot, val)``.
        * ``hall_overwrite`` - slot absent from gold, present in ``pred_prev``
          with a different value.
        * nothing is recorded when the value merely repeats ``pred_prev``.
        * ``hall_total``     - slot absent from both gold and ``pred_prev``.

    Args:
        slot, val: the pair being classified.
        tmp: turn record; its 'error' list is created if missing.
        visited: list of pairs already accounted for; extended in place.
        gold, pred: gold and predicted slot-value dicts being compared.
        pred_prev: predicted state before this turn.
        prefix: string prepended to every category name.
        mode: 'miss' or 'hall'; any other value records nothing.

    Returns:
        ``(tmp, visited)``, the same objects that were passed in.
    """
    errors = tmp.setdefault('error', [])

    # Predicted pairs that gold does not contain, kept in `pred` order so the
    # order of recorded confusions is deterministic.
    over_predicted = {s: v for s, v in pred.items() if (s, v) not in gold.items()}

    if mode == 'miss':
        confusions = [(s, v) for s, v in over_predicted.items() if v == val]
        if confusions:
            for confused_slot, confused_val in confusions:
                errors.append((f'{prefix}miss_confuse', (slot, val, confused_slot, confused_val)))
                visited.append((slot, val))
                visited.append((confused_slot, confused_val))
            return tmp, visited

        slot_unpredicted = pred.get(slot) is None
        if val == 'dontcare' and slot_unpredicted:
            errors.append((f'{prefix}miss_dontcare', (slot, val)))
            visited.append((slot, val))
            return tmp, visited

        if val == '[DELETE]' and slot_unpredicted:
            errors.append((f'{prefix}miss_delete', (slot, val)))
            visited.append((slot, val))
            return tmp, visited

        if slot in pred:
            # Right slot, wrong value.
            errors.append((f'{prefix}hall_val', (slot, val, slot, pred[slot])))
            visited.append((slot, val))
            visited.append((slot, pred[slot]))
            return tmp, visited

        errors.append((f'{prefix}miss_total', (slot, val)))
        visited.append((slot, val))
        return tmp, visited

    if mode == 'hall' and (slot, val) in over_predicted.items():
        if slot in gold:
            errors.append((f'{prefix}hall_val', (slot, gold[slot], slot, val)))
            visited.append((slot, gold[slot]))
            visited.append((slot, val))
        elif slot in pred_prev:
            # Re-stating the previously predicted value is not counted as an error.
            if val != pred_prev[slot]:
                errors.append((f'{prefix}hall_overwrite', (slot, val)))
        else:
            errors.append((f'{prefix}hall_total', (slot, val)))
        # NOTE(review): hall_overwrite / hall_total do not mark the pair as
        # visited, unlike every other category - confirm this is intended.

    return tmp, visited


def find_error_case(tmp, prev_log, gold, pred, gold_delta, pred_delta, gold_prev, pred_prev):
    """Annotate a turn with its delta errors and with errors carried over from the previous turn.

    Two kinds of entries are appended to ``tmp['error']`` (created if absent):

    * delta errors - every pair that differs between ``gold_delta`` and
      ``pred_delta`` is classified by ``categorize_errors`` with the
      ``'delta_'`` prefix;
    * propagated errors - an error recorded on ``prev_log`` is re-recorded as
      ``error_prop_<last two parts of its name>`` when its pair is still wrong
      both in the previous state (``gold_prev`` vs ``pred_prev``) and in the
      current state (``gold`` vs ``pred``). Pairs that also differ in this
      turn's delta are skipped, except for errors whose name contains
      'delete', which are propagated regardless.

    Pairs already mentioned by an existing entry of ``tmp['error']`` are not
    classified again, so calling this twice on the same record adds nothing.

    Args:
        tmp: the turn record to annotate.
        prev_log: record of the previous turn (``{}`` when there is none);
            only its 'error' list is read.
        gold, pred: gold / predicted state after this turn.
        gold_delta, pred_delta: gold / predicted update made in this turn.
        gold_prev, pred_prev: gold / predicted state before this turn.

    Returns:
        ``tmp`` (the same object).
    """
    def unmatched(left, right):
        # Pairs of `left` that `right` does not contain, in `left` order.
        return {s: v for s, v in left.items() if (s, v) not in right.items()}

    errors = tmp.setdefault('error', [])

    delta_miss = unmatched(gold_delta, pred_delta)
    delta_over = unmatched(pred_delta, gold_delta)

    prev_miss = unmatched(gold_prev, pred_prev)
    prev_over = unmatched(pred_prev, gold_prev)

    over = unmatched(pred, gold)
    miss = unmatched(gold, pred)

    # Seed `visited` with every pair an existing error already refers to.
    visited = []
    for _, err_s_v in errors:
        if len(err_s_v) > 2:
            visited.append((err_s_v[-2], err_s_v[-1]))
        visited.append((err_s_v[0], err_s_v[1]))

    for gold_slot, gold_val in delta_miss.items():
        if (gold_slot, gold_val) in visited:
            continue
        tmp, visited = categorize_errors(gold_slot, gold_val, tmp, visited, gold_delta, pred_delta, pred_prev, prefix='delta_', mode='miss')

    for pred_slot, pred_val in delta_over.items():
        if (pred_slot, pred_val) in visited:
            continue
        tmp, visited = categorize_errors(pred_slot, pred_val, tmp, visited, gold_delta, pred_delta, pred_prev, prefix='delta_', mode='hall')

    # Errors propagated from the previous turn.
    for err_name, err_s_v in prev_log.get('error', []):
        # 'miss' errors are keyed by their gold pair (first two items),
        # 'hall' errors by their predicted pair (last two items).
        if 'miss' in err_name:
            prev_pair = (err_s_v[0], err_s_v[1])
        elif 'hall' in err_name:
            prev_pair = (err_s_v[-2], err_s_v[-1])
        else:
            # Unknown category: there is no pair to follow.
            continue

        if prev_pair in visited:
            continue

        wrong_before = prev_pair in prev_miss.items() or prev_pair in prev_over.items()
        wrong_now = prev_pair in over.items() or prev_pair in miss.items()
        if not (wrong_before and wrong_now):
            continue

        in_delta = prev_pair in delta_miss.items() or prev_pair in delta_over.items()
        if in_delta and 'delete' not in err_name:
            continue

        prop_name = 'error_prop_' + '_'.join(err_name.split('_')[-2:])
        tmp['error'].append((prop_name, err_s_v))
        visited.append(prev_pair)

    # Classification against the full state (gold vs pred, empty prefix) is
    # currently disabled; only delta and propagated errors are recorded.
    return tmp


In [19]:
# Re-score a saved run: re-parse every completion, rebuild the dialogue state
# turn by turn, and attach `rights` / `error` annotations to each turn.
#
# Other logs that were analysed with this cell:
#   '../outputs/runs/table4/zero_shot/split_v1_train/running_log.json'
#   'train_zero_log.json'
#   'bm25_log.json'
RUNNING_LOG_PATH = '/home/haesungpyun/my_refpydst/outputs/runs/table4_llama/zero_shot/split_v1_greedy_0620_1337/running_log.json'

with open(RUNNING_LOG_PATH, 'r') as f:
    logs = json.load(f)

n_correct = 0
new_logs = []
prev_log = {}
for data_item in logs:
    # Turn 0 opens a new dialogue, so nothing carries over from the previous record.
    if data_item['turn_id'] == 0:
        prev_log = {}

    # State before this turn.
    # NOTE(review): `pred_prev` is the `pred` stored on the previous record; the
    # state recomputed below is never written back to `data_item['pred']`, so the
    # next turn starts from the logged prediction - confirm this is intended.
    pred_prev = prev_log.get('pred', {})
    gold_prev = data_item['last_slot_values']
    gold_prev, pred_prev = unroll_or(gold_prev, pred_prev)

    # Update made in this turn, re-parsed from the raw completion. Deltas that
    # mention DELETE are kept as parsed instead of being normalized.
    pred_delta = iterative_parsing(data_item['completion'], pred_prev)
    pred_delta = normalizer.normalize(pred_delta) if 'DELETE' not in str(pred_delta) else pred_delta
    data_item['iter_parse_pred_delta'] = pred_delta
    gold_delta = data_item['turn_slot_values']
    gold_delta, pred_delta = unroll_or(gold_delta, pred_delta)

    # State after this turn.
    pred = update_dialogue_state(pred_prev, pred_delta)
    gold = data_item['slot_values']
    gold, pred = unroll_or(gold, pred)

    if pred == gold:
        n_correct += 1

    # (full state correct, delta correct, prior state correct) as 0/1 flags.
    rights = (int(pred == gold), int(pred_delta == gold_delta), int(pred_prev == gold_prev))
    data_item['rights'] = rights

    # Collect the record in the module-level list f_<full>_d_<delta>_p_<prior>,
    # creating the list on first use.
    globals().setdefault('f_{}_d_{}_p_{}'.format(*rights), []).append(data_item)

    # Errors are looked for on every turn, including turns whose full state is
    # correct: the delta can still differ from gold.
    data_item['error'] = []
    data_item = find_error_case(data_item, prev_log, gold, pred, gold_delta, pred_delta, gold_prev, pred_prev)
    data_item['error'] = sorted(set(tuple(x) for x in data_item['error']))

    data_item = sort_data_item(data_item)
    new_logs.append(data_item)
    prev_log = data_item

----------------- ITERATIVE PARSING ERROR -----------------
IndexError('list index out of range')
got exception when splitting statement: agent.state.hotel.book_people = 7

----------------- ITERATIVE PARSING ERROR -----------------
SyntaxError('unterminated string literal (detected at line 1)', ('<string>', 1, 19, 'agent.state.print(")', 1, 19))
got exception when execute statement: agent.state.print(")
 previous state: {'hotel-name': 'express by holiday inn cambridge'}
current state: {}

----------------- ITERATIVE PARSING ERROR -----------------
NameError("name 'hotel' is not defined")
got exception when execute statement: agent.state.train = agent.find_train(day=hotel.book_day)
 previous state: {'hotel-book day': 'monday', 'hotel-book people': '7', 'hotel-book stay': '4', 'hotel-name': 'express by holiday inn cambridge'}
current state: {}

----------------- ITERATIVE PARSING ERROR -----------------
NameError("name 'hotel' is not defined")
got exception when execute statement: agent



----------------- ITERATIVE PARSING ERROR -----------------
AttributeError("type object 'BeliefState' has no attribute 'from_dict'")
got exception when execute statement: agent.state.agent.state = BeliefState.from_dict({"hotel": {"name": "nirala"}})
 previous state: {'taxi-arriveby': '15:30', 'taxi-departure': 'j restaurant', 'taxi-destination': 'high street', 'taxi-leaveat': '12:00'}
current state: {}

----------------- ITERATIVE PARSING ERROR -----------------
SyntaxError('unterminated string literal (detected at line 1)', ('<string>', 1, 19, 'agent.state.print(")', 1, 19))
got exception when execute statement: agent.state.print(")
 previous state: {'taxi-arriveby': '15:30', 'taxi-departure': 'j restaurant', 'taxi-destination': 'high street', 'taxi-leaveat': '12:00'}
current state: {}

----------------- ITERATIVE PARSING ERROR -----------------
NameError("name 'restaurant' is not defined")
got exception when execute statement: agent.state.restaurant = restaurant._replace(area=Area.we

In [20]:
# del logs
# Persist the annotated turns as pretty-printed JSON.
with open('train_zero_log_new_2.json', 'w') as out_file:
    json.dump(obj=new_logs, fp=out_file, indent=2)

#### save logs by error cases

In [None]:
# Group the annotated turns by delta-error category.
# error_ids maps each category name to the "<ID>_turn_<turn_id>" keys of the
# turns that contain at least one error whose name includes that category.
error_ids = defaultdict(list)

for name in ["delta_miss_confuse", 'delta_miss_total', "delta_miss_delete", "delta_miss_dontcare",
             "delta_hall_overwrite", "delta_hall_total", "delta_hall_val"]:
    # Full records of the matching turns; only consumed by the (disabled) dump below.
    error_logs = []
    for log in new_logs:
        has_category = any(name in err[0] for err in log['error'])
        if not has_category:
            continue

        error_logs.append(log)
        error_ids[name].append(f'{log["ID"]}_turn_{log["turn_id"]}')

    # with open(f'../data/error_shots/train_zero_{name}.json', 'w') as f:
    #     json.dump(error_logs, f, indent=4)

# with open(f'../data/error_shots/error_shots.json', 'w') as f:
#     json.dump(error_ids, f, indent=4)

In [None]:
# Show the "<ID>_turn_<turn_id>" key format for whatever record `log` currently holds.
'{}_turn_{}'.format(log["ID"], log["turn_id"])


'PMUL3130.json_turn_4'

In [None]:
import os
import json

# Tally the labels stored in every "*error_analysis_delta*" JSON file of the
# project directory. Each file is read as a list of rows; row[0] is collected
# per label and the label string is taken from row[5] onwards.
# NOTE(review): presumably row[0] is a turn identifier and the labels are the
# manual annotations described in the notes cell - verify against the files.
analysis_dir = '/home/haesungpyun/my_refpydst'
for path in os.listdir(analysis_dir):
    error_case = {}
    error_cnt = {}
    if 'error_analysis_delta' not in path:
        # print(path)
        continue
    print(path)

    # os.listdir yields bare file names, so resolve them against the listed
    # directory rather than the current working directory.
    with open(os.path.join(analysis_dir, path), 'r') as f:
        error = json.load(f)

    for err in error:
        err_string = err[5]
        # When column 5 is empty or holds an automatic 'delta...' name, use the
        # first later column that is non-empty and not a 'delta...' name.
        if err[5] == '' or err[5].startswith('delta'):
            for el in err[5:]:
                if el != '' and not el.startswith('delta'):
                    err_string = el
                    break

        # A cell may list several comma-separated labels.
        for err_name in err_string.split(','):
            err_name = err_name.strip()
            error_case.setdefault(err_name, []).append(err[0])
            error_cnt[err_name] = error_cnt.get(err_name, 0) + 1

    print(path)
    print(sum(list(error_cnt.values())))
    print(sort_dict(error_cnt, by_key=False))
    print()

In [None]:
"""
context
    {hotel-area: south}
3rd dialog:
    sys: '호텔 뭐할래?'
    user: '3박 예약'

gold {hotel-day:3}
pred {hotel-day: 4}

JGA, slot f1, slot acc

1. delta_miss_total
    usr_state: 유저가 직접 slot, value를 말함
    usr_request: 유저의 질문이나 정보 요구를 통해서 slot, value 유추 가능
    
    usr_im_confirm: 유저가 명확한 확답보다 추가 정보를 요구하거나 질문
    usr_ex_confirm: 유저가 명확히 긍정의 답을 함 (e.g. yes, sure, why not, the first seems great)

    co-reference: history에서 나온 정보를 가지고 와서 사용해야 하는 경우
    annotation: annotation이 잘못되어 있는 경우

    왜 가장 명확한 usr_state를 못하는지 모르겠음. (전체 dataset에 usr_state의 비율이 높아서 그럴 것으로 추측됨)

2. delta_miss_dontcare
    miss_utter: user의 신경 안 쓴다는 발화를 dontcare로 연결하지 못함 (slot도 잡지 못하고, 기존에 예측 정보를 그대로 parroting)

3. delta_miss_delete
    usr_refusal: user가 해당 slot의 value를 거절하였으나, 그것을 잡지 못함
    context_hallucination: context에서 나온 정보를 잘못 사용하여 hallucination

4. delta_miss_confuse
    context_change: 대화의 주제가 변하여 domain이 변경되었는데 이를 포착하지 못함
    mixed_intent: user의 발화가 여러 의도를 가지고 있어서 이를 잘못 파악함
    slot_confuse: 두 slot을 헷갈림

5. delta_hall_total
    context_hallucination: context에서 나온 정보를 잘못 사용하여 hallucination
    infer_hallucination: 추론을 할 수 있는 내용이지만 hallucination
    update_hallucination: 어떤 예약을 확정할 때, 그 정보로 update하는 경우
        (e.g. 6시 이후에 출발하는 열차 예약 -> 6시 15분에 출발하는 열차 예약
            => train-leaveat를 6시 15분으로 update)
    intent_hallucination: user의 의도를 잘못 파악하여 hallucination
    late_hallucination: 이전 turn에서 예측해야 하는 slot, value pair를 이번 turn에서 잘못 예측

    late: 이전 turn에서 예측했어야 하는 slot, value pair인데, 이것을 이번 turn에서 올바르게 예측함
"""

#### error stats

In [10]:
# Error statistics over `new_logs`.
# Every category is tracked twice:
#   *_slot_value : number of recorded errors (one per entry of data_item['error'])
#   *_turn       : "<ID>_<turn_id>" appended once per recorded error, so a turn
#                  with several errors of a category appears several times
# Categories are detected by substring tests on the error name.
total_error_slot_value = 0
total_error_turn = []

# Broad error categories
total_miss_slot_value, total_hall_slot_value = 0, 0
total_miss_turn, total_hall_turn = [], []

total_delta_slot_value, total_delta_miss_slot_value, total_delta_hall_slot_value = 0, 0, 0
total_delta_turn, total_delta_miss_turn, total_delta_hall_turn = [], [], [] 

total_error_prop_slot_value, total_error_prop_miss_slot_value, total_error_prop_hall_slot_value = 0, 0, 0
total_error_prop_turn, total_error_prop_miss_turn, total_error_prop_hall_turn = [], [], []

# Counts for each individual error type
delta_hall_val_slot_value, delta_hall_overwrite_slot_value, delta_hall_parrot_slot_value, delta_hall_total_slot_value = 0,0,0,0
delta_hall_val_turn, delta_hall_overwrite_turn, delta_hall_parrot_turn, delta_hall_total_turn = [], [], [], []

delta_miss_delete_slot_value, delta_miss_dontcare_slot_value, delta_miss_confuse_slot_value, delta_miss_total_slot_value = 0, 0, 0, 0
delta_miss_delete_turn, delta_miss_dontcare_turn, delta_miss_confuse_turn, delta_miss_total_turn = [], [], [], []

# For propagated errors the '..._total' kind is stored in error_prop_miss_* / error_prop_hall_*
error_prop_miss_dontcare_slot_value, error_prop_miss_delete_slot_value, error_prop_miss_slot_value, error_prop_miss_confuse_slot_value = 0,0,0, 0
error_prop_miss_dontcare_turn, error_prop_miss_delete_turn, error_prop_miss_turn, error_prop_miss_confuse_turn = [], [], [], []

error_prop_hall_val_slot_value, error_prop_hall_overwrite_slot_value, error_prop_hall_slot_value = 0,0,0
error_prop_hall_val_turn, error_prop_hall_overwrite_turn, error_prop_hall_turn = [], [], []


# error_dict: dialogue ID -> one list of error names per turn of that dialogue
error_dict = {}
for data_item in new_logs:
    ids = data_item['ID']+'_'+str(data_item['turn_id'])
    # ids = log
    if error_dict.get(data_item['ID'], None) is None:
        error_dict[data_item['ID']] = []
    
    tmp_list = []
    for error, err_slot_val in data_item['error']:       
        tmp_list.append(error)

        total_error_slot_value += 1
        total_error_turn.append(ids)
    
        # miss vs hall, regardless of where the error originated
        if 'miss' in error:
            total_miss_slot_value += 1
            total_miss_turn.append(ids)
        elif 'hall' in error:
            total_hall_slot_value += 1
            total_hall_turn.append(ids)
 
        # errors made in this turn's update ('delta...') vs errors carried over ('error_prop...')
        if 'delta' in error:
            total_delta_slot_value += 1
            total_delta_turn.append(ids)
            if 'miss' in error:
                total_delta_miss_slot_value += 1
                total_delta_miss_turn.append(ids)
                if 'delete' in error:
                    delta_miss_delete_slot_value += 1
                    delta_miss_delete_turn.append(ids)
                elif 'dontcare' in error:
                    delta_miss_dontcare_slot_value += 1
                    delta_miss_dontcare_turn.append(ids)
                elif 'confuse' in error:
                    delta_miss_confuse_slot_value += 1
                    delta_miss_confuse_turn.append(ids)
                elif 'total' in error:
                    delta_miss_total_slot_value += 1
                    delta_miss_total_turn.append(ids)
            elif 'hall' in error:
                total_delta_hall_slot_value += 1
                total_delta_hall_turn.append(ids)
                if 'val' in error:
                    delta_hall_val_slot_value += 1
                    delta_hall_val_turn.append(ids)
                elif 'overwrite' in error:
                    delta_hall_overwrite_slot_value += 1
                    delta_hall_overwrite_turn.append(ids)
                elif 'parrot' in error:
                    # categorize_errors has its hall_parrot append commented out,
                    # so this branch only fires for logs annotated by other code
                    delta_hall_parrot_slot_value += 1
                    delta_hall_parrot_turn.append(ids)
                elif 'total' in error:
                    delta_hall_total_slot_value += 1
                    delta_hall_total_turn.append(ids)
        elif 'error_prop' in error:
            total_error_prop_slot_value += 1
            total_error_prop_turn.append(ids)
            if 'miss' in error:
                total_error_prop_miss_slot_value += 1
                total_error_prop_miss_turn.append(ids)
                if 'delete' in error:
                    error_prop_miss_delete_slot_value += 1
                    error_prop_miss_delete_turn.append(ids)
                elif 'dontcare' in error:
                    error_prop_miss_dontcare_slot_value += 1
                    error_prop_miss_dontcare_turn.append(ids)
                elif 'total' in error:
                    error_prop_miss_slot_value += 1
                    error_prop_miss_turn.append(ids)
                elif 'confuse' in error:
                    error_prop_miss_confuse_slot_value += 1
                    error_prop_miss_confuse_turn.append(ids)
            elif 'hall' in error:
                total_error_prop_hall_slot_value += 1
                total_error_prop_hall_turn.append(ids)
                if 'overwrite' in error:
                    error_prop_hall_overwrite_slot_value += 1
                    error_prop_hall_overwrite_turn.append(ids)
                elif 'val' in error:
                    error_prop_hall_val_slot_value += 1
                    error_prop_hall_val_turn.append(ids)
                elif 'total' in error:
                    error_prop_hall_slot_value += 1
                    error_prop_hall_turn.append(ids)
    error_dict[data_item['ID']].append(tmp_list)

# Replace every module-level list whose name contains 'total' by a list of its
# unique elements, keeping the original (with duplicates) in list_dict.
# NOTE(review): the name test also matches delta_miss_total_turn and
# delta_hall_total_turn but none of the other per-category *_turn lists, so
# only some of the turn counts are de-duplicated - confirm this is intended.
list_dict = {}
namespace = globals()
var_list = list(namespace.keys())  # snapshot: the loop rebinds names in the namespace
for var_name in var_list:
    var = namespace[var_name]
    if 'total' in var_name and isinstance(var, list):
        # print(var_name, len(var))
        list_dict.update({var_name: var})
        # Rebind directly instead of exec-ing a repr-built assignment.
        namespace[var_name] = list(set(var))
        # print(var_name, len(namespace[var_name]))

In [11]:
# Delta-miss categories in the order: delete, dontcare, confuse, total.
# Row 1: number of recorded errors; row 2: length of the matching *_turn list.
print(
    delta_miss_delete_slot_value,
    delta_miss_dontcare_slot_value,
    delta_miss_confuse_slot_value,
    delta_miss_total_slot_value,
)
print(*(
    len(turns)
    for turns in (delta_miss_delete_turn, delta_miss_dontcare_turn, delta_miss_confuse_turn, delta_miss_total_turn)
))

14 35 12 109
14 35 12 98


In [12]:
# Delta-hall categories in the order: val, overwrite, parrot, total.
# Row 1: number of recorded errors; row 2: length of the matching *_turn list.
print(
    delta_hall_val_slot_value,
    delta_hall_overwrite_slot_value,
    delta_hall_parrot_slot_value,
    delta_hall_total_slot_value,
)
print(*(
    len(turns)
    for turns in (delta_hall_val_turn, delta_hall_overwrite_turn, delta_hall_parrot_turn, delta_hall_total_turn)
))

24 8 0 79
24 8 0 64


In [None]:
# Joint goal accuracy over all turns, from the per-turn `rights` flags:
# rights[0] = full state correct, rights[1] = this turn's delta correct.
# A KeyError here means a record reached new_logs without its `rights` entry.
delta_jga = sum(item['rights'][1] for item in new_logs)
full_jga = sum(item['rights'][0] for item in new_logs)
n_turns = len(new_logs)
full_jga, delta_jga, n_turns, delta_jga/n_turns, full_jga/n_turns

In [None]:
# Miss vs Hall
# NOTE(review): 1447 is a hard-coded total number of turns - confirm it matches
# len(new_logs) for the log being analysed.
print('--------MISS--------')
print(f'miss error turns / total turns:\t\t{len(total_miss_turn)}  /  1447  =  {len(total_miss_turn)/1447}')
print(f'miss error turns / total error turns:\t{len(total_miss_turn)}  /  {len(total_error_turn)}  =  {len(total_miss_turn)/len(total_error_turn)}')
print(f'miss s-v pairs / total error pairs:\t{total_miss_slot_value}  /  {total_error_slot_value}  =  {total_miss_slot_value/total_error_slot_value}')

print('--------HALL--------')
# This row reports hall turns; its label used to say "miss error turns".
print(f'hall error turns / total turns:\t\t{len(total_hall_turn)}  /  1447  =  {len(total_hall_turn)/1447}')
print(f'hall error turns / total error turns:\t{len(total_hall_turn)}  /  {len(total_error_turn)}  =  {len(total_hall_turn)/len(total_error_turn)}')
print(f'hall s-v pairs / total error pairs:\t{total_hall_slot_value}  /  {total_error_slot_value}  =  {total_hall_slot_value/total_error_slot_value}')

In [None]:
# delta error vs error propagation error
# Each "/ total turns" row is labelled after the quantity it prints; four of them
# used to carry the copy-pasted label "delta-miss turns / total turns".
# NOTE(review): 1447 is a hard-coded total number of turns - confirm it matches
# len(new_logs) for the log being analysed.
print('----------------DELTA----------------')
print(f'delta error turns / total turns:\t\t{len(total_delta_turn)}  /  1447  =  {len(total_delta_turn)/1447}')
print(f'delta error turns / total error turns:\t\t{len(total_delta_turn)}  /  {len(total_error_turn)}  =  {len(total_delta_turn)/len(total_error_turn)}')
print(f'delta error s-v pairs / total error pairs:\t{total_delta_slot_value}  /  {total_error_slot_value}  =  {total_delta_slot_value/total_error_slot_value}')
print('--------DELTA-MISS--------')
print(f'delta-miss turns / total turns:\t\t\t{len(total_delta_miss_turn)}  /  1447  =  {len(total_delta_miss_turn)/1447}')
print(f'delta-miss turns / total error turns:\t\t{len(total_delta_miss_turn)}  /  {len(total_error_turn)}  =  {len(total_delta_miss_turn)/len(total_error_turn)}')
print(f'delta-miss turns / delta error turns:\t\t{len(total_delta_miss_turn)}  /  {len(total_delta_turn)}  =  {len(total_delta_miss_turn)/len(total_delta_turn)}')
print(f'delta-miss s-v pairs / total error pairs:\t{total_delta_miss_slot_value}  /  {total_error_slot_value}  =  {total_delta_miss_slot_value/total_error_slot_value}')
print(f'delta-miss s-v pairs / delta error pairs:\t{total_delta_miss_slot_value}  /  {total_delta_slot_value}  =  {total_delta_miss_slot_value/total_delta_slot_value}')
print('--------DELTA-HALL--------')
print(f'delta-hall turns / total turns:\t\t\t{len(total_delta_hall_turn)}  /  1447  =  {len(total_delta_hall_turn)/1447}')
print(f'delta-hall turns / total error turns:\t\t{len(total_delta_hall_turn)}  /  {len(total_error_turn)}  =  {len(total_delta_hall_turn)/len(total_error_turn)}')
print(f'delta-hall turns / delta error turns:\t\t{len(total_delta_hall_turn)}  /  {len(total_delta_turn)}  =  {len(total_delta_hall_turn)/len(total_delta_turn)}')
print(f'delta-hall s-v pairs / total error pairs:\t{total_delta_hall_slot_value}  /  {total_error_slot_value}  =  {total_delta_hall_slot_value/total_error_slot_value}')
print(f'delta-hall s-v pairs / delta error pairs:\t{total_delta_hall_slot_value}  /  {total_delta_slot_value}  =  {total_delta_hall_slot_value/total_delta_slot_value}')
print()
print('----------------ERROR PROPAGATION----------------')
print(f'error prop turns / total turns:\t\t\t{len(total_error_prop_turn)}  /  1447  =  {len(total_error_prop_turn)/1447}')
print(f'error prop turns / total error turns:   \t{len(total_error_prop_turn)}  /  {len(total_error_turn)}  =  {len(total_error_prop_turn)/len(total_error_turn)}')
print(f'error prop s-v pairs / total error pairs:\t{total_error_prop_slot_value}  /  {total_error_slot_value}  =  {total_error_prop_slot_value/total_error_slot_value}')
print('--------ERROR PROP-MISS--------')
print(f'prop-miss turns / total turns:\t\t\t{len(total_error_prop_miss_turn)}  /  1447  =  {len(total_error_prop_miss_turn)/1447}')
print(f'prop-miss turns / total error turns:\t\t{len(total_error_prop_miss_turn)}  /  {len(total_error_turn)}  =  {len(total_error_prop_miss_turn)/len(total_error_turn)}')
print(f'prop-miss turns / error prop turns:\t\t{len(total_error_prop_miss_turn)}  /  {len(total_error_prop_turn)}  =  {len(total_error_prop_miss_turn)/len(total_error_prop_turn)}')
print(f'prop-miss s-v pairs / total error pairs:\t{total_error_prop_miss_slot_value}  /  {total_error_slot_value}  =  {total_error_prop_miss_slot_value/total_error_slot_value}')
print(f'prop-miss s-v pairs / error prop pairs:\t\t{total_error_prop_miss_slot_value}  /  {total_error_prop_slot_value}  =  {total_error_prop_miss_slot_value/total_error_prop_slot_value}')
print('--------ERROR PROP-HALL--------')
print(f'prop-hall turns / total turns:\t\t\t{len(total_error_prop_hall_turn)}  /  1447  =  {len(total_error_prop_hall_turn)/1447}')
print(f'prop-hall turns / total error turns:\t\t{len(total_error_prop_hall_turn)}  /  {len(total_error_turn)}  =  {len(total_error_prop_hall_turn)/len(total_error_turn)}')
print(f'prop-hall turns / error prop turns:\t\t{len(total_error_prop_hall_turn)}  /  {len(total_error_prop_turn)}  =  {len(total_error_prop_hall_turn)/len(total_error_prop_turn)}')
print(f'prop-hall s-v pairs / total error pairs:\t{total_error_prop_hall_slot_value}  /  {total_error_slot_value}  =  {total_error_prop_hall_slot_value/total_error_slot_value}')
print(f'prop-hall s-v pairs / error prop pairs:\t\t{total_error_prop_hall_slot_value}  /  {total_error_prop_slot_value}  =  {total_error_prop_hall_slot_value/total_error_prop_slot_value}')


In [None]:
# Breakdown of delta-miss errors by sub-category (total / dontcare / confuse / delete).
# Each sub-section prints the turn count and the slot-value ("s-v") pair count of that
# sub-category as a fraction of: all turns (hard-coded 1447), all error turns/pairs,
# all delta error turns/pairs, and all delta-miss error turns/pairs.
# NOTE(review): every ratio divides by a counter built in an earlier cell; a zero
# counter raises ZeroDivisionError here.

print('----------------DELTA-MISS----------------')
print(f'delta-miss turns / total turns:\t\t\t\t{len(total_delta_miss_turn)}  /  1447  =  {len(total_delta_miss_turn)/1447}')
print(f'delta-miss turns / total error turns:\t\t\t{len(total_delta_miss_turn)}  /  {len(total_error_turn)}  =  {len(total_delta_miss_turn)/len(total_error_turn)}')
print(f'delta-miss s-v pairs / total error pairs:\t\t{total_delta_miss_slot_value}  /  {total_error_slot_value}  =  {total_delta_miss_slot_value/total_error_slot_value}')
print('--------DELTA-MISS-TOTAL--------')
print(f'delta-miss-total turns / total turns:\t\t\t{len(delta_miss_total_turn)}  /  1447  =  {len(delta_miss_total_turn)/1447}')
print(f'delta-miss-total turns / total error turns:\t\t{len(delta_miss_total_turn)}  /  {len(total_error_turn)}  =  {len(delta_miss_total_turn)/len(total_error_turn)}')
print(f'delta-miss-total turns / delta error turns:\t\t{len(delta_miss_total_turn)}  /  {len(total_delta_turn)}  =  {len(delta_miss_total_turn)/len(total_delta_turn)}')
print(f'delta-miss-total turns / delta-miss error turns:\t{len(delta_miss_total_turn)}  /  {len(total_delta_miss_turn)}  =  {len(delta_miss_total_turn)/len(total_delta_miss_turn)}')
print(f'delta-miss-total s-v pairs / total error pairs:\t\t{delta_miss_total_slot_value}  /  {total_error_slot_value}  =  {delta_miss_total_slot_value/total_error_slot_value}')
print(f'delta-miss-total s-v pairs / delta error pairs:\t\t{delta_miss_total_slot_value}  /  {total_delta_slot_value}  =  {delta_miss_total_slot_value/total_delta_slot_value}')
print(f'delta-miss-total s-v pairs / delta-miss error pairs:\t{delta_miss_total_slot_value}  /  {total_delta_miss_slot_value}  =  {delta_miss_total_slot_value/total_delta_miss_slot_value}')
print('--------DELTA-MISS-DONTCARE--------')
print(f'delta-miss-dontcare turns / total turns:\t\t{len(delta_miss_dontcare_turn)}  /  1447  =  {len(delta_miss_dontcare_turn)/1447}')
print(f'delta-miss-dontcare turns / total error turns:\t\t{len(delta_miss_dontcare_turn)}  /  {len(total_error_turn)}  =  {len(delta_miss_dontcare_turn)/len(total_error_turn)}')
print(f'delta-miss-dontcare turns / delta error turns:\t\t{len(delta_miss_dontcare_turn)}  /  {len(total_delta_turn)}  =  {len(delta_miss_dontcare_turn)/len(total_delta_turn)}')
print(f'delta-miss-dontcare turns / delta-miss error turns:\t{len(delta_miss_dontcare_turn)}  /  {len(total_delta_miss_turn)}  =  {len(delta_miss_dontcare_turn)/len(total_delta_miss_turn)}')
print(f'delta-miss-dontcare s-v pairs / total error pairs:\t{delta_miss_dontcare_slot_value}  /  {total_error_slot_value}  =  {delta_miss_dontcare_slot_value/total_error_slot_value}')
print(f'delta-miss-dontcare s-v pairs / delta error pairs:\t{delta_miss_dontcare_slot_value}  /  {total_delta_slot_value}  =  {delta_miss_dontcare_slot_value/total_delta_slot_value}')
print(f'delta-miss-dontcare s-v pairs / delta-miss error pairs:\t{delta_miss_dontcare_slot_value}  /  {total_delta_miss_slot_value}  =  {delta_miss_dontcare_slot_value/total_delta_miss_slot_value}')
print('--------DELTA-MISS-CONFUSE--------')
print(f'delta-miss-confuse turns / total turns:\t\t\t{len(delta_miss_confuse_turn)}  /  1447  =  {len(delta_miss_confuse_turn)/1447}')
print(f'delta-miss-confuse turns / total error turns:\t\t{len(delta_miss_confuse_turn)}  /  {len(total_error_turn)}  =  {len(delta_miss_confuse_turn)/len(total_error_turn)}')
print(f'delta-miss-confuse turns / delta error turns:\t\t{len(delta_miss_confuse_turn)}  /  {len(total_delta_turn)}  =  {len(delta_miss_confuse_turn)/len(total_delta_turn)}')
print(f'delta-miss-confuse turns / delta-miss error turns:\t{len(delta_miss_confuse_turn)}  /  {len(total_delta_miss_turn)}  =  {len(delta_miss_confuse_turn)/len(total_delta_miss_turn)}')
print(f'delta-miss-confuse s-v pairs / total error pairs:\t{delta_miss_confuse_slot_value}  /  {total_error_slot_value}  =  {delta_miss_confuse_slot_value/total_error_slot_value}')
print(f'delta-miss-confuse s-v pairs / delta error pairs:\t{delta_miss_confuse_slot_value}  /  {total_delta_slot_value}  =  {delta_miss_confuse_slot_value/total_delta_slot_value}')
print(f'delta-miss-confuse s-v pairs / delta-miss error pairs:\t{delta_miss_confuse_slot_value}  /  {total_delta_miss_slot_value}  =  {delta_miss_confuse_slot_value/total_delta_miss_slot_value}')
print('--------DELTA-MISS-DELETE--------')
print(f'delta-miss-delete turns / total turns:\t\t\t{len(delta_miss_delete_turn)}  /  1447  =  {len(delta_miss_delete_turn)/1447}')
print(f'delta-miss-delete turns / total error turns:\t\t{len(delta_miss_delete_turn)}  /  {len(total_error_turn)}  =  {len(delta_miss_delete_turn)/len(total_error_turn)}')
print(f'delta-miss-delete turns / delta error turns:\t\t{len(delta_miss_delete_turn)}  /  {len(total_delta_turn)}  =  {len(delta_miss_delete_turn)/len(total_delta_turn)}')
print(f'delta-miss-delete turns / delta-miss error turns:\t{len(delta_miss_delete_turn)}  /  {len(total_delta_miss_turn)}  =  {len(delta_miss_delete_turn)/len(total_delta_miss_turn)}')
print(f'delta-miss-delete s-v pairs / total error pairs:\t{delta_miss_delete_slot_value}  /  {total_error_slot_value}  =  {delta_miss_delete_slot_value/total_error_slot_value}')
print(f'delta-miss-delete s-v pairs / delta error pairs:\t{delta_miss_delete_slot_value}  /  {total_delta_slot_value}  =  {delta_miss_delete_slot_value/total_delta_slot_value}')
print(f'delta-miss-delete s-v pairs / delta-miss error pairs:\t{delta_miss_delete_slot_value}  /  {total_delta_miss_slot_value}  =  {delta_miss_delete_slot_value/total_delta_miss_slot_value}')

In [None]:
# Breakdown of delta-hallucination errors by sub-category (total / overwrite / value).
# Each sub-section prints the turn count and the slot-value ("s-v") pair count of that
# sub-category as a fraction of: all turns (hard-coded 1447), all error turns/pairs,
# all delta error turns/pairs, and all delta-hall error turns/pairs.
print('----------------DELTA-HALL----------------')
print(f'delta-hall turns / total turns:\t\t\t\t{len(total_delta_hall_turn)}  /  1447  =  {len(total_delta_hall_turn)/1447}')
print(f'delta-hall turns / total error turns:\t\t\t{len(total_delta_hall_turn)}  /  {len(total_error_turn)}  =  {len(total_delta_hall_turn)/len(total_error_turn)}')
print(f'delta-hall s-v pairs / total error pairs:\t\t{total_delta_hall_slot_value}  /  {total_error_slot_value}  =  {total_delta_hall_slot_value/total_error_slot_value}')
print('--------DELTA-HALL-TOTAL--------')
print(f'delta-hall-total turns / total turns:\t\t\t{len(delta_hall_total_turn)}  /  1447  =  {len(delta_hall_total_turn)/1447}')
print(f'delta-hall-total turns / total error turns:\t\t{len(delta_hall_total_turn)}  /  {len(total_error_turn)}  =  {len(delta_hall_total_turn)/len(total_error_turn)}')
print(f'delta-hall-total turns / delta error turns:\t\t{len(delta_hall_total_turn)}  /  {len(total_delta_turn)}  =  {len(delta_hall_total_turn)/len(total_delta_turn)}')
print(f'delta-hall-total turns / delta-hall error turns:\t{len(delta_hall_total_turn)}  /  {len(total_delta_hall_turn)}  =  {len(delta_hall_total_turn)/len(total_delta_hall_turn)}')
print(f'delta-hall-total s-v pairs / total error pairs:\t\t{delta_hall_total_slot_value}  /  {total_error_slot_value}  =  {delta_hall_total_slot_value/total_error_slot_value}')
print(f'delta-hall-total s-v pairs / delta error pairs:\t\t{delta_hall_total_slot_value}  /  {total_delta_slot_value}  =  {delta_hall_total_slot_value/total_delta_slot_value}')
print(f'delta-hall-total s-v pairs / delta-hall error pairs:\t{delta_hall_total_slot_value}  /  {total_delta_hall_slot_value}  =  {delta_hall_total_slot_value/total_delta_hall_slot_value}')
print('--------DELTA-HALL-OVERWRITE--------')
print(f'delta-hall-overwrite turns / total turns:\t\t{len(delta_hall_overwrite_turn)}  /  1447  =  {len(delta_hall_overwrite_turn)/1447}')
print(f'delta-hall-overwrite turns / total error turns:\t\t{len(delta_hall_overwrite_turn)}  /  {len(total_error_turn)}  =  {len(delta_hall_overwrite_turn)/len(total_error_turn)}')
print(f'delta-hall-overwrite turns / delta error turns:\t\t{len(delta_hall_overwrite_turn)}  /  {len(total_delta_turn)}  =  {len(delta_hall_overwrite_turn)/len(total_delta_turn)}')
print(f'delta-hall-overwrite turns / delta-hall error turns:\t{len(delta_hall_overwrite_turn)}  /  {len(total_delta_hall_turn)}  =  {len(delta_hall_overwrite_turn)/len(total_delta_hall_turn)}')
print(f'delta-hall-overwrite s-v pairs / total error pairs:\t{delta_hall_overwrite_slot_value}  /  {total_error_slot_value}  =  {delta_hall_overwrite_slot_value/total_error_slot_value}')
print(f'delta-hall-overwrite s-v pairs / delta error pairs:\t{delta_hall_overwrite_slot_value}  /  {total_delta_slot_value}  =  {delta_hall_overwrite_slot_value/total_delta_slot_value}')
# No tab after the colon on the next line: the label already ends on the shared value column.
print(f'delta-hall-overwrite s-v pairs / delta-hall error pairs:{delta_hall_overwrite_slot_value}  /  {total_delta_hall_slot_value}  =  {delta_hall_overwrite_slot_value/total_delta_hall_slot_value}')
print('--------DELTA-HALL-VALUE--------')
print(f'delta-hall-val turns / total turns:\t\t\t{len(delta_hall_val_turn)}  /  1447  =  {len(delta_hall_val_turn)/1447}')
print(f'delta-hall-val turns / total error turns:\t\t{len(delta_hall_val_turn)}  /  {len(total_error_turn)}  =  {len(delta_hall_val_turn)/len(total_error_turn)}')
print(f'delta-hall-val turns / delta error turns:\t\t{len(delta_hall_val_turn)}  /  {len(total_delta_turn)}  =  {len(delta_hall_val_turn)/len(total_delta_turn)}')
print(f'delta-hall-val turns / delta-hall error turns:\t\t{len(delta_hall_val_turn)}  /  {len(total_delta_hall_turn)}  =  {len(delta_hall_val_turn)/len(total_delta_hall_turn)}')
print(f'delta-hall-val s-v pairs / total error pairs:\t\t{delta_hall_val_slot_value}  /  {total_error_slot_value}  =  {delta_hall_val_slot_value/total_error_slot_value}')
print(f'delta-hall-val s-v pairs / delta error pairs:\t\t{delta_hall_val_slot_value}  /  {total_delta_slot_value}  =  {delta_hall_val_slot_value/total_delta_slot_value}')
print(f'delta-hall-val s-v pairs / delta-hall error pairs:\t{delta_hall_val_slot_value}  /  {total_delta_hall_slot_value}  =  {delta_hall_val_slot_value/total_delta_hall_slot_value}')


In [None]:
# Breakdown of propagated miss errors by sub-category (total / dontcare / confuse / delete).
# Each sub-section prints the turn count and the slot-value ("s-v") pair count of that
# sub-category as a fraction of: all turns (hard-coded 1447), all error turns/pairs,
# all error-propagation turns/pairs, and all propagated-miss turns/pairs.
# Labels name the denominators actually used (total_error_prop_* and
# total_error_prop_miss_*), not the delta counters.
print('----------------ERROR PROP-MISS----------------')
print(f'error prop-miss turns / total turns:\t\t\t\t{len(total_error_prop_miss_turn)}  /  1447  =  {len(total_error_prop_miss_turn)/1447}')
print(f'error prop-miss turns / total error turns:\t\t\t{len(total_error_prop_miss_turn)}  /  {len(total_error_turn)}  =  {len(total_error_prop_miss_turn)/len(total_error_turn)}')
print(f'error prop-miss s-v pairs / total error pairs:\t\t\t{total_error_prop_miss_slot_value}  /  {total_error_slot_value}  =  {total_error_prop_miss_slot_value/total_error_slot_value}')
print('--------ERROR PROP-MISS-TOTAL--------')
print(f'error prop-miss-total turns / total turns:\t\t\t{len(error_prop_miss_turn)}  /  1447  =  {len(error_prop_miss_turn)/1447}')
print(f'error prop-miss-total turns / total error turns:\t\t{len(error_prop_miss_turn)}  /  {len(total_error_turn)}  =  {len(error_prop_miss_turn)/len(total_error_turn)}')
print(f'error prop-miss-total turns / error prop turns:\t\t\t{len(error_prop_miss_turn)}  /  {len(total_error_prop_turn)}  =  {len(error_prop_miss_turn)/len(total_error_prop_turn)}')
print(f'error prop-miss-total turns / prop-miss error turns:\t\t{len(error_prop_miss_turn)}  /  {len(total_error_prop_miss_turn)}  =  {len(error_prop_miss_turn)/len(total_error_prop_miss_turn)}')
print(f'error prop-miss-total s-v pairs / total error pairs:\t\t{error_prop_miss_slot_value}  /  {total_error_slot_value}  =  {error_prop_miss_slot_value/total_error_slot_value}')
print(f'error prop-miss-total s-v pairs / error prop pairs:\t\t{error_prop_miss_slot_value}  /  {total_error_prop_slot_value}  =  {error_prop_miss_slot_value/total_error_prop_slot_value}')
print(f'error prop-miss-total s-v pairs / prop-miss error pairs:\t{error_prop_miss_slot_value}  /  {total_error_prop_miss_slot_value}  =  {error_prop_miss_slot_value/total_error_prop_miss_slot_value}')
print('--------ERROR PROP-MISS-DONTCARE--------')
print(f'error prop-miss-dontcare turns / total turns:\t\t\t{len(error_prop_miss_dontcare_turn)}  /  1447  =  {len(error_prop_miss_dontcare_turn)/1447}')
print(f'error prop-miss-dontcare turns / total error turns:\t\t{len(error_prop_miss_dontcare_turn)}  /  {len(total_error_turn)}  =  {len(error_prop_miss_dontcare_turn)/len(total_error_turn)}')
print(f'error prop-miss-dontcare turns / error prop turns:\t\t{len(error_prop_miss_dontcare_turn)}  /  {len(total_error_prop_turn)}  =  {len(error_prop_miss_dontcare_turn)/len(total_error_prop_turn)}')
print(f'error prop-miss-dontcare turns / prop-miss error turns:\t\t{len(error_prop_miss_dontcare_turn)}  /  {len(total_error_prop_miss_turn)}  =  {len(error_prop_miss_dontcare_turn)/len(total_error_prop_miss_turn)}')
print(f'error prop-miss-dontcare s-v pairs / total error pairs:\t\t{error_prop_miss_dontcare_slot_value}  /  {total_error_slot_value}  =  {error_prop_miss_dontcare_slot_value/total_error_slot_value}')
print(f'error prop-miss-dontcare s-v pairs / error prop pairs:\t\t{error_prop_miss_dontcare_slot_value}  /  {total_error_prop_slot_value}  =  {error_prop_miss_dontcare_slot_value/total_error_prop_slot_value}')
print(f'error prop-miss-dontcare s-v pairs / prop-miss error pairs:\t{error_prop_miss_dontcare_slot_value}  /  {total_error_prop_miss_slot_value}  =  {error_prop_miss_dontcare_slot_value/total_error_prop_miss_slot_value}')
print('--------ERROR PROP-MISS-CONFUSE--------')
print(f'error prop-miss-confuse turns / total turns:\t\t\t{len(error_prop_miss_confuse_turn)}  /  1447  =  {len(error_prop_miss_confuse_turn)/1447}')
print(f'error prop-miss-confuse turns / total error turns:\t\t{len(error_prop_miss_confuse_turn)}  /  {len(total_error_turn)}  =  {len(error_prop_miss_confuse_turn)/len(total_error_turn)}')
print(f'error prop-miss-confuse turns / error prop turns:\t\t{len(error_prop_miss_confuse_turn)}  /  {len(total_error_prop_turn)}  =  {len(error_prop_miss_confuse_turn)/len(total_error_prop_turn)}')
print(f'error prop-miss-confuse turns / prop-miss error turns:\t\t{len(error_prop_miss_confuse_turn)}  /  {len(total_error_prop_miss_turn)}  =  {len(error_prop_miss_confuse_turn)/len(total_error_prop_miss_turn)}')
print(f'error prop-miss-confuse s-v pairs / total error pairs:\t\t{error_prop_miss_confuse_slot_value}  /  {total_error_slot_value}  =  {error_prop_miss_confuse_slot_value/total_error_slot_value}')
print(f'error prop-miss-confuse s-v pairs / error prop pairs:\t\t{error_prop_miss_confuse_slot_value}  /  {total_error_prop_slot_value}  =  {error_prop_miss_confuse_slot_value/total_error_prop_slot_value}')
print(f'error prop-miss-confuse s-v pairs / prop-miss error pairs:\t{error_prop_miss_confuse_slot_value}  /  {total_error_prop_miss_slot_value}  =  {error_prop_miss_confuse_slot_value/total_error_prop_miss_slot_value}')
print('--------ERROR PROP-MISS-DELETE--------')
print(f'error prop-miss-delete turns / total turns:\t\t\t{len(error_prop_miss_delete_turn)}  /  1447  =  {len(error_prop_miss_delete_turn)/1447}')
print(f'error prop-miss-delete turns / total error turns:\t\t{len(error_prop_miss_delete_turn)}  /  {len(total_error_turn)}  =  {len(error_prop_miss_delete_turn)/len(total_error_turn)}')
print(f'error prop-miss-delete turns / error prop turns:\t\t{len(error_prop_miss_delete_turn)}  /  {len(total_error_prop_turn)}  =  {len(error_prop_miss_delete_turn)/len(total_error_prop_turn)}')
print(f'error prop-miss-delete turns / prop-miss error turns:\t\t{len(error_prop_miss_delete_turn)}  /  {len(total_error_prop_miss_turn)}  =  {len(error_prop_miss_delete_turn)/len(total_error_prop_miss_turn)}')
print(f'error prop-miss-delete s-v pairs / total error pairs:\t\t{error_prop_miss_delete_slot_value}  /  {total_error_slot_value}  =  {error_prop_miss_delete_slot_value/total_error_slot_value}')
print(f'error prop-miss-delete s-v pairs / error prop pairs:\t\t{error_prop_miss_delete_slot_value}  /  {total_error_prop_slot_value}  =  {error_prop_miss_delete_slot_value/total_error_prop_slot_value}')
print(f'error prop-miss-delete s-v pairs / prop-miss error pairs:\t{error_prop_miss_delete_slot_value}  /  {total_error_prop_miss_slot_value}  =  {error_prop_miss_delete_slot_value/total_error_prop_miss_slot_value}')

In [None]:
# Breakdown of propagated hallucination errors by sub-category (total / overwrite / value).
# Each sub-section prints the turn count and the slot-value ("s-v") pair count of that
# sub-category as a fraction of: all turns (hard-coded 1447), all error turns/pairs,
# all error-propagation turns/pairs, and all propagated-hall turns/pairs.
print('----------------ERROR PROP-HALL----------------')
print(f'error prop-hall turns / total turns:\t\t\t\t{len(total_error_prop_hall_turn)}  /  1447  =  {len(total_error_prop_hall_turn)/1447}')
print(f'error prop-hall turns / total error turns:\t\t\t{len(total_error_prop_hall_turn)}  /  {len(total_error_turn)}  =  {len(total_error_prop_hall_turn)/len(total_error_turn)}')
print(f'error prop-hall s-v pairs / total error pairs:\t\t\t{total_error_prop_hall_slot_value}  /  {total_error_slot_value}  =  {total_error_prop_hall_slot_value/total_error_slot_value}')

print('--------ERROR PROP-HALL-TOTAL--------')
print(f'error prop-hall-total turns / total turns:\t\t\t{len(error_prop_hall_turn)}  /  1447  =  {len(error_prop_hall_turn)/1447}')
# The three turn ratios below show the real denominator and divide the
# error_prop_hall_turn count by it (previously the numerator was shown twice and
# the ratio was computed from delta_hall_total_turn, an unrelated counter).
print(f'error prop-hall-total turns / total error turns:\t\t{len(error_prop_hall_turn)}  /  {len(total_error_turn)}  =  {len(error_prop_hall_turn)/len(total_error_turn)}')
print(f'error prop-hall-total turns / error prop turns:\t\t\t{len(error_prop_hall_turn)}  /  {len(total_error_prop_turn)}  =  {len(error_prop_hall_turn)/len(total_error_prop_turn)}')
print(f'error prop-hall-total turns / prop-hall error turns:\t\t{len(error_prop_hall_turn)}  /  {len(total_error_prop_hall_turn)}  =  {len(error_prop_hall_turn)/len(total_error_prop_hall_turn)}')
print(f'error prop-hall-total s-v pairs / total error pairs:\t\t{error_prop_hall_slot_value}  /  {total_error_slot_value}  =  {error_prop_hall_slot_value/total_error_slot_value}')
print(f'error prop-hall-total s-v pairs / error prop pairs:\t\t{error_prop_hall_slot_value}  /  {total_error_prop_slot_value}  =  {error_prop_hall_slot_value/total_error_prop_slot_value}')
print(f'error prop-hall-total s-v pairs / prop-hall error pairs:\t{error_prop_hall_slot_value}  /  {total_error_prop_hall_slot_value}  =  {error_prop_hall_slot_value/total_error_prop_hall_slot_value}')
print('--------ERROR PROP-HALL-OVERWRITE--------')
print(f'error prop-hall-overwrite turns / total turns:\t\t\t{len(error_prop_hall_overwrite_turn)}  /  1447  =  {len(error_prop_hall_overwrite_turn)/1447}')
print(f'error prop-hall-overwrite turns / total error turns:\t\t{len(error_prop_hall_overwrite_turn)}  /  {len(total_error_turn)}  =  {len(error_prop_hall_overwrite_turn)/len(total_error_turn)}')
print(f'error prop-hall-overwrite turns / error prop turns:\t\t{len(error_prop_hall_overwrite_turn)}  /  {len(total_error_prop_turn)}  =  {len(error_prop_hall_overwrite_turn)/len(total_error_prop_turn)}')
print(f'error prop-hall-overwrite turns / prop-hall error turns:\t{len(error_prop_hall_overwrite_turn)}  /  {len(total_error_prop_hall_turn)}  =  {len(error_prop_hall_overwrite_turn)/len(total_error_prop_hall_turn)}')
print(f'error prop-hall-overwrite s-v pairs / total error pairs:\t{error_prop_hall_overwrite_slot_value}  /  {total_error_slot_value}  =  {error_prop_hall_overwrite_slot_value/total_error_slot_value}')
print(f'error prop-hall-overwrite s-v pairs / error prop pairs:\t\t{error_prop_hall_overwrite_slot_value}  /  {total_error_prop_slot_value}  =  {error_prop_hall_overwrite_slot_value/total_error_prop_slot_value}')
print(f'error prop-hall-overwrite s-v pairs / prop-hall error pairs:\t{error_prop_hall_overwrite_slot_value}  /  {total_error_prop_hall_slot_value}  =  {error_prop_hall_overwrite_slot_value/total_error_prop_hall_slot_value}')
print('--------ERROR PROP-HALL-VALUE--------')
print(f'error prop-hall-val turns / total turns:\t\t\t{len(error_prop_hall_val_turn)}  /  1447  =  {len(error_prop_hall_val_turn)/1447}')
print(f'error prop-hall-val turns / total error turns:\t\t\t{len(error_prop_hall_val_turn)}  /  {len(total_error_turn)}  =  {len(error_prop_hall_val_turn)/len(total_error_turn)}')
print(f'error prop-hall-val turns / error prop turns:\t\t\t{len(error_prop_hall_val_turn)}  /  {len(total_error_prop_turn)}  =  {len(error_prop_hall_val_turn)/len(total_error_prop_turn)}')
print(f'error prop-hall-val turns / prop-hall error turns:\t\t{len(error_prop_hall_val_turn)}  /  {len(total_error_prop_hall_turn)}  =  {len(error_prop_hall_val_turn)/len(total_error_prop_hall_turn)}')
print(f'error prop-hall-val s-v pairs / total error pairs:\t\t{error_prop_hall_val_slot_value}  /  {total_error_slot_value}  =  {error_prop_hall_val_slot_value/total_error_slot_value}')
print(f'error prop-hall-val s-v pairs / error prop pairs:\t\t{error_prop_hall_val_slot_value}  /  {total_error_prop_slot_value}  =  {error_prop_hall_val_slot_value/total_error_prop_slot_value}')
print(f'error prop-hall-val s-v pairs / prop-hall error pairs:\t\t{error_prop_hall_val_slot_value}  /  {total_error_prop_hall_slot_value}  =  {error_prop_hall_val_slot_value/total_error_prop_hall_slot_value}')


### error analysis ver.2.1

In [None]:
def hall_error_case(tmp, prev_log, gold, pred, gold_delta, pred_delta, gold_prev, pred_prev, delta=True):
    """Classify every predicted slot-value pair that is absent from the gold state.

    For each ``(slot, value)`` in ``pred`` but not in ``gold`` an entry
    ``(error_name, payload)`` is appended to ``tmp['error']``:

    * ``delta_miss_delete``     - gold marks the slot ``"[DELETE]"`` this turn
      (only reported when ``delta`` is False).
    * ``error_prop_<suffix>``   - the wrong pair was already in ``pred_prev`` but
      not in ``gold_prev``; one entry per matching error in ``prev_log``, with
      ``<suffix>`` being the last two ``_``-separated parts of that error's name.
      "miss" errors other than "delete" ones are not propagated here.
    * ``delta_hall_total``      - the slot was predicted this turn with a value that
      disagrees with ``gold_delta``, or the slot is in neither ``gold_delta`` nor
      ``pred_prev``.
    * ``delta_hall_overwrite``  - the slot is not in ``gold_delta`` and the
      prediction replaced the value held in ``pred_prev``.
    * ``not in hall category``  - none of the above applied.

    Slots already covered by a "confuse" / "hall_val" entry in ``tmp['error']``
    are skipped so the same mistake is not logged twice.

    Args:
        tmp: log entry of the current turn; mutated in place. ``tmp['error']`` is
            created when missing.
        prev_log: log entry of the previous turn (its ``'error'`` list is read);
            may be ``None`` or lack the key.
        gold, pred: gold / predicted full state (slot -> value).
        gold_delta, pred_delta: gold / predicted state change of this turn.
        gold_prev, pred_prev: gold / predicted state before this turn.
        delta: when False, gold ``"[DELETE]"`` slots are reported as
            ``delta_miss_delete``.

    Returns:
        ``tmp`` (the same object that was passed in).
    """
    errors = tmp.setdefault('error', [])
    prev_errors = (prev_log or {}).get('error', [])

    # Sort by slot name: plain set iteration order changes between interpreter
    # runs (string hash randomisation), which made the log order - and the
    # de-duplication below, which reads entries appended earlier in this loop -
    # irreproducible. Slots are unique within `pred`, so values are never compared.
    hallucinated = sorted(set(pred.items()) - set(gold.items()), key=lambda s_v: s_v[0])

    for pred_slot, pred_val in hallucinated:
        if gold_delta.get(pred_slot) == "[DELETE]" and not delta:
            errors.append(('delta_miss_delete', (pred_slot, pred_val)))
            continue

        if (pred_slot, pred_val) in pred_prev.items():
            # The wrong pair was carried over from the previous predicted state.
            if (pred_slot, pred_val) not in gold_prev.items():
                for err_name, err_s_v in prev_errors:
                    if 'miss' in err_name and 'delete' not in err_name:
                        continue
                    if pred_slot == err_s_v[0] and pred_val in err_s_v:
                        prop_name = 'error_prop_' + '_'.join(err_name.split('_')[-2:])
                        errors.append((prop_name, err_s_v))

        elif pred_slot in pred_delta:
            # "confuse" payloads are (gold_slot, gold_val, confused_slot, value);
            # index 2 is the slot the value was wrongly assigned to.
            if any('confuse' in err_name and err_s_v[2] == pred_slot
                   for err_name, err_s_v in errors):
                continue

            if pred_slot in gold_delta:
                if pred_val != gold_delta[pred_slot] and gold_delta[pred_slot] != '[DELETE]':
                    already_logged = any(
                        ('confuse' in err_name or 'hall_val' in err_name) and err_s_v[0] == pred_slot
                        for err_name, err_s_v in errors
                    )
                    if not already_logged:
                        errors.append(('delta_hall_total', (pred_slot, pred_val)))
            elif pred_slot in pred_prev:
                # Re-predicting the value already held ("parrot") is not logged.
                if pred_val != pred_prev[pred_slot]:
                    errors.append(('delta_hall_overwrite', (pred_slot, pred_val)))
            else:
                # The "confuse" guard above already excluded duplicates.
                errors.append(('delta_hall_total', (pred_slot, pred_val)))

        else:
            errors.append(('not in hall category', (pred_slot, pred_val)))
    return tmp

def miss_error_case(tmp, prev_log, gold, pred, gold_delta, pred_delta, gold_prev, pred_prev, delta=True):
    """Classify every gold slot-value pair that is absent from the predicted state.

    For each ``(slot, value)`` in ``gold`` but not in ``pred`` an entry
    ``(error_name, payload)`` is appended to ``tmp['error']``:

    * ``delta_miss_delete``    - gold marks the slot ``"[DELETE]"`` this turn
      (only reported when ``delta`` is False).
    * ``error_prop_<suffix>``  - the pair was already in ``gold_prev`` but missing
      from ``pred_prev``; one entry per matching non-"hall" error in ``prev_log``,
      with ``<suffix>`` being the last two ``_``-separated parts of its name.
    * ``delta_miss_confuse1``  - the gold value was predicted this turn for a
      different slot that gold knows about (in ``gold_delta`` or ``gold_prev``)
      with another value.
    * ``delta_miss_confuse2``  - the gold value was predicted this turn for a slot
      gold does not know about at all.
    * ``delta_hall_val``       - the right slot was predicted this turn with a
      wrong value; payload is ``(slot, gold_val, slot, pred_val)``.
    * ``delta_miss_dontcare``  - the missed gold value is ``'dontcare'``.
    * ``delta_miss_total``     - the slot was simply not predicted.
    * ``not in miss category`` - none of the above applied.

    Args:
        tmp: log entry of the current turn; mutated in place. ``tmp['error']`` is
            created when missing.
        prev_log: log entry of the previous turn (its ``'error'`` list is read);
            may be ``None`` or lack the key.
        gold, pred: gold / predicted full state (slot -> value).
        gold_delta, pred_delta: gold / predicted state change of this turn.
        gold_prev, pred_prev: gold / predicted state before this turn.
        delta: when False, gold ``"[DELETE]"`` slots are reported as
            ``delta_miss_delete``; when True a missed ``'[DELETE]'`` value is
            silently skipped.

    Returns:
        ``tmp`` (the same object that was passed in).

    Raises:
        ValueError: the gold value occurs in ``pred_delta`` only under the gold
            slot itself, yet the pair is missing from ``pred`` - an inconsistent
            input. A ``delta_miss_unknown`` entry is logged before raising.
    """
    errors = tmp.setdefault('error', [])
    prev_errors = (prev_log or {}).get('error', [])

    # Sort by slot name: plain set iteration order changes between interpreter
    # runs (string hash randomisation), which made the log order irreproducible.
    # Slots are unique within `gold`, so values are never compared.
    missed = sorted(set(gold.items()) - set(pred.items()), key=lambda s_v: s_v[0])

    for gold_slot, gold_val in missed:
        if gold_delta.get(gold_slot) == "[DELETE]" and not delta:
            errors.append(('delta_miss_delete', (gold_slot, gold_val)))

        elif (gold_slot, gold_val) in gold_prev.items():
            # The pair should have been carried over from the previous state.
            if (gold_slot, gold_val) not in pred_prev.items():
                for err_name, err_s_v in prev_errors:
                    if 'hall' in err_name:
                        continue
                    if err_s_v[0] == gold_slot and err_s_v[1] == gold_val:
                        prop_name = 'error_prop_' + '_'.join(err_name.split('_')[-2:])
                        errors.append((prop_name, err_s_v))

        elif gold_slot in gold_delta:
            if gold_val in pred_delta.values():
                # The value was predicted this turn, but under some other slot.
                conf_slot_vals = [(k, v) for k, v in pred_delta.items() if v == gold_val and k != gold_slot]
                if not conf_slot_vals:
                    errors.append(('delta_miss_unknown', (gold_slot, gold_val)))
                    raise ValueError(
                        f'{gold_slot!r}={gold_val!r} is in pred_delta but missing from pred'
                    )
                for conf_s, g_val in conf_slot_vals:
                    if conf_s in gold_delta or conf_s in gold_prev:
                        if g_val == gold_delta.get(conf_s, None) or g_val == gold_prev.get(conf_s, None):
                            continue    # the other slot legitimately holds this value
                        errors.append(('delta_miss_confuse1', (gold_slot, gold_val, conf_s, g_val)))
                    else:
                        errors.append(('delta_miss_confuse2', (gold_slot, gold_val, conf_s, g_val)))
            else:
                if gold_slot in pred_delta and gold_val != pred_delta[gold_slot] and gold_val != '[DELETE]':
                    errors.append(('delta_hall_val', (gold_slot, gold_val, gold_slot, pred_delta[gold_slot])))
                    continue
                if gold_val == '[DELETE]' and delta:
                    continue
                if gold_val == 'dontcare':
                    errors.append(('delta_miss_dontcare', (gold_slot, gold_val)))
                else:
                    errors.append(('delta_miss_total', (gold_slot, gold_val)))

        else:
            errors.append(('not in miss category', (gold_slot, gold_val)))
    return tmp

### error analysis ver.2

In [None]:
# Buckets of log entries named f_<a>_d_<b>_p_<c>. Each flag is 1 when the
# corresponding comparison holds for the turn and 0 otherwise:
#   f: pred == gold              (full state)
#   d: pred_delta == gold_delta  (change made in this turn)
#   p: pred_prev == gold_prev    (state before this turn)
f_1_d_1_p_1, f_1_d_1_p_0, f_1_d_0_p_1, f_1_d_0_p_0 = [], [], [], []
f_0_d_1_p_1, f_0_d_1_p_0, f_0_d_0_p_1, f_0_d_0_p_0 = [], [], [], []

# Per-turn analysis records are collected here by the loop that follows.
new_logs = []

# Running log of the evaluated run; the loop below reads one item per turn.
with open('outputs/runs/table4/5p/bm25/split_v1_10_all_sim/running_log.json', 'r') as f:
    logs = json.load(f)
for data_item in logs:
    tmp = {}
    pred = data_item['pred']
    gold = data_item['slot_values']

    pred_delta =  normalizer.normalize(iterative_parsing(data_item['completion'], data_item['last_slot_values']))
    data_item['iter_parse_pred_delta'] = pred_delta
    gold_delta = data_item['turn_slot_values']

    pred_prev = data_item['pred_prior_context']
    gold_prev = data_item['last_slot_values']

    gold, pred = unroll_or(gold, pred)
    gold_delta, pred_delta = unroll_or(gold_delta, pred_delta)
    gold_prev, pred_prev = unroll_or(gold_prev, pred_prev)
    
    tmp['ID'] = data_item['ID']
    tmp['turn_id'] = data_item['turn_id']

    
    if pred == gold and pred_delta == gold_delta and pred_prev == gold_prev:
        f_1_d_1_p_1.append(data_item)
        tmp['rights'] = (1, 1, 1)
    elif pred == gold and pred_delta == gold_delta and pred_prev != gold_prev:
        f_1_d_1_p_0.append(data_item)
        tmp['rights'] = (1, 1, 0)        
    elif pred == gold and pred_delta != gold_delta and pred_prev == gold_prev:
        f_1_d_0_p_1.append(data_item)
        tmp['rights'] = (1, 0, 1)
    elif pred == gold and pred_delta != gold_delta and pred_prev != gold_prev:
        f_1_d_0_p_0.append(data_item)
        tmp['rights'] = (1, 0, 0)
    elif pred != gold and  pred_delta == gold_delta and pred_prev == gold_prev:
        f_0_d_1_p_1.append(data_item)
        tmp['rights'] = (0, 1, 1)
    elif pred != gold and pred_delta == gold_delta and pred_prev != gold_prev:
        f_0_d_1_p_0.append(data_item)
        tmp['rights'] = (0, 1, 0)
    elif pred != gold and pred_delta != gold_delta and pred_prev == gold_prev:
        f_0_d_0_p_1.append(data_item)
        tmp['rights'] = (0, 0, 1)
    else:
        f_0_d_0_p_0.append(data_item) 
        tmp['rights'] = (0, 0, 0)
    
    tmp['slot_values'] = sort_dict(gold)
    tmp['pred_slot_values'] = sort_dict(pred)
    tmp['turn_slot_values'] =sort_dict(gold_delta)
    tmp['pred_turn_slot_values'] = sort_dict(pred_delta)
    tmp['completion'] = data_item['completion']
    tmp['last_slot_values'] = sort_dict(gold_prev)
    tmp['pred_last_slot_values'] = sort_dict(pred_prev)
    
    tmp['error'] = []
    # tmp['dialog'] = log['dialog']

    if pred != gold:
        for slot, val in set(pred.items()) - set(gold.items()):
            if gold_delta.get(slot, None) == "[DELETE]":
                tmp['error'].append(('miss_delete', (slot, val)))
                continue

            if slot in pred_delta:
                if slot in gold_delta :
                    if val != gold_delta[slot]:
                        tmp['error'].append(('delta_hall_val', (slot, gold_delta[slot], slot, val)))
                        continue
                
                elif slot in pred_prev:
                    # if pred_prev == gold_prev:
                    if val == pred_prev[slot]:
                        tmp['error'].append(('delta_hall_-parrot', (slot, val)))
                        continue
                    else:
                        tmp['error'].append(('delta_hall_-overwrite', (slot, val)))
                        continue
                else:
                    tmp['error'].append(('delta_hall_-total', (slot, val)))
                    continue
            
            elif (slot, val) in pred_prev.items():
                if (slot, val) not in gold_prev.items():
                    tmp['error'].append(('error_prop_hall', (slot, val)))
                    continue
                
        for slot, val in set(gold.items()) - set(pred.items()):
            if gold_delta.get(slot, None) == "[DELETE]":
                tmp['error'].append(('miss-delete', (slot, val)))
                continue
                
            elif slot in gold_delta:
                if val in pred_delta.values():
                    conf_slot_vals = [(k, v) for k, v in pred_delta.items() if v == val and k != slot]
                    for k, v in conf_slot_vals:
                        if k in gold_delta:
                            if v == gold_delta[k]:
                                continue    # the other pair which is correct
                            else:
                                tmp['error'].append(('delta_miss_confuse', (slot, val, k, v)))
                                continue    # confuse k <=> slot
                        else: 
                            tmp['error'].append(('delta_miss_confuse', (slot, val, k, v)))
                            continue    # confuse k <=> slot
                else:
                    tmp['error'].append(('delta_miss_total', (slot, val)))
                    continue    # total miss
            
            elif (slot, val) in gold_prev.items():
                if (slot, val) not in pred_prev.items():
                    tmp['error'].append(('error_prop_miss', (slot, val)))
                    continue    # miss from previous

    for slot, val in set(pred_delta.items()) - set(gold_delta.items()):
        if slot in pred_delta:
            if slot in gold_delta :
                if val != gold_delta[slot]:
                    tmp['error'].append(('delta_hall_val', (slot, gold_delta[slot], slot, val)))
                    continue
            
            elif slot in pred_prev:
                # if pred_prev == gold_prev:
                if val == pred_prev[slot]:
                    tmp['error'].append(('delta_hall_parrot', (slot, val)))
                    continue
                else:
                    tmp['error'].append(('delta_hall_overwrite', (slot, val)))
                    continue
            else:
                tmp['error'].append(('delta_hall_total', (slot, val)))
                continue
            
    for slot, val in set(gold_delta.items()) - set(pred_delta.items()):
        if slot in gold_delta:
            if val in pred_delta.values():
                conf_slot_vals = [(k, v) for k, v in pred_delta.items() if v == val and k != slot]
                for k, v in conf_slot_vals:
                    if k in gold_delta:
                        if v == gold_delta[k]:
                            continue    # the other pair which is correct
                        else:
                            tmp['error'].append(('delta_miss_confuse', (slot, val, k, v)))
                            continue    # confuse k <=> slot
                    else: 
                        tmp['error'].append(('delta_miss_confuse', (slot, val, k, v)))
                        continue    # confuse k <=> slot
            else:
                if val == '[DELETE]':
                    continue
                tmp['error'].append(('delta_miss_total', (slot, val)))
                continue    # total miss
        
    tmp['error'] = sorted(list(set(tuple(x) for x in tmp['error'])))
    new_logs.append(tmp)


In [None]:
# Persist the per-turn error-analysis records collected in `new_logs`
# as pretty-printed JSON (4-space indent) next to the notebook.
with open('./error-analysis-log-og.json', 'w') as out_file:
    json.dump(new_logs, out_file, indent=4)

### error analysis ver.1

In [None]:
def _get_pred_delta(data_item):
    """Return the predicted turn-level (delta) slot values of one log entry.

    The documented log key is ``'pred_delta_slot_values'``; the misspelt
    ``'pred_delta_slov_values'`` is still accepted so logs written with that
    legacy key keep loading. Raises ``KeyError`` if neither key is present.
    """
    if 'pred_delta_slot_values' in data_item:
        return data_item['pred_delta_slot_values']
    return data_item['pred_delta_slov_values']


def _delta_error_cases(gold_delta, pred_delta, pred_prev):
    """Classify turn-level (delta) errors for a single turn.

    Args:
        gold_delta: gold slot -> value mapping for this turn.
        pred_delta: predicted slot -> value mapping for this turn.
        pred_prev: predicted dialogue state before this turn.

    Returns:
        A list of ``{'error_type': ..., 'slot_values': ...}`` dicts. Paired
        entries list the gold pair first and the predicted pair second.
    """
    cases = []

    # Predicted pairs: wrong value for a gold slot, or a slot gold never mentions.
    for pred_slot, pred_value in pred_delta.items():
        if pred_slot in gold_delta:
            gold_value = gold_delta[pred_slot]
            if gold_value != pred_value:
                cases.append({
                    'error_type': 'confusion-delta',
                    'slot_values': [{pred_slot: gold_value}, {pred_slot: pred_value}],
                })
        elif pred_value in gold_delta.values():
            # The value exists in gold, but attached to a different slot.
            cases.append({'error_type': 'hall-value-delta', 'slot_values': {pred_slot: pred_value}})
        else:
            cases.append({'error_type': 'hall-slot-value-delta', 'slot_values': {pred_slot: pred_value}})

    # Gold pairs whose slot was not predicted at all in this turn.
    for gold_slot, gold_value in gold_delta.items():
        if gold_slot in pred_delta:
            continue
        if gold_value in pred_delta.values():
            cases.append({'error_type': 'miss-value-delta', 'slot_values': {gold_slot: gold_value}})
        elif gold_slot in pred_prev:
            # The slot was carried over from the predicted context instead of being updated.
            cases.append({
                'error_type': 'miss-update-delta',
                'slot_values': [{gold_slot: gold_value}, {gold_slot: pred_prev[gold_slot]}],
            })
        else:
            cases.append({'error_type': 'miss-slot-value-delta', 'slot_values': {gold_slot: gold_value}})

    return cases


def _propagation_error_cases(gold, pred, gold_prev, pred_prev):
    """Classify errors inherited from the previous (prior-context) state.

    Args:
        gold / pred: gold and predicted full dialogue state after this turn.
        gold_prev / pred_prev: gold and predicted state before this turn.

    Returns:
        A list of ``{'error_type': ..., 'slot_values': ...}`` dicts.
    """
    cases = []

    # Wrong predicted pairs that were already wrong in the predicted prior context.
    # ``pair in d.items()`` is true iff ``d`` maps that slot to that exact value.
    for pred_slot, pred_value in pred.items():
        pair = (pred_slot, pred_value)
        if pair in gold.items():
            continue
        if pair in pred_prev.items() and pair not in gold_prev.items():
            if pred_value in gold_prev.values():
                cases.append({'error_type': 'hall-value-propagation', 'slot_values': {pred_slot: pred_value}})
            else:
                cases.append({'error_type': 'hall-slot-value-propagation', 'slot_values': {pred_slot: pred_value}})

    # Gold pairs missing from the prediction that were already missing before this turn.
    for gold_slot, gold_value in gold.items():
        pair = (gold_slot, gold_value)
        if pair in pred.items():
            continue
        if pair in gold_prev.items() and pair not in pred_prev.items():
            if gold_slot in pred_prev:
                # Same slot, different value (guaranteed by the membership test above).
                cases.append({
                    'error_type': 'confusion-propagation',
                    'slot_values': [{gold_slot: gold_value}, {gold_slot: pred_prev[gold_slot]}],
                })
            # As in the original analysis, a miss-* entry is recorded in addition
            # to any confusion entry for the same pair.
            if gold_value in pred_prev.values():
                cases.append({'error_type': 'miss-value-propagation', 'slot_values': {gold_slot: gold_value}})
            else:
                cases.append({'error_type': 'miss-slot-value-propagation', 'slot_values': {gold_slot: gold_value}})

    return cases


# Build one analysis record per logged turn. Each record holds the classified
# error cases followed by key-sorted gold/predicted states for side-by-side reading.
new_logs = []
prev_log = None  # not read in this cell; kept in case a later cell inspects the last record
for data_item in logs:
    gold = sort_dict(data_item['slot_values'])
    gold_prev = sort_dict(data_item['last_slot_values'])
    gold_delta = sort_dict(data_item['turn_slot_values'])
    pred = sort_dict(data_item['pred'])
    pred_prev = sort_dict(data_item['pred_prior_context'])
    pred_delta = sort_dict(_get_pred_delta(data_item))

    new_log = {
        'ID': data_item['ID'],
        'turn_id': data_item['turn_id'],
        'dialog': data_item['dialog'],
        'error_cases': (
            _delta_error_cases(gold_delta, pred_delta, pred_prev)
            + _propagation_error_cases(gold, pred, gold_prev, pred_prev)
        ),
        'slot_values': gold,
        'pred': pred,
        'turn_slot_values': gold_delta,
        'pred_turn_slot_values': pred_delta,
        'completion': data_item['completion'],
        'last_slot_values': gold_prev,
        'pred_last_slot_values': pred_prev,
    }

    new_logs.append(new_log)
    prev_log = new_log

with open('./bm25_all_sim_wrong_log_error_case.json', 'w') as f:
    json.dump(new_logs, f, indent=4)