In [8]:
import os
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go
import plotly.express as px
# import plotly.express as px
# import plotly.graph_objects as go
from IPython.display import display, HTML

In [9]:
def cal_turn_percentage(x):
    """Return how far into its dialogue a selected turn lies.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must provide ``'turn_idx'`` and ``'total_turns'``; both are coerced
        with ``int``.

    Returns
    -------
    int or float
        ``1`` when ``total_turns`` is zero, otherwise
        ``turn_idx / total_turns``.
    """
    total = int(x['total_turns'])
    # A dialogue with zero turns would divide by zero; report it as fully read.
    return int(x['turn_idx']) / total if total != 0 else 1

In [10]:
def merge_df(dialogue2len_df, selected_turn_df):
    """Join selected turns with dialogue lengths and report their relative position.

    Parameters
    ----------
    dialogue2len_df : pd.DataFrame
        Must contain ``dialogue_id`` and ``total_turns`` columns.
    selected_turn_df : pd.DataFrame
        Must contain a ``selected_turn_id`` column of
        ``"<dialogue_id>-<turn_idx>"`` strings, where ``turn_idx`` is 0-based.
        The frame is not modified; the derived columns are added to a copy.

    Returns
    -------
    pd.DataFrame
        ``selected_turn_df`` left-joined with ``dialogue2len_df`` on
        ``dialogue_id``, plus:

        * ``turn_idx`` - 1-based index of the selected turn,
        * ``dialogue`` - the row index of the merged frame,
        * ``turn_percentage`` - ``turn_idx / total_turns`` as computed by
          ``cal_turn_percentage``.

    Notes
    -----
    Displays the merged frame and prints the mean and standard deviation of
    ``turn_percentage`` as a side effect.

    Dialogue ids missing from ``dialogue2len_df`` get a NaN ``total_turns``
    from the left join, which ``cal_turn_percentage`` cannot convert to
    ``int``.
    """
    turn_ids = selected_turn_df['selected_turn_id']

    # Split on the LAST hyphen only, so a dialogue id that itself contains a
    # hyphen is still parsed correctly. ``assign`` returns a new frame, leaving
    # the caller's ``selected_turn_df`` untouched.
    selected = selected_turn_df.assign(
        dialogue_id=turn_ids.apply(lambda turn_id: turn_id.rsplit('-', 1)[0]),
        # +1 because the index encoded in the id starts from 0.
        turn_idx=turn_ids.apply(lambda turn_id: int(turn_id.rsplit('-', 1)[1]) + 1),
    )

    merged_df = pd.merge(selected, dialogue2len_df, on='dialogue_id', how='left')
    merged_df['dialogue'] = merged_df.index

    ### metric1: turn percentage
    # Shows which part of each dialogue the selected turn comes from.
    # With the 1-based turn_idx the value lies in (0, 1]: 1/total_turns means
    # the first turn was selected, 1 means the last turn was selected.
    merged_df['turn_percentage'] = merged_df.apply(cal_turn_percentage, axis=1)

    display(merged_df)

    ### metric2: share of turns that annotators have to read
    # If total_turns is 10 and the selected turn is the 3rd one, the annotator
    # needs to read 3/10 of the dialogue to label it.
    annotate_turns_percent = round(merged_df['turn_percentage'].mean(), 4)
    std_annotate_turns_percent = round(merged_df['turn_percentage'].std(), 4)
    print(f'# of turns read by annotators: mean - {annotate_turns_percent} std - {std_annotate_turns_percent}')
    print('--------------------------------------------------')

    return merged_df

In [11]:
def read_all_by_folder_name(folder_name, dialog2len_df):
    """Report turn-position statistics for every selected-turn file in a folder.

    Every file in ``folder_name`` whose name ends with
    ``'selected_turn_id.csv'`` is read (first two columns only), the files are
    concatenated, and the result is passed to ``merge_df``, which displays the
    merged frame and prints the statistics.

    Parameters
    ----------
    folder_name : str
        Directory to scan (not recursive).
    dialog2len_df : pd.DataFrame
        Dialogue-length table forwarded to ``merge_df``.

    Returns
    -------
    None
        Nothing is returned, so a notebook cell ending with this call does not
        echo the merged frame a second time.

    Raises
    ------
    ValueError
        If the folder contains no matching file.
    """
    # os.listdir returns entries in arbitrary order; sort so that the row order
    # of the concatenated frame is reproducible across runs and machines.
    selected_turn_path_list = sorted(
        os.path.join(folder_name, filename)
        for filename in os.listdir(folder_name)
        if filename.endswith('selected_turn_id.csv')
    )
    if not selected_turn_path_list:
        raise ValueError(
            f"No '*selected_turn_id.csv' files found in {folder_name!r}"
        )

    df_list = [pd.read_csv(path, usecols=[0, 1]) for path in selected_turn_path_list]
    # ignore_index gives the combined frame a unique 0..n-1 index instead of
    # repeating each file's own row numbers.
    merged = pd.concat(df_list, ignore_index=True)
    merge_df(dialog2len_df, merged)

# MWZ 2.0

In [12]:
# Locations of the per-dialogue turn-count tables, grouped by dataset version.

# mwz20
kage_mwz20_dialogue2len_path = './data/mwz20/train_dialogue2len.csv'
pptod_mwz20_dialogue2len_path = './data/mwz20/pptod_train_dialogue2len.csv'

# mwz21
kage_mwz21_dialogue2len_path = './data/mwz21/train_dialogue2len.csv'
pptod_mwz21_dialogue2len_path = './data/mwz21/pptod_train_dialogue2len.csv'

In [13]:
# Load the four dialogue-length tables (columns: dialogue_id, total_turns).
kage_mwz20_dialogue2len_df = pd.read_csv(kage_mwz20_dialogue2len_path)
kage_mwz21_dialogue2len_df = pd.read_csv(kage_mwz21_dialogue2len_path)
pptod_mwz20_dialogue2len_df = pd.read_csv(pptod_mwz20_dialogue2len_path)
pptod_mwz21_dialogue2len_df = pd.read_csv(pptod_mwz21_dialogue2len_path)

# Show each table as its own output, in the same order they were loaded.
display(kage_mwz20_dialogue2len_df)
display(kage_mwz21_dialogue2len_df)
display(pptod_mwz20_dialogue2len_df)
display(pptod_mwz21_dialogue2len_df)

Unnamed: 0,dialogue_id,total_turns
0,MUL0001,10
1,MUL0002,7
2,MUL0005,9
3,MUL0006,8
4,MUL0007,7
...,...,...
7883,WOZ20671,3
7884,WOZ20672,5
7885,WOZ20673,5
7886,WOZ20674,4


Unnamed: 0,dialogue_id,total_turns
0,MUL0001,10
1,MUL0002,7
2,MUL0005,9
3,MUL0006,8
4,MUL0007,7
...,...,...
7883,WOZ20671,3
7884,WOZ20672,5
7885,WOZ20673,5
7886,WOZ20674,4


Unnamed: 0,dialogue_id,total_turns
0,SNG01856,5
1,MUL2168,8
2,MUL2105,9
3,PMUL1690,11
4,MUL2395,7
...,...,...
7896,PMUL4251,9
7897,MUL1383,10
7898,SNG0827,5
7899,PMUL2395,7


Unnamed: 0,dialogue_id,total_turns
0,SNG01856,5
1,MUL2168,8
2,MUL2105,9
3,PMUL1690,11
4,MUL2395,7
...,...,...
7896,PMUL4251,9
7897,MUL1383,10
7898,SNG0827,5
7899,PMUL2395,7


### Baseline: KAGE-LastTurn & PPTOD-LastTurn

KAGE-LastTurn on MWZ 2.0 uses the results reported in the paper directly, so only PPTOD-LastTurn is computed below.

In [14]:
# PPTOD-LastTurn on MWZ 2.0: one joint-accuracy value per entry
# (presumably one per training run - confirm).
PPTOD_mwz20_lt_df = pd.DataFrame(
    {'joint_acc': [0.4538054538054538, 0.43833943833943834, 0.4228734228734229]}
)
display(PPTOD_mwz20_lt_df)

# Mean and standard deviation over the entries, as percentages with two decimals.
print((PPTOD_mwz20_lt_df.mean() * 100).round(2))
print((PPTOD_mwz20_lt_df.std() * 100).round(2))

Unnamed: 0,joint_acc
0,0.453805
1,0.438339
2,0.422873


joint_acc    43.83
dtype: float64
joint_acc    1.55
dtype: float64


### Baseline: KAGE-Random & PPTOD-Random

In [15]:
# KAGE-Random on MWZ 2.0: joint and slot accuracy, one row per entry
# (presumably one per training run - confirm).
KAGE_mwz20_random_df = pd.DataFrame(
    {
        'joint_acc': [0.490369, 0.497016],
        'slot_acc': [0.969407, 0.969457],
    }
)
display(KAGE_mwz20_random_df)

# Mean and standard deviation per metric, as percentages with two decimals.
print((KAGE_mwz20_random_df.mean() * 100).round(2))
print((KAGE_mwz20_random_df.std() * 100).round(2))

Unnamed: 0,joint_acc,slot_acc
0,0.490369,0.969407
1,0.497016,0.969457


joint_acc    49.37
slot_acc     96.94
dtype: float64
joint_acc    0.47
slot_acc     0.00
dtype: float64


In [16]:
# PPTOD-Random on MWZ 2.0: one joint-accuracy value per entry
# (presumably one per training run - confirm).
PPTOD_mwz20_random_df = pd.DataFrame(
    {'joint_acc': [0.46153846153846156, 0.4306064306064306]}
)
display(PPTOD_mwz20_random_df)

# Mean and standard deviation over the entries, as percentages with two decimals.
print((PPTOD_mwz20_random_df.mean() * 100).round(2))
print((PPTOD_mwz20_random_df.std() * 100).round(2))

Unnamed: 0,joint_acc
0,0.461538
1,0.430606


joint_acc    44.61
dtype: float64
joint_acc    2.19
dtype: float64


In [17]:
# Banner naming the folder, then the turn-position statistics for the
# '*selected_turn_id.csv' files it contains.
print('============= ./data/mwz20/KAGE/random_without_al/ =============')
read_all_by_folder_name(
    './data/mwz20/KAGE/random_without_al/',
    kage_mwz20_dialogue2len_df,
)



Unnamed: 0,round,selected_turn_id,dialogue_id,turn_idx,total_turns,dialogue,turn_percentage
0,-1,SNG1174-0,SNG1174,1,4,0,0.250000
1,-1,PMUL1539-1,PMUL1539,2,9,1,0.222222
2,-1,PMUL0951-12,PMUL0951,13,13,2,1.000000
3,-1,PMUL1694-6,PMUL1694,7,9,3,0.777778
4,-1,PMUL1845-4,PMUL1845,5,8,4,0.625000
...,...,...,...,...,...,...,...
15766,-1,PMUL2860-8,PMUL2860,9,11,15766,0.818182
15767,-1,PMUL0201-6,PMUL0201,7,7,15767,1.000000
15768,-1,SNG0926-1,SNG0926,2,6,15768,0.333333
15769,-1,MUL0684-7,MUL0684,8,8,15769,1.000000


# of turns read by annotators: mean - 0.5818 std - 0.2875
--------------------------------------------------


In [18]:
# Banner naming the folder, then the turn-position statistics for the
# '*selected_turn_id.csv' files it contains.
print('============= ./data/mwz20/PPTOD/random_without_al/ =============')
read_all_by_folder_name(
    './data/mwz20/PPTOD/random_without_al/',
    pptod_mwz20_dialogue2len_df,
)



Unnamed: 0,round,selected_turn_id,dialogue_id,turn_idx,total_turns,dialogue,turn_percentage
0,-1,SNG01856-3,SNG01856,4,5,0,0.800000
1,-1,MUL2168-4,MUL2168,5,8,1,0.625000
2,-1,MUL2105-6,MUL2105,7,9,2,0.777778
3,-1,PMUL1690-0,PMUL1690,1,11,3,0.090909
4,-1,MUL2395-0,MUL2395,1,7,4,0.142857
...,...,...,...,...,...,...,...
15797,-1,PMUL4251-0,PMUL4251,1,9,15797,0.111111
15798,-1,MUL1383-9,MUL1383,10,10,15798,1.000000
15799,-1,SNG0827-0,SNG0827,1,5,15799,0.200000
15800,-1,PMUL2395-0,PMUL2395,1,7,15800,0.142857


# of turns read by annotators: mean - 0.5866 std - 0.2883
--------------------------------------------------


# MWZ 2.1

### Baseline: KAGE-LastTurn & PPTOD-LastTurn

In [15]:
# KAGE-LastTurn on MWZ 2.1: joint and slot accuracy, one row per entry
# (presumably one per training run - confirm).
KAGE_mwz21_lt_df = pd.DataFrame(
    {
        'joint_acc': [0.492128, 0.490228],
        'slot_acc': [0.970367, 0.970643],
    }
)
display(KAGE_mwz21_lt_df)

# Mean and standard deviation per metric, as percentages with two decimals.
print((KAGE_mwz21_lt_df.mean() * 100).round(2))
print((KAGE_mwz21_lt_df.std() * 100).round(2))

Unnamed: 0,joint_acc,slot_acc
0,0.492128,0.970367
1,0.490228,0.970643


joint_acc    49.12
slot_acc     97.05
dtype: float64
joint_acc    0.13
slot_acc     0.02
dtype: float64


In [19]:
# PPTOD-LastTurn on MWZ 2.1: one joint-accuracy value per entry
# (presumably one per training run - confirm).
PPTOD_mwz21_lt_df = pd.DataFrame(
    {'joint_acc': [0.4644504748982361, 0.4542740841248304]}
)
display(PPTOD_mwz21_lt_df)

# Mean and standard deviation over the entries, as percentages with two decimals.
print((PPTOD_mwz21_lt_df.mean() * 100).round(2))
print((PPTOD_mwz21_lt_df.std() * 100).round(2))

Unnamed: 0,joint_acc
0,0.46445
1,0.454274


joint_acc    45.94
dtype: float64
joint_acc    0.72
dtype: float64


### Baseline: KAGE-Random & PPTOD-Random

In [21]:
# KAGE-Random on MWZ 2.1: joint and slot accuracy, one row per entry
# (presumably one per training run - confirm).
KAGE_mwz21_random_df = pd.DataFrame(
    {
        'joint_acc': [0.49335, 0.486564],
        'slot_acc': [0.96987, 0.970055],
    }
)
display(KAGE_mwz21_random_df)

# Mean and standard deviation per metric, as percentages with two decimals.
print((KAGE_mwz21_random_df.mean() * 100).round(2))
print((KAGE_mwz21_random_df.std() * 100).round(2))

Unnamed: 0,joint_acc,slot_acc
0,0.49335,0.96987
1,0.486564,0.970055


joint_acc    49.0
slot_acc     97.0
dtype: float64
joint_acc    0.48
slot_acc     0.01
dtype: float64


In [20]:
# PPTOD-Random on MWZ 2.1: one joint-accuracy value per entry
# (presumably one per training run - confirm).
PPTOD_mwz21_random_df = pd.DataFrame(
    {'joint_acc': [0.47598371777476256, 0.4682496607869742]}
)
display(PPTOD_mwz21_random_df)

# Mean and standard deviation over the entries, as percentages with two decimals.
print((PPTOD_mwz21_random_df.mean() * 100).round(2))
print((PPTOD_mwz21_random_df.std() * 100).round(2))

Unnamed: 0,joint_acc
0,0.475984
1,0.46825


joint_acc    47.21
dtype: float64
joint_acc    0.55
dtype: float64


In [18]:
# Banner naming the folder, then the turn-position statistics for the
# '*selected_turn_id.csv' files it contains.
print('============= ./data/mwz21/KAGE/random_without_al/ =============')
read_all_by_folder_name(
    './data/mwz21/KAGE/random_without_al/',
    kage_mwz21_dialogue2len_df,
)



Unnamed: 0,round,selected_turn_id,dialogue_id,turn_idx,total_turns,dialogue,turn_percentage
0,-1,PMUL2201-5,PMUL2201,6,6,0,1.000000
1,-1,SNG02167-1,SNG02167,2,4,1,0.500000
2,-1,SSNG0242-5,SSNG0242,6,6,2,1.000000
3,-1,MUL2030-3,MUL2030,4,7,3,0.571429
4,-1,PMUL1605-5,PMUL1605,6,7,4,0.857143
...,...,...,...,...,...,...,...
15770,-1,SNG0743-2,SNG0743,3,4,15770,0.750000
15771,-1,PMUL1169-3,PMUL1169,4,4,15771,1.000000
15772,-1,MUL1909-9,MUL1909,10,13,15772,0.769231
15773,-1,MUL1545-2,MUL1545,3,7,15773,0.428571


# of turns read by annotators: mean - 0.5864 std - 0.2864
--------------------------------------------------


In [22]:
# Banner naming the folder, then the turn-position statistics for the
# '*selected_turn_id.csv' files it contains.
print('============= ./data/mwz21/PPTOD/random_without_al/ =============')
read_all_by_folder_name(
    './data/mwz21/PPTOD/random_without_al/',
    pptod_mwz21_dialogue2len_df,
)



Unnamed: 0,round,selected_turn_id,dialogue_id,turn_idx,total_turns,dialogue,turn_percentage
0,-1,SNG01856-4,SNG01856,5,5,0,1.000000
1,-1,MUL2168-3,MUL2168,4,8,1,0.500000
2,-1,MUL2105-8,MUL2105,9,9,2,1.000000
3,-1,PMUL1690-5,PMUL1690,6,11,3,0.545455
4,-1,MUL2395-5,MUL2395,6,7,4,0.857143
...,...,...,...,...,...,...,...
15797,-1,PMUL4251-5,PMUL4251,6,9,15797,0.666667
15798,-1,MUL1383-3,MUL1383,4,10,15798,0.400000
15799,-1,SNG0827-2,SNG0827,3,5,15799,0.600000
15800,-1,PMUL2395-5,PMUL2395,6,7,15800,0.857143


# of turns read by annotators: mean - 0.5796 std - 0.2867
--------------------------------------------------
