In [260]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import itertools
import matplotlib

In [261]:
import os
os.makedirs('plots/TC', exist_ok=True)
os.makedirs('plots/MSC', exist_ok=True)
os.makedirs('processed/TC', exist_ok=True)
os.makedirs('processed/MSC', exist_ok=True)

In [262]:
#sheet = 'Multi Session Chat View 1'
sheet = 'Topical Chat View 1'
df = pd.read_excel('ITL_20230521.xlsx', sheet_name=sheet)
df = df.loc[df.Model != "Gap"]

# Remove unnamed columns
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [263]:
if 'Persona Signal' in df:
    dataset_prefix = "MSC"
    # title_fontsize = 36
    legend_fontsize = 18
    label_fontsize = 24
else:
    dataset_prefix = "TC"
    # title_fontsize = 
    legend_fontsize = 20
    label_fontsize = 20

    # Rename "Knowledge" as "Persona Signal"
    df = df.rename(columns={"Knowledge": "Persona Signal"})

# Set to a common linux font
fonts = {'family' : 'serif',
        'serif' : 'Caladea',
        # 'weight' : 'bold',
        'size'   : 36
        }
# For some wierd reason you need to set the 
# font size like following
# 36 for MSC
# 40 for TC 

matplotlib.rc('font', **fonts)

In [264]:
df['Prompt Type'].unique()

array([nan, 'Orig', 'Short', 'None', 'Orig (ppl)', 'Short (ppl)', 'ppl1',
       'ppl2', 'Exp1', 'Exp2'], dtype=object)

In [265]:
df['Prompt Type'].replace("Orig(.*)", "FS-ICL\\1", regex=True, inplace=True)
df['Prompt Type'].replace("Short(.*)", "ZS-ICL\\1", regex=True, inplace=True)

In [266]:
# Drop for values ppl1, ppl2, Exp1, Exp2. Or if it is null
df = df.loc[~df['Prompt Type'].isin(['ppl1', 'ppl2', 'Exp1', 'Exp2'])]
df = df.loc[~df['Prompt Type'].isnull()]

In [267]:
df['Prompt Type'].unique()

array(['FS-ICL', 'ZS-ICL', 'None', 'FS-ICL (ppl)', 'ZS-ICL (ppl)'],
      dtype=object)

In [268]:
df["History Signal"].unique()

array(['None', 'Pegasus cnn/dm', 'Prev 4', 'Full', 'Prev 10', 'Prev 8',
       'Prev 2', 'Prev 1', '10 semantic sim (simcse)',
       '8 semantic sim (simcse)', '4 semantic sim (simcse)',
       '2 semantic sim (simcse)', '1 semantic sim (simcse)',
       'Threshold semantic sim (dialogcse)',
       'Threshold semantic sim (dmi)', 'BART', 'PegasusFT',
       'Summ (ext+abs)', 'Summ (ext+abs) + top 10',
       'Summ (ext+abs) + top 8', 'Summ (ext+abs) + top 2',
       'Summ (ext+abs) + top 1'], dtype=object)

In [269]:
# MSC Rules
df['History Signal'].replace(r"Prev (\d+)", r"Recent-\1", regex=True, inplace=True)
df['History Signal'].replace("4 Semant.*", "Semantic-4", regex=True, inplace=True)
df['History Signal'].replace(r"(\d+) semantic sim \(simcse\)", r"Semantic-\1", regex=True, inplace=True)
df['History Signal'].replace("semantic sim (simcse)", "Semantic-4", inplace=True)
df['History Signal'].replace("Pegasus cnn/dm", "Pegasus-CD", inplace=True)
df['Persona Signal'].replace("Pegasus cnn/dm", "Pegasus-CD", inplace=True)
# # TC Conversion Rules
df['History Signal'].replace("4 semantic sim (simcse)", "Semantic-4", inplace=True)
df['History Signal'].replace("4 semantic sim", "Semantic-4", regex=True, inplace=True)
df['Model'].replace("T-k instruct", "Tk-Instruct", inplace=True)
df['Model'].replace("Tk-instruct", "Tk-Instruct", inplace=True)

In [270]:
df["History Signal"].unique()

array(['None', 'Pegasus-CD', 'Recent-4', 'Full', 'Recent-10', 'Recent-8',
       'Recent-2', 'Recent-1', 'Semantic-10', 'Semantic-8', 'Semantic-4',
       'Semantic-2', 'Semantic-1', 'Threshold semantic sim (dialogcse)',
       'Threshold semantic sim (dmi)', 'BART', 'PegasusFT',
       'Summ (ext+abs)', 'Summ (ext+abs) + top 10',
       'Summ (ext+abs) + top 8', 'Summ (ext+abs) + top 2',
       'Summ (ext+abs) + top 1'], dtype=object)

In [271]:
df['Model'].replace("text-davinci-003", "GPT-3", inplace=True)


In [272]:
# Drop rows with Persona Signal = PegasusFT
df = df.loc[df['Persona Signal'] != "PegasusFT"]

In [273]:
df.sample(15)

Unnamed: 0,Model,Method,Persona Signal,History Signal,Prompt Type,BLEU,METEOR,rouge1,rouge2,rougeL,Bert-p,Bert-r,Bert-f1,DEB,Bleurt,output,prompt,budget
35,T0,Summ Pegasus (cnn/dm) + Short prompt (ppl),,Pegasus-CD,ZS-ICL (ppl),0.037994,0.126455,0.139171,0.018011,0.109473,0.798802,0.806363,0.80243,0.884256,0.311208,40.055808,120.870613,160.926421
143,GPT-3,Semantically similar + Short prompt,,Semantic-4,ZS-ICL,0.039399,0.154566,0.147379,0.019625,0.110913,0.796381,0.802064,0.799107,0.888307,0.4047,55.32454,174.140077,229.464616
182,T0,Semantically similar + Orig prompt,,Semantic-8,FS-ICL,0.040531,0.138761,0.143129,0.01964,0.111677,0.809715,0.812383,0.810882,0.938443,0.308738,45.452565,281.776456,
248,flanT5-XL,Summ (ext + abs) + Orig prompt (ppl),,Summ (ext+abs),FS-ICL (ppl),0.039793,0.125574,0.131663,0.017943,0.106365,0.806339,0.808351,0.807022,0.819685,0.323288,42.592886,238.078487,280.671374
234,Tk-Instruct,Bart summary + Orig prompt,,BART,FS-ICL,0.016406,0.071715,0.10859,0.014754,0.097872,0.842154,0.806587,0.823763,0.71918,0.279045,10.923971,269.969713,280.893684
89,T0,Prev utterances + Orig prompt,,Full,FS-ICL,0.029387,0.105273,0.109831,0.014919,0.085041,0.845908,0.842451,0.843901,0.817851,0.253337,37.894174,797.495813,835.389988
27,BlenderBot-3B,Summ Pegasus (cnn/dm) + Short prompt,,Pegasus-CD,ZS-ICL,0.028582,0.088115,0.109548,0.011019,0.092053,0.810353,0.804763,0.807417,0.564071,0.280342,19.083066,130.376581,149.459647
127,Tk-Instruct,Prev utterances + Orig prompt(ppl),,Recent-10,FS-ICL (ppl),0.032888,0.133613,0.128141,0.015632,0.09673,0.794738,0.80856,0.801425,0.896217,0.27629,57.918715,565.283939,623.202655
294,Tk-Instruct,Only knowledge + Orig prompt (ppl),Pegasus-CD,,FS-ICL (ppl),0.023812,0.069555,0.082387,0.004437,0.068839,0.772875,0.792695,0.782389,0.178614,0.193966,45.725997,793.273693,838.99969
269,T0,Summ (ext + abs) + Short prompt (ppl) + top 2,,Summ (ext+abs) + top 2,ZS-ICL (ppl),0.037223,0.1229,0.139103,0.019407,0.109962,0.807096,0.811703,0.809236,0.841576,0.320108,37.995858,96.791199,134.787057


In [274]:
used_cols = ['Model', 'Method', 'Persona Signal', 'History Signal', 'Prompt Type',
       'BLEU', 'METEOR', 'rouge1', 'rouge2', 'rougeL', 'Bert-p', 'Bert-r',
       'Bert-f1', 'DEB', 'Bleurt', 'output', 'prompt']
df = df[used_cols]
df['total_budget'] = df['output'] + df['prompt']
df['BLEURT']=df['Bleurt']

In [275]:
df = df.sort_values(['Model', 'Prompt Type'])

In [276]:
df

Unnamed: 0,Model,Method,Persona Signal,History Signal,Prompt Type,BLEU,METEOR,rouge1,rouge2,rougeL,Bert-p,Bert-r,Bert-f1,DEB,Bleurt,output,prompt,total_budget,BLEURT
26,BlenderBot-3B,Summ Pegasus (cnn/dm) + Orig prompt,,Pegasus-CD,FS-ICL,0.030190,0.094038,0.117702,0.009898,0.097960,0.815370,0.806016,0.810556,0.548794,0.258033,20.465838,300.101906,320.567745,0.258033
46,BlenderBot-3B,Prev utterances + Orig prompt,,Recent-4,FS-ICL,0.029816,0.092338,0.116651,0.010612,0.097084,0.817221,0.806499,0.811715,0.645156,0.268120,19.280554,370.421789,389.702343,0.268120
50,BlenderBot-3B,Prev utterances + Orig prompt,,Full,FS-ICL,,,,,,,,,,,,,,
148,BlenderBot-3B,Semantically similar + Orig prompt,,Semantic-4,FS-ICL,0.014941,0.060352,0.081460,0.010260,0.073805,0.838896,0.799086,0.818202,0.668794,0.272294,19.443212,387.624844,407.068056,0.272294
224,BlenderBot-3B,Bart summary + Orig prompt,,BART,FS-ICL,0.030122,0.094860,0.116774,0.009885,0.096999,0.814547,0.805837,0.810060,0.364468,0.251855,21.054561,298.001292,319.055852,0.251855
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253,flanT5-XL,Summ (ext + abs) + Short prompt (ppl) + top 8,,Summ (ext+abs) + top 8,ZS-ICL (ppl),0.041865,0.123515,0.150216,0.022612,0.125618,0.828751,0.812514,0.820399,0.869599,0.344499,26.336763,114.716996,141.053759,0.344499
255,flanT5-XL,Summ (ext + abs) + Short prompt (ppl) + top 2,,Summ (ext+abs) + top 2,ZS-ICL (ppl),0.041443,0.123092,0.146342,0.021377,0.124071,0.828215,0.812203,0.819982,0.852628,0.345481,26.285899,108.214502,134.500401,0.345481
257,flanT5-XL,Summ (ext + abs) + Short prompt (ppl) + top 1,,Summ (ext+abs) + top 1,ZS-ICL (ppl),0.040670,0.121496,0.150048,0.023051,0.125927,0.826762,0.811544,0.818935,0.834585,0.343516,26.188758,108.059059,134.247818,0.343516
289,flanT5-XL,Only knowledge + Short prompt (ppl),Pegasus-CD,,ZS-ICL (ppl),0.034638,0.110125,0.116450,0.015808,0.097971,0.804561,0.802494,0.803100,0.837980,0.305019,38.970287,374.393717,413.364004,0.305019


In [277]:
# # set and increase Matplotlib font
# plt.rcParams['font.family'] = 'sans-serif'
# plt.rcParams['font.sans-serif'] = 'Arial'
# plt.rcParams['font.size'] = 24

# font = {'family' : 'normal',
#         'weight' : 'bold',
#         'size'   : 32}

# matplotlib.rc('font', **font)

In [278]:
def drop_nan_row_cols(df):
    # print("Shape of df: {}".format(df.shape))
    # Print the columns/rows that will be dropped
    # print("Columns with all nan: {}".format(df.columns[df.isnull().all(axis=0)]))
    # print("Rows with all nan: {}".format(df.index[df.isnull().all(axis=1)]))
    # Drop rows with all nan
    df = df.dropna(axis=0, how='all')
    # Drop columns with all nan
    df = df.dropna(axis=1, how='all')
    # print("Shape of df after dropping rows and columns with all nan: {}".format(df.shape))
    return df

def scatter_plot_pivoted_df(pivoted_df, metric, save_path, is_roi=False):
    # Print shape
    # print(pivoted_df)

    print("Metric: {}".format(metric))
    print("Save path: {}".format(save_path))
    
    # Check if any row or column is all nan
    # if pivoted_df.isnull().all(axis=1).any() or pivoted_df.isnull().all(axis=0).any():
    #     print("Skipping plot because of all nan row or column for {} @ {}".format(metric, save_path))
    #     return

    # # Drop rows with all nan
    # pivoted_df = pivoted_df.dropna(axis=0, how='all')
    # # Drop columns with all nan
    # pivoted_df = pivoted_df.dropna(axis=1, how='all')


    if pivoted_df.shape[0] == 0:
        print("Skipping plot for {} @ {}".format(metric, save_path))
        return

    colors = sns.color_palette('Paired', len(pivoted_df.columns))
    markers = ['o', 'v', 's', 'p', 'P', '*', 'X', 'D', 'd', '1', '2', '3', '4', '8', 'h', 'H', 'x', 'X', 'D', 'd', '|', '_']

    # Create scatter plot
    fig, ax = plt.subplots(figsize=(8,6))
    # print(pivoted_df.index.tolist())
    # Scatter plot
    models = pivoted_df.columns
    for i, model in enumerate(models):
        x = pivoted_df.index.tolist()
        y = pivoted_df[model]
        # y shouldn't be all nans
        if y.isnull().all():
            continue
        lbl = f"{model[0]} ({model[1].replace('-ICL', '')})"
        ax.scatter(x, y, color = colors[i], marker = markers[i], label=lbl, s=150)

    # Set x-axis labels
    ax.set_xticks(list(range(len(pivoted_df.index.tolist()))))
    ax.set_xticklabels(pivoted_df.index.tolist(), rotation=90, fontsize=label_fontsize)

    # Set y-axis tick font
    ax.tick_params(axis='y', labelsize=label_fontsize)

    # Add legend at the top of the figure
    # show in a single row
    # Smaller font size
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.43), ncol=2, fontsize=legend_fontsize)

    ax.grid(True, axis='y')
    # x-axis label
    ax.set_xlabel('History Signal', fontsize=label_fontsize)
    # y-axis label
    if is_roi:
        ax.set_ylabel(f'ROI ({metric})', fontsize=label_fontsize)
    else:
        ax.set_ylabel(f'{metric}', fontsize=label_fontsize)


    # plt.tight_layout()
    # Save such that legends are fitted within the figure
    plt.savefig(save_path, bbox_inches='tight', dpi=300)
    plt.close()

In [279]:
def calculate_ROI(df_perf, df_budget):
    # print(df_perf)
    assert df_perf.shape == df_budget.shape
    # Find the rows with index "None_0" or "No DH, no BI", only one of these should exist
    if "None_0" in df_perf.index:
        X_perf = df_perf.loc['None_0']
        X_budget = df_budget.loc['None_0']
    # elif "No DH, no BI" in df_perf.index:
    #     assert "None_0" not in df_perf.index
    #     X_perf = df_perf.loc['No DH, no BI']
    #     X_budget = df_budget.loc['No DH, no BI']
    else:
        raise ValueError("None_0 not found in df_perf.index.")

    # assert df_perf.index[0] == "None" or df_perf.index[0] == "No DH, no BI"
    # Match index and column order
    assert df_perf.index.tolist() == df_budget.index.tolist()
    assert df_perf.columns.tolist() == df_budget.columns.tolist()

    # Calculate ROI: w.r.t to best baseline in "None"
    df_delta = df_perf - X_perf
    df_delta_budget = df_budget - X_budget
    df_roi = 10_000*df_delta / df_delta_budget
    return df_roi.iloc[1:]

In [280]:
def proc_sub_df_persona(sub_df, baseline_df, signal, prompt_type_groups, metric, all_subframes, dataset_prefix):
    for prompt_types_name, prompt_types in prompt_type_groups.items():
        # prompt_sub_df = sub_df
        prompt_sub_df = sub_df[sub_df['Prompt Type'].isin(prompt_types)]
        # Remove old baselines
        # prompt_sub_df = sub_df[~(sub_df['History Signal'] == "None")]
        # Rename this baseline to "None_1"
        prompt_sub_df.loc[prompt_sub_df['History Signal'] == 'None', 'History Signal'] = 'None_1'
        
        # Add common baseline - prompt type would be non-ppl
        # prompt_baseline_df = baseline_df[baseline_df['Prompt Type'].isin(prompt_types)]
        prompt_baseline_df = baseline_df[baseline_df['Prompt Type'].isin(prompt_type_groups['normal'])]
        # overwrite prompt type of baseline
        for i, row in prompt_baseline_df.iterrows():
            if row['Prompt Type'] == 'FS-ICL':
                prompt_baseline_df.at[i, 'Prompt Type'] = prompt_types[0]
            elif row['Prompt Type'] == 'ZS-ICL':
                prompt_baseline_df.at[i, 'Prompt Type'] = prompt_types[1]
        # rename this baseline to "None_0"
        prompt_baseline_df['History Signal'] = "None_0"
        prompt_sub_df  = prompt_sub_df.append(prompt_baseline_df)
        
        print("History:", prompt_sub_df['History Signal'].unique())
        print("Prompt types:", prompt_sub_df['Prompt Type'].unique())

        # Remove blenderbot
        prompt_sub_df = prompt_sub_df[~prompt_sub_df['Model'].isin(["BlenderBot-3B"])]
        if len(prompt_sub_df) <= 0:
            continue

        pivoted_df = prompt_sub_df.pivot(index='History Signal', columns=['Model', 'Prompt Type'], values=metric)        
        pivoted_budget = prompt_sub_df.pivot(index='History Signal', columns=['Model', 'Prompt Type'], values='total_budget')
        pivoted_input_budget = prompt_sub_df.pivot(index='History Signal', columns=['Model', 'Prompt Type'], values='prompt')

        # Fixed model order
        history_order = ["None_0", "None_1", "BART","Full", "Pegasus-CD", "PegasusFT", 
                         "Recent-1", "Recent-2", "Recent-4", "Recent-8", "Recent-10", "Recent-16", 
                         "Semantic-1", "Semantic-2", "Semantic-4", "Semantic-8", "Semantic-10", "Semantic-16",
                         ]
        # remove history signals that are not in the data
        history_order = [h for h in history_order if h in prompt_sub_df['History Signal'].unique()]
        pivoted_df = pivoted_df.reindex(history_order)
        pivoted_budget = pivoted_budget.reindex(history_order)
        pivoted_input_budget = pivoted_input_budget.reindex(history_order)

        # ROI: based on first row (None)
        ROI = calculate_ROI(pivoted_df, pivoted_budget)

        models = ['flanT5-XL',
                'T0',
                'Tk-Instruct',
                'GPT-3']
        # remove models that are not in the data
        models = [m for m in models if m in prompt_sub_df['Model'].unique()] 
        # icls = ["ZS-ICL", "FS-ICL", "ZS-ICL (ppl)", "FS-ICL (ppl)"]
        icls = prompt_types
        model_icl_pairs = list(itertools.product(models, icls))
        pivoted_df = pivoted_df.reindex(model_icl_pairs, axis=1)
        pivoted_budget = pivoted_budget.reindex(model_icl_pairs, axis=1)
        pivoted_input_budget = pivoted_input_budget.reindex(model_icl_pairs, axis=1)
        ROI = ROI.reindex(model_icl_pairs, axis=1)

        # Clean the dataframes of empty columns/rows
        pivoted_df = drop_nan_row_cols(pivoted_df)
        pivoted_budget = drop_nan_row_cols(pivoted_budget)
        pivoted_input_budget = drop_nan_row_cols(pivoted_input_budget)
        ROI = drop_nan_row_cols(ROI)

        all_subframes[signal + "_" + prompt_types_name] = pivoted_df
        all_subframes[signal + "_" + prompt_types_name + "_budget"] = pivoted_budget
        all_subframes[signal + "_" + prompt_types_name + "_ibudget"] = pivoted_input_budget
        if "None_0" in pivoted_df.index:
            all_subframes[signal + "_" + prompt_types_name + "_ROI"] = ROI
        else:
            print("Cannot calculate ROI without NoHistory baseline (i.e. None).")

        # Plot absolute metrics
        f1 = f'plots/{dataset_prefix}/{metric}_{signal}_{prompt_types_name}.pdf'
        scatter_plot_pivoted_df(pivoted_df, metric, f1)

        # Plot ROI
        if "None" in pivoted_df.index:
            f2 = f'plots/{dataset_prefix}/{metric}_{signal}_{prompt_types_name}_ROI.pdf'
            scatter_plot_pivoted_df(ROI, metric, f2, is_roi=True)

In [281]:
def merge_runs(framedict, metric, dataset_prefix, target="ppl"):
    assert target in ["normal", "ppl"], "merge target must be either normal or ppl"

    # DF Merged, Only for None_ppl, PegasusFT_*
    def is_budget(x):
        return x.endswith("_budget") or x.endswith("_ibudget")
    
    def is_roi(x):
        return x.endswith("_ROI")
    
    def is_runlist(x):
        # Set of runs that we want to keep
        # No persona and PegasusFT-based persona summary
        return x.startswith("None") or x.startswith("PegasusFT") or x.startswith("Pegasus-CD")

    modded_run_keys = list(
        filter(
            lambda x: is_runlist(x) and not is_roi(x) and not is_budget(x),
            framedict.keys()
        )
    )
    df_modded_runs_group = {k: v for k, v in framedict.items() if k in modded_run_keys}

    budget_keys = list(
        filter(lambda x: is_budget(x), framedict.keys())
    )
    df_budgets = {k.replace("_budget", ""): v for k, v in framedict.items() if k in budget_keys}
    df_budgets = {k: v for k, v in df_budgets.items() if k in df_modded_runs_group}

    # Baseline is None_normal, index "None"
    df_baseline = df_modded_runs_group["None_normal"].iloc[:1]
    df_baseline_budget = df_budgets["None_normal"].iloc[:1]
    # check that history is None
    assert df_baseline.index[0] == "None_0" and df_baseline_budget.index[0] == "None_0", "Baseline must be None_0"

    to_remove = []
    if target == "ppl":
        for k, v in df_modded_runs_group.items():
            if k.endswith("_normal"):
                to_remove.append(k)
    elif target == "normal":
        for k, v in df_modded_runs_group.items():
            if k.endswith("_ppl"):
                to_remove.append(k)
        # del df_modded_runs_group["None_ppl"]
        # del df_budgets["None_ppl"]
    print("TO REMOVE:", to_remove)
    for k in to_remove:
        del df_modded_runs_group[k]
        del df_budgets[k]
    print("REMAINING:", df_modded_runs_group.keys())


    # Remove (ppl) from column names and "None" history rows
    df_modded_runs_group = {k: v.rename(columns=lambda x: x.replace(" (ppl)", "")) for k, v in df_modded_runs_group.items()}
    # df_modded_runs_group = {k: v.drop("None") for k, v in df_modded_runs_group.items()}

    # Do the same for budgets
    df_budgets = {k: v.rename(columns=lambda x: x.replace(" (ppl)", "")) for k, v in df_budgets.items()}
    # df_budgets = {k: v.drop("None") for k, v in df_budgets.items()}

    # Create merged df
    # Keys as a new column, and reset index, history as another new column
    df_merged = pd.concat(df_modded_runs_group, keys=df_modded_runs_group.keys()).reset_index(level=1)
    df_merged_budget = pd.concat(df_budgets, keys=df_budgets.keys()).reset_index(level=1)

    # Reset index again, rename the old one to "Config"
    df_merged = df_merged.reset_index().rename(columns={"index": "Config"})
    df_merged_budget = df_merged_budget.reset_index().rename(columns={"index": "Config"})

    # Add the baseline row at the top
    # TODO: It don't think this is necessary anymore, so I commented it out (May 21, 2023)
    # df_merged = df_merged.append(df_baseline.reset_index().rename(columns={"index": "Config"}))
    # df_merged_budget = df_merged_budget.append(df_baseline_budget.reset_index().rename(columns={"index": "Config"}))

    # Move last row to the top
    df_merged = df_merged.iloc[-1:].append(df_merged.iloc[:-1]).reset_index(drop=True)
    df_merged_budget = df_merged_budget.iloc[-1:].append(df_merged_budget.iloc[:-1]).reset_index(drop=True)

    # Final names for DH
    replacements_2 = {
        # "None": "No DH, no BI",
        "None_1": "No DH, no BI",
        "BART": "BART-D",
        "PegasusFT": "Pegasus-DS",
        "Pegasus-CD": "Pegasus-CD",
    }
    df_merged["History Signal"] = df_merged["History Signal"].replace(replacements_2)
    df_merged_budget["History Signal"] = df_merged_budget["History Signal"].replace(replacements_2)
 
    if target=="ppl":
        replace_dict = {
            "None_ppl": "",
            "PegasusFT_ppl": " + BI(P_FT)",
            "Pegasus-CD_ppl": " + BI"
        }
    elif target=="normal":
        replace_dict = {
            "None_normal": "",
            "PegasusFT_normal": " + BI(P_FT)",
            "Pegasus-CD_normal": " + BI"
        }


    df_merged["Config"] = df_merged["Config"].fillna("").replace(replace_dict)
    df_merged_budget["Config"] = df_merged_budget["Config"].fillna("").replace(replace_dict)

    # Add config to history signal
    df_merged["History Signal"] = df_merged["History Signal"] + df_merged["Config"]
    df_merged_budget["History Signal"] = df_merged_budget["History Signal"] + df_merged_budget["Config"]

    # One "None_0 + BI" might have been added, remove it -- None_0 is results from the orig none_none setup
    df_merged = df_merged[df_merged["History Signal"] != "None_0 + BI"]
    df_merged_budget = df_merged_budget[df_merged_budget["History Signal"] != "None_0 + BI"]

    # Drop all None_0 **
    # df_merged = df_merged[df_merged["History Signal"] != "None_0"]
    # df_merged_budget = df_merged_budget[df_merged_budget["History Signal"] != "None_0"]

    # No DH, No BI + BI -> Only BI
    df_merged["History Signal"] = df_merged["History Signal"].replace({"No DH, no BI + BI": "Only BI"})
    df_merged_budget["History Signal"] = df_merged_budget["History Signal"].replace({"No DH, no BI + BI": "Only BI"})
    
   # df_merged["History Signal"] = df_merged["History Signal"].replace({"None": "No DH, No BI"})
    # df_merged_budget["History Signal"] = df_merged_budget["History Signal"].replace({"None": "No DH, No BI"})

    # make history signal the index column
    df_merged = df_merged.set_index("History Signal")
    df_merged_budget = df_merged_budget.set_index("History Signal")

    # drop config
    df_merged = df_merged.drop("Config", axis=1)
    df_merged_budget = df_merged_budget.drop("Config", axis=1)

    assert (df_merged.index == df_merged_budget.index).all()

    # Convert the column names to tuples by splitting on space
    # df_merged.columns = [tuple(col.split()) for col in df_merged.columns]
    df_merged = df_merged.reindex(framedict['None_normal'].columns, axis=1)
    df_merged_budget = df_merged_budget.reindex(framedict['None_normal'].columns, axis=1)

    # print(df_merged)

    # Fixed model order
    history_order = ["None_0", "No DH, no BI", 
                    #  "Only BI", 
                        "BART-D", "Full", "Pegasus-CD", "Pegasus-DS", "Pegasus-DS + BI", 
                        "Recent-1", "Recent-2", "Recent-4", "Recent-8", "Recent-10", "Recent-16", 
                        "Semantic-1", "Semantic-2", "Semantic-4", "Semantic-8", "Semantic-10", "Semantic-16",
                        ]
    # remove history signals that are not in the data
    history_order_chk = [h for h in history_order if h in df_merged.index]
    df_merged = df_merged.reindex(history_order_chk)
    history_order_chk = [h for h in history_order if h in df_merged_budget.index]
    df_merged_budget = df_merged_budget.reindex(history_order_chk)


    # Plot the merged df using the scatter_plot_pivoted_df function
    f3 = f'plots/{dataset_prefix}/{metric}_merge_{target}.pdf'
    scatter_plot_pivoted_df(df_merged[df_merged.index != "None_0"] , metric, f3)

    # Get the ROI
    # df_delta = df_merged - df_merged.iloc[0]
    # df_delta_budget = df_merged_budget - df_merged_budget.iloc[0]
    # df_merged_ROI = 10_000 * df_delta / df_delta_budget
    df_merged_ROI = calculate_ROI(df_merged, df_merged_budget)

    # Plot the ROI
    f4 = f'plots/{dataset_prefix}/{metric}_merge_{target}_ROI.pdf'
    scatter_plot_pivoted_df(df_merged[df_merged.index != "None_0"], metric, f4, is_roi=True)

    return df_merged, df_merged_budget, df_merged_ROI


In [282]:
prompt_type_groups = {
    'normal': ['FS-ICL', 'ZS-ICL'],
    'ppl': ['FS-ICL (ppl)', 'ZS-ICL (ppl)'],
}
metrics = ['BLEU', 'METEOR', 'rougeL', 'Bert-f1', 'BLEURT', 'DEB']

# Start Processing
filtered_df = df[df['History Signal'].notnull()]

# Filter by metric
for metric in metrics:
    all_subframes = {}
    if 'Persona Signal' in df:
        # dataset_prefix = "MSC"
        # Filter by persona
        groups = filtered_df.groupby(['Persona Signal'])

        # Find common baselines
        # Persona = None, History = None, prompt_type=same
        baseline_df = filtered_df[(filtered_df['Persona Signal'] == "None") & (filtered_df['History Signal'] == "None")]
        for signal, sub_df in groups:
            #if signal != "None":
            #    _internal_baseline = sub_df[sub_df['History Signal'] == "None"]
            #    if len(_internal_baseline) == 0:
            #        sub_df_with_baseline = sub_df.append(baseline_df)
            proc_sub_df_persona(sub_df, baseline_df, signal, prompt_type_groups, metric, all_subframes, dataset_prefix)
            # if signal == "None":
            #     raise "Hell"
    else:
        # dataset_prefix = "TC"
        # Find common baselines
        # Persona = None, History = None, prompt_type=same
        baseline_df = filtered_df[(filtered_df['History Signal'] == "None")]

        proc_sub_df_persona(filtered_df, baseline_df, "NA", prompt_type_groups, metric, all_subframes, dataset_prefix)

    # raise Exception("Hell")
    # if dataset_prefix == "MSC":
    if 'Persona Signal' in df:
        df_merged_p, _, df_merged_p_ROI = merge_runs(all_subframes, metric, dataset_prefix, target="ppl")
        df_merged_n, _, df_merged_n_ROI = merge_runs(all_subframes, metric, dataset_prefix, target="normal")
    # Save all subframes to xlsx, one sheet per persona signal
    # (signal, prompt_types) for all prompt type should be in same sheet for easy comparison

    with pd.ExcelWriter(f'processed/{dataset_prefix}/{metric}.xlsx') as writer:
        for key, value in all_subframes.items():
            # remove multiindex 
            value.columns = [' '.join(col).strip() for col in value.columns.values]
            value.to_excel(writer, sheet_name=key)

        # if dataset_prefix == "MSC":
        if "Persona Signal" in df:
            # Save merged df
            # fix column names
            df_merged_p.columns = [' '.join(col).strip() for col in df_merged_p.columns.values]
            df_merged_p.to_excel(writer, sheet_name="merged_ppl")

            # Save merged ROI
            # fix column names
            df_merged_p_ROI.columns = [' '.join(col).strip() for col in df_merged_p_ROI.columns.values]
            df_merged_p_ROI.to_excel(writer, sheet_name="merged_ppl_ROI")

            # Save merged df
            # fix column names
            df_merged_n.columns = [' '.join(col).strip() for col in df_merged_n.columns.values]
            df_merged_n.to_excel(writer, sheet_name="merged_normal")

            # Save merged ROI
            # fix column names
            df_merged_n_ROI.columns = [' '.join(col).strip() for col in df_merged_n_ROI.columns.values]
            df_merged_n_ROI.to_excel(writer, sheet_name="merged_normal_ROI")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['Pegasus-CD' 'Recent-4' 'Full' 'Semantic-4' 'BART' 'PegasusFT' 'None_1'
 'Recent-10' 'Recent-8' 'Recent-2' 'Recent-1' 'Semantic-10' 'Semantic-8'
 'Semantic-2' 'Semantic-1' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: BLEU
Save path: plots/TC/BLEU_None_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'Pegasus-CD' 'Full' 'Recent-10' 'Recent-8' 'Recent-4' 'Recent-2'
 'Recent-1' 'Threshold semantic sim (dialogcse)'
 'Threshold semantic sim (dmi)' 'Semantic-10' 'Semantic-8' 'Semantic-4'
 'Semantic-2' 'Semantic-1' 'BART' 'PegasusFT' 'Summ (ext+abs)'
 'Summ (ext+abs) + top 10' 'Summ (ext+abs) + top 8'
 'Summ (ext+abs) + top 2' 'Summ (ext+abs) + top 1' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: BLEU
Save path: plots/TC/BLEU_None_ppl.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: BLEU
Save path: plots/TC/BLEU_Pegasus-CD_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: BLEU
Save path: plots/TC/BLEU_Pegasus-CD_ppl.pdf
TO REMOVE: ['None_normal', 'Pegasus-CD_normal']
REMAINING: dict_keys(['None_ppl', 'Pegasus-CD_ppl'])
Metric: BLEU
Save path: plots/TC/BLEU_merge_ppl.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: BLEU
Save path: plots/TC/BLEU_merge_ppl_ROI.pdf
TO REMOVE: ['None_ppl', 'Pegasus-CD_ppl']
REMAINING: dict_keys(['None_normal', 'Pegasus-CD_normal'])
Metric: BLEU
Save path: plots/TC/BLEU_merge_normal.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: BLEU
Save path: plots/TC/BLEU_merge_normal_ROI.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['Pegasus-CD' 'Recent-4' 'Full' 'Semantic-4' 'BART' 'PegasusFT' 'None_1'
 'Recent-10' 'Recent-8' 'Recent-2' 'Recent-1' 'Semantic-10' 'Semantic-8'
 'Semantic-2' 'Semantic-1' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: METEOR
Save path: plots/TC/METEOR_None_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'Pegasus-CD' 'Full' 'Recent-10' 'Recent-8' 'Recent-4' 'Recent-2'
 'Recent-1' 'Threshold semantic sim (dialogcse)'
 'Threshold semantic sim (dmi)' 'Semantic-10' 'Semantic-8' 'Semantic-4'
 'Semantic-2' 'Semantic-1' 'BART' 'PegasusFT' 'Summ (ext+abs)'
 'Summ (ext+abs) + top 10' 'Summ (ext+abs) + top 8'
 'Summ (ext+abs) + top 2' 'Summ (ext+abs) + top 1' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: METEOR
Save path: plots/TC/METEOR_None_ppl.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: METEOR
Save path: plots/TC/METEOR_Pegasus-CD_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: METEOR
Save path: plots/TC/METEOR_Pegasus-CD_ppl.pdf
TO REMOVE: ['None_normal', 'Pegasus-CD_normal']
REMAINING: dict_keys(['None_ppl', 'Pegasus-CD_ppl'])
Metric: METEOR
Save path: plots/TC/METEOR_merge_ppl.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: METEOR
Save path: plots/TC/METEOR_merge_ppl_ROI.pdf
TO REMOVE: ['None_ppl', 'Pegasus-CD_ppl']
REMAINING: dict_keys(['None_normal', 'Pegasus-CD_normal'])
Metric: METEOR
Save path: plots/TC/METEOR_merge_normal.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: METEOR
Save path: plots/TC/METEOR_merge_normal_ROI.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['Pegasus-CD' 'Recent-4' 'Full' 'Semantic-4' 'BART' 'PegasusFT' 'None_1'
 'Recent-10' 'Recent-8' 'Recent-2' 'Recent-1' 'Semantic-10' 'Semantic-8'
 'Semantic-2' 'Semantic-1' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: rougeL
Save path: plots/TC/rougeL_None_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'Pegasus-CD' 'Full' 'Recent-10' 'Recent-8' 'Recent-4' 'Recent-2'
 'Recent-1' 'Threshold semantic sim (dialogcse)'
 'Threshold semantic sim (dmi)' 'Semantic-10' 'Semantic-8' 'Semantic-4'
 'Semantic-2' 'Semantic-1' 'BART' 'PegasusFT' 'Summ (ext+abs)'
 'Summ (ext+abs) + top 10' 'Summ (ext+abs) + top 8'
 'Summ (ext+abs) + top 2' 'Summ (ext+abs) + top 1' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: rougeL
Save path: plots/TC/rougeL_None_ppl.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: rougeL
Save path: plots/TC/rougeL_Pegasus-CD_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: rougeL
Save path: plots/TC/rougeL_Pegasus-CD_ppl.pdf
TO REMOVE: ['None_normal', 'Pegasus-CD_normal']
REMAINING: dict_keys(['None_ppl', 'Pegasus-CD_ppl'])
Metric: rougeL
Save path: plots/TC/rougeL_merge_ppl.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: rougeL
Save path: plots/TC/rougeL_merge_ppl_ROI.pdf
TO REMOVE: ['None_ppl', 'Pegasus-CD_ppl']
REMAINING: dict_keys(['None_normal', 'Pegasus-CD_normal'])
Metric: rougeL
Save path: plots/TC/rougeL_merge_normal.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: rougeL
Save path: plots/TC/rougeL_merge_normal_ROI.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['Pegasus-CD' 'Recent-4' 'Full' 'Semantic-4' 'BART' 'PegasusFT' 'None_1'
 'Recent-10' 'Recent-8' 'Recent-2' 'Recent-1' 'Semantic-10' 'Semantic-8'
 'Semantic-2' 'Semantic-1' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: Bert-f1
Save path: plots/TC/Bert-f1_None_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'Pegasus-CD' 'Full' 'Recent-10' 'Recent-8' 'Recent-4' 'Recent-2'
 'Recent-1' 'Threshold semantic sim (dialogcse)'
 'Threshold semantic sim (dmi)' 'Semantic-10' 'Semantic-8' 'Semantic-4'
 'Semantic-2' 'Semantic-1' 'BART' 'PegasusFT' 'Summ (ext+abs)'
 'Summ (ext+abs) + top 10' 'Summ (ext+abs) + top 8'
 'Summ (ext+abs) + top 2' 'Summ (ext+abs) + top 1' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: Bert-f1
Save path: plots/TC/Bert-f1_None_ppl.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: Bert-f1
Save path: plots/TC/Bert-f1_Pegasus-CD_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: Bert-f1
Save path: plots/TC/Bert-f1_Pegasus-CD_ppl.pdf
TO REMOVE: ['None_normal', 'Pegasus-CD_normal']
REMAINING: dict_keys(['None_ppl', 'Pegasus-CD_ppl'])
Metric: Bert-f1
Save path: plots/TC/Bert-f1_merge_ppl.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: Bert-f1
Save path: plots/TC/Bert-f1_merge_ppl_ROI.pdf
TO REMOVE: ['None_ppl', 'Pegasus-CD_ppl']
REMAINING: dict_keys(['None_normal', 'Pegasus-CD_normal'])
Metric: Bert-f1
Save path: plots/TC/Bert-f1_merge_normal.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: Bert-f1
Save path: plots/TC/Bert-f1_merge_normal_ROI.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['Pegasus-CD' 'Recent-4' 'Full' 'Semantic-4' 'BART' 'PegasusFT' 'None_1'
 'Recent-10' 'Recent-8' 'Recent-2' 'Recent-1' 'Semantic-10' 'Semantic-8'
 'Semantic-2' 'Semantic-1' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: BLEURT
Save path: plots/TC/BLEURT_None_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'Pegasus-CD' 'Full' 'Recent-10' 'Recent-8' 'Recent-4' 'Recent-2'
 'Recent-1' 'Threshold semantic sim (dialogcse)'
 'Threshold semantic sim (dmi)' 'Semantic-10' 'Semantic-8' 'Semantic-4'
 'Semantic-2' 'Semantic-1' 'BART' 'PegasusFT' 'Summ (ext+abs)'
 'Summ (ext+abs) + top 10' 'Summ (ext+abs) + top 8'
 'Summ (ext+abs) + top 2' 'Summ (ext+abs) + top 1' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: BLEURT
Save path: plots/TC/BLEURT_None_ppl.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: BLEURT
Save path: plots/TC/BLEURT_Pegasus-CD_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: BLEURT
Save path: plots/TC/BLEURT_Pegasus-CD_ppl.pdf
TO REMOVE: ['None_normal', 'Pegasus-CD_normal']
REMAINING: dict_keys(['None_ppl', 'Pegasus-CD_ppl'])
Metric: BLEURT
Save path: plots/TC/BLEURT_merge_ppl.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: BLEURT
Save path: plots/TC/BLEURT_merge_ppl_ROI.pdf
TO REMOVE: ['None_ppl', 'Pegasus-CD_ppl']
REMAINING: dict_keys(['None_normal', 'Pegasus-CD_normal'])
Metric: BLEURT
Save path: plots/TC/BLEURT_merge_normal.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: BLEURT
Save path: plots/TC/BLEURT_merge_normal_ROI.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['Pegasus-CD' 'Recent-4' 'Full' 'Semantic-4' 'BART' 'PegasusFT' 'None_1'
 'Recent-10' 'Recent-8' 'Recent-2' 'Recent-1' 'Semantic-10' 'Semantic-8'
 'Semantic-2' 'Semantic-1' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: DEB
Save path: plots/TC/DEB_None_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'Pegasus-CD' 'Full' 'Recent-10' 'Recent-8' 'Recent-4' 'Recent-2'
 'Recent-1' 'Threshold semantic sim (dialogcse)'
 'Threshold semantic sim (dmi)' 'Semantic-10' 'Semantic-8' 'Semantic-4'
 'Semantic-2' 'Semantic-1' 'BART' 'PegasusFT' 'Summ (ext+abs)'
 'Summ (ext+abs) + top 10' 'Summ (ext+abs) + top 8'
 'Summ (ext+abs) + top 2' 'Summ (ext+abs) + top 1' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: DEB
Save path: plots/TC/DEB_None_ppl.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL' 'ZS-ICL']
Metric: DEB
Save path: plots/TC/DEB_Pegasus-CD_normal.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


History: ['None_1' 'PegasusFT' 'None_0']
Prompt types: ['FS-ICL (ppl)' 'ZS-ICL (ppl)']
Metric: DEB
Save path: plots/TC/DEB_Pegasus-CD_ppl.pdf
TO REMOVE: ['None_normal', 'Pegasus-CD_normal']
REMAINING: dict_keys(['None_ppl', 'Pegasus-CD_ppl'])
Metric: DEB
Save path: plots/TC/DEB_merge_ppl.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: DEB
Save path: plots/TC/DEB_merge_ppl_ROI.pdf
TO REMOVE: ['None_ppl', 'Pegasus-CD_ppl']
REMAINING: dict_keys(['None_normal', 'Pegasus-CD_normal'])
Metric: DEB
Save path: plots/TC/DEB_merge_normal.pdf


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Metric: DEB
Save path: plots/TC/DEB_merge_normal_ROI.pdf


In [283]:
df_merged_p

Unnamed: 0_level_0,flanT5-XL FS-ICL,flanT5-XL ZS-ICL,T0 FS-ICL,T0 ZS-ICL,Tk-Instruct FS-ICL,Tk-Instruct ZS-ICL,GPT-3 FS-ICL,GPT-3 ZS-ICL
History Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
None_0,0.744187,0.741139,0.557138,0.848526,0.629201,0.449476,0.881463,0.850998
"No DH, no BI",0.666568,0.513742,0.526063,0.84188,0.610212,0.642541,,
BART-D,0.771849,0.833615,0.565619,0.832675,0.762686,0.772178,,
Full,0.968929,0.947495,0.983706,0.99062,0.944253,0.958392,,
Pegasus-CD,0.829682,0.849403,0.619968,0.884256,0.762301,0.834908,,
Pegasus-DS,0.776777,0.835125,0.547618,0.832818,0.75728,0.781853,,
Pegasus-DS + BI,0.16695,0.830437,0.198983,0.863361,0.17902,0.827145,,
Recent-1,0.689169,0.827532,0.889715,0.853019,0.849776,0.704318,,
Recent-2,0.520035,0.848618,0.891026,0.87069,0.879548,0.755137,,
Recent-4,0.863947,0.845844,0.907878,0.883011,0.901825,0.802774,,


In [284]:
all_subframes["Pegasus-CD_ppl"]

Unnamed: 0_level_0,flanT5-XL FS-ICL (ppl),flanT5-XL ZS-ICL (ppl),T0 FS-ICL (ppl),T0 ZS-ICL (ppl),Tk-Instruct FS-ICL (ppl),Tk-Instruct ZS-ICL (ppl),GPT-3 FS-ICL (ppl),GPT-3 ZS-ICL (ppl)
History Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
None_0,0.744187,0.741139,0.557138,0.848526,0.629201,0.449476,0.881463,0.850998
None_1,0.154204,0.83798,0.511741,0.920201,0.178614,0.821118,,
PegasusFT,0.16695,0.830437,0.198983,0.863361,0.17902,0.827145,,


In [285]:
import pandas as pd

# assuming all_subframes is a dictionary containing the subframes
none_normal = all_subframes['None_normal'].loc[['None_1']]
none_ppl = all_subframes['None_ppl'].loc[['None_1']]

# display the subframes side by side
pd.concat([none_normal, none_ppl], axis=0)

Unnamed: 0_level_0,flanT5-XL FS-ICL,flanT5-XL ZS-ICL,T0 FS-ICL,T0 ZS-ICL,Tk-Instruct FS-ICL,Tk-Instruct ZS-ICL,GPT-3 FS-ICL,GPT-3 ZS-ICL,flanT5-XL FS-ICL (ppl),flanT5-XL ZS-ICL (ppl),T0 FS-ICL (ppl),T0 ZS-ICL (ppl),Tk-Instruct FS-ICL (ppl),Tk-Instruct ZS-ICL (ppl),GPT-3 FS-ICL (ppl),GPT-3 ZS-ICL (ppl)
History Signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
None_1,0.744187,0.741139,0.557138,0.848526,0.629201,0.449476,0.881463,0.850998,,,,,,,,
None_1,,,,,,,,,0.666568,0.513742,0.526063,0.84188,0.610212,0.642541,,


In [286]:
sub_df[["History Signal", "Model", "Prompt Type"]].value_counts()

History Signal  Model        Prompt Type 
None            GPT-3        FS-ICL          1
                             ZS-ICL          1
PegasusFT       flanT5-XL    ZS-ICL          1
                             FS-ICL (ppl)    1
                             FS-ICL          1
                Tk-Instruct  ZS-ICL (ppl)    1
                             ZS-ICL          1
                             FS-ICL (ppl)    1
                             FS-ICL          1
                T0           ZS-ICL (ppl)    1
                             ZS-ICL          1
                             FS-ICL (ppl)    1
                             FS-ICL          1
                GPT-3        ZS-ICL          1
                             FS-ICL          1
None            flanT5-XL    ZS-ICL (ppl)    1
                             FS-ICL (ppl)    1
                Tk-Instruct  ZS-ICL (ppl)    1
                             FS-ICL (ppl)    1
                T0           ZS-ICL (ppl)    1
                  

In [287]:
signal

'Pegasus-CD'

In [288]:
%%bash 
    #!/bin/bash
    # if figlet is installed
    if [ -x "$(command -v figlet)" ]; then
        figlet "Adios!"
    fi