# Read aggregated results 
The collect_results.py script aggregates results from multiple directories. You can view and manipulate the aggregated results to get insights from the data

In [1]:
import glob
import os
import json
import re
import numpy as np
import matplotlib.pyplot as plt

from scipy import stats
import time

import pandas as pd
from pandas import DataFrame

Read the raw results file:

In [2]:
# This is the path to the raw results file generated with the script
results_json = "/private/home/tbmihaylov/fairseq-xlmg/gpt3_repro_cb_results.tsv.raw.jsonl"
results_json = "/private/home/tbmihaylov/fairseq-xlmg/cb_our_models.tsv.raw.jsonl"
results_json = "/checkpoint/tbmihaylov/few_shot/xnli_experimental/results.tsv.raw.jsonl"

In [3]:
results = []
with open(results_json) as f_res:
    for line in f_res:
        line = line.strip()
        item = json.loads(line)
        if "macro_F1::scores" in item:
            item["macro_F1::max"] = max(item["macro_F1::scores"])
            item["macro_F1::min"] = max(item["macro_F1::scores"])
        if "accuracy::scores" in item:
            item["accuracy::max"] = max(item["accuracy::scores"])
            item["accuracy::min"] = max(item["accuracy::scores"])
        results.append(item)

print(f"{len(results)} items loaded")
#print(item)

45 items loaded


In [4]:
df = DataFrame.from_records(results)

The results has the followinf columns:

In [5]:
list(df.columns)

['model_name',
 'task',
 'language',
 'template',
 'nb_few_shot_samples',
 'calibration_options',
 'calibrator_name',
 'train_set',
 'valid_set',
 'eval_set',
 'train_lang',
 'valid_lang',
 'ppl_common_prefix::scores',
 'ppl_common_prefix::mean',
 'ppl_common_prefix::std',
 'ppl_common_prefix::mean_confidence_interval',
 'ppl_selected_candidate::scores',
 'ppl_selected_candidate::mean',
 'ppl_selected_candidate::std',
 'ppl_selected_candidate::mean_confidence_interval',
 'ppl_full_selected_candidate::scores',
 'ppl_full_selected_candidate::mean',
 'ppl_full_selected_candidate::std',
 'ppl_full_selected_candidate::mean_confidence_interval',
 'ppl_candidates_full_prompt__entailment::scores',
 'ppl_candidates_full_prompt__entailment::mean',
 'ppl_candidates_full_prompt__entailment::std',
 'ppl_candidates_full_prompt__entailment::mean_confidence_interval',
 'ppl_candidates_full_prompt__contradiction::scores',
 'ppl_candidates_full_prompt__contradiction::mean',
 'ppl_candidates_full_prompt_

# Create a custom view using pandas
You can use the pandas api to manipulate the table and create custom views. 

In [6]:
def my_custom_view(df):
    pt = pd.pivot_table(df, values=["_metric_val", "_metric_val_std"], index=["task", "language", "template", "nb_few_shot_samples", "_metric", "calibration", "run_params::scoring", "run_params::train_sep"],
                        columns=['model_name',
                                ], aggfunc=np.mean)
    pt = pt.swaplevel(0, 1, axis=1).sort_index(axis=1)
    return pt

def cb_view(df, values=["_metric_val", "_metric_val_std"]):
    pt = pd.pivot_table(df, values=values, index=["task", "language", "template", "nb_few_shot_samples", "_metric", "calibration", "run_params::scoring", "run_params::train_sep"],
                        columns=['model_name',
                                ], aggfunc=np.mean)
    pt = pt.swaplevel(0, 1, axis=1).sort_index(axis=1)
    return pt

filtered_df = df[df["run_params::train_sep"] != None]

pt = cb_view(filtered_df, values=["macro_F1::mean", "macro_F1::std"])
pt.to_csv(results_json+"_macroF1.tsv", sep="\t")

pt = cb_view(filtered_df, values=["accuracy::mean", "accuracy::std"])
pt.to_csv(results_json+"_acc.tsv", sep="\t")

pt = cb_view(filtered_df, values=["micro_F1::mean", "micro_F1::std"])
pt.to_csv(results_json+"_microF1.tsv", sep="\t")

pt = cb_view(filtered_df, values=["macro_F1::max", "accuracy::max"])
out_file = results_json+"_max.tsv"
pt.to_csv(out_file, sep="\t")
print(f"exported to {out_file}")

pt = cb_view(filtered_df, values=["micro_F1::mean", "micro_F1::std", "macro_F1::max", "accuracy::mean", "accuracy::std", "accuracy::max"])
out_file = results_json+"_all.tsv"
pt.to_csv(out_file, sep="\t")
print(f"exported to {out_file}")


pt



KeyError: 'macro_F1::mean'

# Make your view available to others
If you think that you created a view that might be useful to others, you can add it to the collect_results.py.
Simply add the `my_custom_view` function to the `display_views` dictionary in [collect_results.py](examples/few_shot/scripts/collect_results.py) and the custom view will be available as -v my_custom_view_freindly.

In [None]:
#df[df["run_params::train_sep"] == "\n\n"]