# Analysis

Collect the results of a given experiment and summarize them in a standardized dataframe.

In [None]:
import pandas as pd
from aaai20.io import filename_results

import matplotlib.style as style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt

from aaai20.io import (
    build_filesystem,
    default_prefix_exp_fn_suffix,
    filename_config,
    filename_cli_commands,
    filename_script,
    filename_logs,
    exp_directory
)

from aaai20.cmd_gen import (
    all_fnames_in_dir,
    generate_config_and_log_fname,
    generate_df_commands,
    extract_idx_from_fnames,
    default_start_idx,
    prepare_fs,
)

import os

In [None]:
def convert_json_to_df(fn):
    """
    Load one experiment config file into a dataframe.

    The JSON file is parsed once as a pandas Series. Its ``predict_config``
    entry (a mapping) supplies the columns, and its ``exp_idx`` entry is
    added as an extra ``exp_idx`` column.

    Parameters
    ----------
    fn : str
        Path of the JSON config file. It must provide the keys ``exp_idx``
        and ``predict_config``.

    Returns
    -------
    pandas.DataFrame
        One column per key of ``predict_config`` plus ``exp_idx``; a single
        row whenever ``predict_config`` is non-empty.
    """
    # Parse the file a single time; both fields come from the same Series.
    config = pd.read_json(fn, typ='series')
    exp_idx = config.exp_idx
    predict_config = config.predict_config

    # Wrap every value in a one-element list so that list-valued settings end
    # up in one cell instead of being spread over several rows.
    row = {key: [value] for key, value in predict_config.items()}

    df = pd.DataFrame.from_dict(row)
    df['exp_idx'] = exp_idx

    return df
    

In [None]:
def collect_results_in_directory(exp_keyword=None, script=None, add_config=True):
    """
    Gather results, timings and configs of one experiment into a dataframe.

    The experiment's directories are looked up with ``exp_directory``. Every
    file in the "results" and "timings" directories is read as CSV, every
    file in the "config" directory is converted with ``convert_json_to_df``.
    The timing columns are assigned onto a copy of the results, the configs
    are optionally merged in on ``exp_idx``, and the outcome is written to
    ``out.csv`` inside the "current_exp" directory.

    Parameters
    ----------
    exp_keyword : str, optional
        Passed to ``exp_directory`` as ``exp_dname``.
    script : str, optional
        Passed to ``exp_directory`` as ``script``.
    add_config : bool, default True
        Whether to merge the config dataframe into the output.

    Returns
    -------
    pandas.DataFrame
        The combined dataframe (the same content that is written to disk).
    """

    def _sorted_relpaths(dname):
        # Relative path of every directory entry, in sorted order.
        return sorted(
            os.path.relpath(os.path.join(dname, entry)) for entry in os.listdir(dname)
        )

    # Locate the directories of this experiment
    dirs = exp_directory(exp_dname=exp_keyword, script=script, fs=None)
    results_dir = dirs.get("results")
    timings_dir = dirs.get("timings")
    config_dir = dirs.get("config")

    # Load each family of files; one frame per file, stacked in filename order
    results = pd.concat([pd.read_csv(path) for path in _sorted_relpaths(results_dir)])
    timings = pd.concat([pd.read_csv(path) for path in _sorted_relpaths(timings_dir)])
    configs = pd.concat(
        [convert_json_to_df(path) for path in _sorted_relpaths(config_dir)]
    )

    # Attach the timing columns to a copy of the results (aligned on index)
    combined = results.copy()
    timing_cols = timings.columns
    combined[timing_cols] = timings[timing_cols]

    if add_config:
        combined = combined.merge(configs, left_on="exp_idx", right_on="exp_idx")

    # Persist next to the experiment
    out_path = os.path.join(dirs["current_exp"], "out.csv")
    combined.to_csv(out_path, index=False)

    return combined

In [None]:
def plot(df):
    """
    Plot the mean ``f1_macro`` per difficulty, one line per identifier.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``identifier``, ``difficulty`` and
        ``f1_macro``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Average only the score column: other columns (e.g. ``dataset``) are not
    # meant to be aggregated and would either break ``mean`` or add stray lines.
    means = df.groupby(by=['identifier', 'difficulty'])['f1_macro'].mean()

    plt.figure(figsize=(12, 8))
    for trace in means.index.get_level_values('identifier').unique():
        # Use the difficulties observed for this identifier as x values, so x
        # and y always have the same length.
        curve = means.xs(trace, level='identifier')
        plt.plot(curve.index.values, curve.values, label=trace)

    plt.legend(loc='lower left')
    plt.show()
    return

In [None]:
# Script name and experiment keyword of the PxS run. The collection call in
# the next cell passes these same values as literals.
script = 'run_pxs'
exp_keyword = 'predict-pxs'

In [None]:
# Collect the PxS experiment output (also writes out.csv for that experiment).
df_pxs = collect_results_in_directory(exp_keyword='predict-pxs', script='run_pxs')

In [None]:
# Collect the MERCS experiment output; add_config=True is also the default.
df_mercs = collect_results_in_directory(exp_keyword='md12-random-final', script='run_mercs', add_config=True)

In [None]:
# Build the 'identifier' column that plot() groups by.
# PxS: the identifier is overwritten with the inference algorithm.
df_pxs['identifier'] = df_pxs['inference_algorithm']
# MERCS: `+=` adds the prediction algorithm onto an existing 'identifier'
# column, so that column must already be present in df_mercs.
# NOTE(review): confirm the in-place add (rather than plain assignment as for
# PxS) is intended; re-running this cell adds the suffix again.
df_mercs['identifier'] += df_mercs['prediction_algorithm']

In [None]:
# Stack both experiments into one frame; the original row indices are kept.
dfs = [df_pxs, df_mercs]
df = pd.concat(dfs)

In [None]:
# Draw one line per identifier for the combined experiments.
plot(df)

# Collect configs

In [None]:
# Switch to another experiment; this overwrites the `exp_keyword` and `script`
# values set earlier in the notebook.
exp_keyword='mercs-basic-SL'
script='run_mercs'

# Look up the directories of that experiment.
d = exp_directory(exp_dname=exp_keyword, script=script, fs=None)

dn_res = d.get('results')
dn_tmg = d.get('timings')
dn_cfg = d.get('config')

In [None]:
# Inspect the config directory path (bare expression, shown as cell output).
dn_cfg

In [None]:
# Collect configs
fn_cfg = [os.path.relpath(os.path.join(dn_cfg, f)) for f in os.listdir(dn_cfg)]
fn_cfg.sort()

df_cfg = pd.concat([convert_json_to_df(fn) for fn in fn_cfg])

In [None]:
# Preview the first rows of the collected configs.
df_cfg.head()

In [None]:
# Scratch cell: evaluates to the tuple (1, 2, 3); the value is not stored.
tuple([1,2,3])

In [None]:
# NOTE(review): `df_merge` is not assigned in any of the cells visible here;
# unless it is defined elsewhere, evaluating this cell raises NameError.
df_merge