In [None]:
import pandas as pd
from pathlib import Path
from os import listdir
import os.path
from collections import defaultdict
import re
import numpy as np
def _format_float(value):
    """Format a float with exactly five decimal places for pandas display."""
    return '%.5f' % value


# Render every float in DataFrame/Series output with five decimals.
pd.set_option('display.float_format', _format_float)


In [None]:
# Root directory that holds one sub-directory per experiment run. The default is
# the original hard-coded location; set the RAPID_LEARN_DATA_DIR environment
# variable to run the notebook against a different checkout without editing it.
repo_dir = Path(os.environ.get("RAPID_LEARN_DATA_DIR", "/home/luk/repos/RAPid-Learn/data"))
# Directory name of the experiment run analysed by this notebook.
experiment_id = "ce84fd25cf54415bb3885a37d63e37a1-2021-10-05_10:20:18-policy_gradient-10000episodes-rewardshapingon"
# Folder the per-novelty trial data is read from.
experiment_path = repo_dir / experiment_id

In [None]:
# Sub-directories of the experiment folder that are scanned for trial CSVs,
# listed in the order in which they are processed.
novelties = [
    "prenovelty",
    "axetobreakhard",
    "scrapeplank",
    "axefirecteasy",
    "firecraftingtablehard",
]

# Per experiment id: the number of the single prenovelty trial that is kept when
# results are collected; every other prenovelty trial is skipped.
PRENOVELTY_TRANSFER_TRIAL_IDS = {
    "5bcc1f9ee3394fd49c784d356b806992-2021-10-05_16:52:30-PPO-10000episodes-rewardshapingon": 1,
    "ce84fd25cf54415bb3885a37d63e37a1-2021-10-05_10:20:18-policy_gradient-10000episodes-rewardshapingon": 2,
}

In [None]:
def extract_number(f):
    """Return the first run of decimal digits in ``f`` as an int.

    Parameters
    ----------
    f : str
        Text to search, e.g. a file name.

    Returns
    -------
    int
        The integer value of the first digit run, or ``-1`` when ``f``
        contains no digits at all.
    """
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    # re.search stops at the first match instead of collecting all of them.
    match = re.search(r"\d+", f)
    return int(match.group()) if match else -1

In [None]:
# Map each novelty name to the CSV files found exactly one level below it,
# i.e. <experiment_path>/<novelty>/<trial_dir>/<file>.csv.
csvs_per_novelty = defaultdict(list)
for novelty in novelties:
    novelty_dir = experiment_path / novelty
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # the collected paths (and of everything derived from them) reproducible.
    for trial_dir in sorted(listdir(novelty_dir)):
        trial_path = novelty_dir / trial_dir
        if not os.path.isdir(trial_path):
            # Plain files directly under the novelty folder are not trials.
            continue
        for filename in sorted(os.listdir(trial_path)):
            if filename.endswith(".csv"):
                csvs_per_novelty[novelty].append(trial_path / filename)

In [None]:
# Collect, for every trial CSV, the evaluation rows that belong to its last
# episode, tagged with trial/novelty metadata and a convergence flag, and stack
# them into a single frame.
dfs = []
timesteps_dfs = []
for novelty in novelties:
    for csv_path in csvs_per_novelty[novelty]:
        # The trial number is the first run of digits in the CSV file name.
        trial_id = extract_number(str(os.path.basename(os.path.normpath(csv_path))))
        print(f"novelty: {novelty}, trialid: {trial_id}")

        # Only one designated prenovelty trial is kept per experiment; decide
        # this before paying for the CSV read.
        is_prenovelty = novelty == "prenovelty"
        if is_prenovelty and trial_id != PRENOVELTY_TRANSFER_TRIAL_IDS[experiment_id]:
            print(f"Skipping trial {trial_id} for prenovelty data")
            continue

        # The first line of the file is skipped (presumably a metadata header
        # written by the logger -- TODO confirm).
        df = pd.read_csv(csv_path, skiprows=[0])

        df["trial"] = trial_id
        df["novelty"] = novelty

        is_eval = df["mode"] == "eval"

        # Running total of column "l" over the "learn" rows (presumably the
        # episode length in timesteps -- TODO confirm), carried forward onto
        # the rows that follow. Assign the result back instead of calling
        # ffill(inplace=True) on a column selection, which does not update
        # `df` under pandas Copy-on-Write.
        df["ts_trained"] = df.loc[df["mode"] == "learn", "l"].cumsum()
        df["ts_trained"] = df["ts_trained"].ffill()

        if is_prenovelty:
            # Derive the episode counter from the row index: "eval" rows are
            # blanked and then inherit the counter of the preceding non-eval
            # row. Other novelties are expected to already provide an
            # "episode_counter" column in the CSV.
            df["episode_counter"] = df.index.to_series().mask(is_eval).ffill()

        last_episode = df["episode_counter"].max()
        # .copy() so that adding a column below modifies an independent frame
        # rather than a slice of `df`.
        final_eval_rows = df.loc[(df["episode_counter"] == last_episode) & is_eval].copy()
        # Any trial that ran fewer than the max number of episodes converged
        final_eval_rows["converged"] = final_eval_rows["episode_counter"].max() <= 9998

        print(final_eval_rows)
        dfs.append(final_eval_rows)

if not dfs:
    # pd.concat would raise a ValueError with a generic message; name the cause.
    raise ValueError(f"No trial CSVs were collected under {experiment_path}")

results_df = pd.concat(dfs).reset_index(drop=True)

In [None]:
# Display the stacked final-evaluation rows of all processed trials.
results_df

In [None]:
# Mean and standard deviation of the "success" column per novelty, with the
# novelty moved back from the index into a regular column.
agg_results_df_success = (
    results_df
    .groupby(by=["novelty"])
    .agg({'success': ['mean', 'std']})
    .reset_index()
)

In [None]:
# Temporarily lift pandas' row/column display limits so the whole table prints.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    print(agg_results_df_success)

In [None]:
# Mean and standard deviation of "ts_trained" per novelty, computed only over
# the rows flagged as converged.
agg_results_df_ts_trained = (
    results_df
    .loc[results_df.converged]
    .groupby(by=["novelty"])
    .agg({'ts_trained': ['mean', 'std']})
    .reset_index()
)

In [None]:
# Temporarily lift pandas' row/column display limits so the whole table prints.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    print(agg_results_df_ts_trained)

In [None]:
# Combine the success statistics with the training-timestep statistics.
# The timestep table only contains novelties with at least one converged trial,
# so a left merge is used: every novelty keeps its success statistics and gets
# NaN timestep statistics when none of its trials converged (the default inner
# merge would drop such novelties from the summary altogether).
summarised_results = pd.merge(
    agg_results_df_success,
    agg_results_df_ts_trained,
    on="novelty",
    how="left",
)

In [None]:
# Display the merged per-novelty summary (success and ts_trained statistics).
summarised_results

In [None]:
# Write the per-trial final evaluation rows into the experiment's own folder.
full_results_path = repo_dir / experiment_id / "full_results.csv"
results_df.to_csv(full_results_path)

In [None]:
# Write the per-novelty summary table into the experiment's own folder.
results_summary_path = repo_dir / experiment_id / "results_summary.csv"
summarised_results.to_csv(results_summary_path)