In [2]:
import pandas as pd
from mario_replays.load_data import load_replay_sidecars

In [5]:
# Relative path handed to load_replay_sidecars (presumably the directory that
# holds the replay sidecar files -- confirm against mario_replays.load_data).
replay_dir = '../outputdata/replays'
# Load the sidecar data for that path; later cells read it as `replay_files`.
replay_files = load_replay_sidecars(replay_dir)

In [6]:
# Show the loaded table; the rendered output below elides the middle rows.
replay_files

Unnamed: 0,Subject,World,Level,Duration,Cleared,Final_score,Final_position,Lives_lost,Hits_taken,Enemies_killed,Powerups_collected,Bricks_destroyed,Coins,idx_in_run,run,global_idx,level_idx,phase,level_fullname
0,01,1,1,40.616667,True,2105,3116,0,0,5,0,0,2,3,run-01,3,3,discovery,w1l1
1,01,1,3,58.216667,True,1985,2371,2,2,2,0,0,7,5,run-03,26,5,discovery,w1l3
2,01,2,1,58.950000,False,220,2278,3,3,6,0,0,0,6,run-04,39,6,discovery,w2l1
3,01,1,3,58.850000,True,1920,2376,2,2,2,0,0,5,2,run-03,23,2,discovery,w1l3
4,01,1,1,80.516667,True,1900,3127,2,2,4,0,0,2,0,run-01,0,0,discovery,w1l1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3369,06,7,1,180.750000,True,2915,2824,2,2,10,1,2,19,1,run-03,596,26,practice,w7l1
3370,06,3,3,75.666667,True,1935,2360,0,0,11,1,0,21,3,run-02,592,27,practice,w3l3
3371,06,2,3,46.366667,True,1820,3536,0,0,0,0,0,10,0,run-03,595,28,practice,w2l3
3372,06,1,2,142.700000,True,2150,2817,-1,0,15,2,18,69,4,run-01,588,23,practice,w1l2


In [45]:
import pandas as pd

def summarize_phase_data(df):
    """Aggregate replays per subject and phase into one wide row per subject.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Subject``, ``phase``, ``Cleared`` and
        ``Duration`` (``Duration`` is interpreted as seconds).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Subject``. For every phase found in ``df`` it holds the
        columns ``Repetitions (<Phase>)`` (count of ``Cleared`` values),
        ``Duration (<Phase>)`` (summed duration as a timedelta) and
        ``Success_rate (<Phase>)`` (mean of ``Cleared``), where ``<Phase>`` is
        the capitalized phase label.
    """
    grouped = df.groupby(['Subject', 'phase'])
    per_phase = grouped.agg(
        Repetitions=('Cleared', 'count'),
        Duration=('Duration', 'sum'),
        Success_rate=('Cleared', 'mean'),
    )
    per_phase = per_phase.reset_index()

    # Summed seconds become timedeltas so later steps can add and format them.
    per_phase['Duration'] = pd.to_timedelta(per_phase['Duration'], unit='s')

    # One row per subject; columns become (metric, phase) pairs.
    pivoted = per_phase.pivot(index='Subject', columns='phase')

    # Flatten the two-level column index into "<metric> (<Phase>)" labels.
    flat_labels = []
    for metric, phase in pivoted.columns:
        flat_labels.append(f"{metric} ({phase.capitalize()})")
    pivoted.columns = flat_labels
    return pivoted

def add_combined_metrics(df):
    """Append across-phase totals to a per-subject summary table.

    Adds three columns to a copy of ``df`` (the input is left untouched):

    * ``Repetitions (Total)``  -- discovery + practice repetitions.
    * ``Success_rate (Total)`` -- success rate weighted by the repetitions of
      each phase; 0 when a row has no repetitions at all.
    * ``Duration (Total)``     -- discovery + practice duration as a timedelta.

    A phase that is missing for a subject (a NaN/NaT cell, or an absent
    column) contributes nothing to the totals. Previously such a row got a
    total success rate of 0 and a ``NaT`` total duration, because the NaN
    propagated through the row-wise arithmetic.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide summary with ``Repetitions (...)``, ``Success_rate (...)`` and
        ``Duration (...)`` columns for the ``Discovery`` and ``Practice``
        phases.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the three ``(Total)`` columns appended.
    """
    df = df.copy()

    def _numeric(name):
        # Missing cells, or a missing column, count as zero.
        if name in df.columns:
            return df[name].fillna(0)
        return pd.Series(0, index=df.index)

    def _duration(name):
        # Same idea for durations: a missing phase adds zero time.
        if name in df.columns:
            return pd.to_timedelta(df[name]).fillna(pd.Timedelta(0))
        return pd.Series(pd.Timedelta(0), index=df.index)

    reps_discovery = _numeric('Repetitions (Discovery)')
    reps_practice = _numeric('Repetitions (Practice)')
    rate_discovery = _numeric('Success_rate (Discovery)')
    rate_practice = _numeric('Success_rate (Practice)')

    total_reps = reps_discovery + reps_practice
    df['Repetitions (Total)'] = total_reps

    # Repetition-weighted mean of the two phase rates. Rows without any
    # repetitions get a NaN denominator and are then reported as 0.
    weighted_successes = reps_discovery * rate_discovery + reps_practice * rate_practice
    safe_total = total_reps.where(total_reps > 0)
    df['Success_rate (Total)'] = (weighted_successes / safe_total).fillna(0)

    df['Duration (Total)'] = _duration('Duration (Discovery)') + _duration('Duration (Practice)')
    return df

def add_total_row(df):
    """Return a copy of ``df`` with a trailing row labelled ``'Total'``.

    The extra row is assembled column by column:

    * columns whose name starts with ``Success_rate`` hold the mean of the
      column (an unweighted mean over the rows);
    * columns whose name contains ``Duration`` hold the summed timedelta;
    * the remaining columns picked by ``select_dtypes(include='number')`` hold
      the column sum.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-subject summary table.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` followed by the ``'Total'`` row.
    """
    table = df.copy()

    rate_columns = [name for name in table.columns if name.startswith("Success_rate")]
    summable_columns = [
        name
        for name in table.select_dtypes(include='number').columns
        if name not in rate_columns
    ]

    summed = table[summable_columns].sum()
    averaged = table[rate_columns].mean()
    footer = pd.concat([summed, averaged])

    # Duration totals are written last so they replace any value the numeric
    # sum above may already have produced for the same column.
    for name in table.columns:
        if "Duration" in name:
            footer[name] = pd.to_timedelta(table[name]).sum()

    footer.name = 'Total'
    footer_frame = pd.DataFrame([footer])
    return pd.concat([table, footer_frame])

def format_duration_column(df):
    """Render every duration column as an ``HH:MM:SS`` string.

    A column is treated as a duration column when its name contains
    ``"Duration"``. Each value is parsed with ``pandas.to_timedelta``,
    truncated to whole seconds and formatted with zero-padded fields; hours
    are not wrapped at 24. Values that are null, or that parse to ``NaT``,
    become the empty string.

    The function works on a copy, so the caller's DataFrame keeps its
    timedelta columns. This matches the other summary helpers, which also
    copy their input.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose duration columns hold timedelta-like values.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the duration columns replaced by strings.
    """
    def fmt(td):
        # Parse first so anything that resolves to NaT is blanked instead of
        # reaching int(nan) below.
        td = pd.to_timedelta(td)
        if pd.isnull(td):
            return ''
        s = int(td.total_seconds())
        return f"{s // 3600:02}:{(s % 3600) // 60:02}:{s % 60:02}"

    df = df.copy()
    for col in df.columns:
        if "Duration" in col:
            df[col] = df[col].apply(fmt)
    return df

def rename_columns(df):
    """Return ``df`` with presentation-friendly column labels.

    In every column name ``"Success_rate"`` is replaced by ``"Success rate"``
    and ``"Duration_HMS"`` by ``"Duration"``; other names pass through
    unchanged. The input frame is not modified.
    """
    display_names = {}
    for original in df.columns:
        label = original.replace("Success_rate", "Success rate")
        display_names[original] = label.replace("Duration_HMS", "Duration")
    return df.rename(columns=display_names)

def output_markdown_table(df):
    """Print ``df`` to stdout as a GitHub-style markdown table.

    ``DataFrame.to_markdown`` depends on the optional ``tabulate`` package.
    """
    markdown = df.to_markdown(tablefmt="github")
    print(markdown)

# Build the per-subject summary: aggregate per phase, add across-phase totals,
# append the 'Total' row, then format durations and column labels for display.
summary = summarize_phase_data(replay_files)
summary = add_combined_metrics(summary)
summary = add_total_row(summary)
summary = format_duration_column(summary)
summary = rename_columns(summary)
summary.index.name = 'Subject'

# The former column filter (`not col.startswith("Duration (") or "Duration" in col`)
# was true for every column name, so it always selected the whole table.
# Keep all columns and take an explicit copy instead.
final_summary = summary.copy()
output_markdown_table(final_summary)


| Subject   |   Repetitions (Discovery) |   Repetitions (Practice) | Duration (Discovery)   | Duration (Practice)   |   Success rate (Discovery) |   Success rate (Practice) |   Repetitions (Total) |   Success rate (Total) | Duration (Total)   |
|-----------|---------------------------|--------------------------|------------------------|-----------------------|----------------------------|---------------------------|-----------------------|------------------------|--------------------|
| 01        |                       230 |                      567 | 03:54:27               | 09:47:11              |                   0.578261 |                  0.781305 |                   797 |               0.72271  | 13:41:38           |
| 02        |                       227 |                      487 | 04:57:35               | 12:30:24              |                   0.400881 |                  0.671458 |                   714 |               0.585434 | 17:27:59           |
| 03        |       

Collecting tabulate
  Using cached tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Using cached tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
