# Breakout Evaluation Dashboard

Load the exported evaluation CSVs (see `runs/eval_reports/`) and explore serve timing and per-life success rates.
If you produce additional reports, update the paths below to point at the run you want to inspect.

In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display

# Resolve the export locations. The working directory is used as the project
# root when it contains `runs/`; otherwise its parent is used instead.
# Edit `per_life_path` / `per_game_path` to inspect a different export.
PROJECT_ROOT = Path.cwd()
if not PROJECT_ROOT.joinpath('runs').exists():
    PROJECT_ROOT = PROJECT_ROOT.parent
EVAL_ROOT = PROJECT_ROOT.joinpath('runs', 'eval_reports')
PER_LIFE_DEFAULT = EVAL_ROOT.joinpath('deterministic', 'per_life.csv')
PER_GAME_DEFAULT = EVAL_ROOT.joinpath('deterministic', 'per_game.csv')

per_life_path, per_game_path = PER_LIFE_DEFAULT, PER_GAME_DEFAULT

# Both CSVs are required; stop early with a hint on how to produce them.
if not (per_life_path.exists() and per_game_path.exists()):
    raise FileNotFoundError(
        f'Could not find evaluation exports at {per_life_path} / {per_game_path}.\n'
        'Run `python -m src.simple_game.evaluate --output-dir runs/eval_reports/...` first, '
        'or update `per_life_path` / `per_game_path` above.'
    )

per_life = pd.read_csv(per_life_path)
per_game = pd.read_csv(per_game_path)

# Fraction of per-life rows with at least one positive event.
lives_with_reward_share = per_life['positive_events'].gt(0).mean()
print(f'Share of lives scoring ≥1: {lives_with_reward_share:.2%}')

# The per-game ratio column is optional; report its mean only when present.
if 'positive_life_ratio' in per_game:
    mean_ratio = per_game['positive_life_ratio'].mean()
    print(f'Mean positive-life ratio across games: {mean_ratio:.2%}')

# Keep only non-negative step values.
# NOTE(review): negative values presumably mark lives with no positive reward;
# confirm against the exporter.
first_hits = per_life['first_positive_step'].loc[lambda steps: steps.ge(0)]
if first_hits.empty:
    print('No positive events recorded in the selected run.')
else:
    ax = first_hits.hist(bins=20, figsize=(6, 4))
    ax.set(
        title='Frames until first positive reward (per life)',
        xlabel='Environment steps',
        ylabel='Count',
    )
    plt.tight_layout()

plt.show()


In [None]:
# Plot positive reward events per life (one line per game), then list the
# zero-reward lives with the most FIRE presses.

# Minimum FIRE presses for a zero-reward life to be listed below.
HIGH_FIRE_THRESHOLD = 5

# Sort lives inside each game and number them 0..k-1 so every game shares the
# same x-axis regardless of how `life_index` is encoded in the export.
life_seq = per_life.sort_values(['game_index', 'life_index']).copy()
life_seq['life_order'] = life_seq.groupby('game_index').cumcount()

if life_seq.empty:
    # With no rows, `life_order.max()` is NaN and int(NaN) raises ValueError,
    # so skip plotting instead of crashing the cell.
    print('No per-life records available to plot.')
else:
    fig, ax = plt.subplots(figsize=(8, 4))
    for game_idx, group in life_seq.groupby('game_index'):
        ax.plot(group['life_order'], group['positive_events'], marker='o', label=f'Game {game_idx}')

    ax.set_title('Positive reward events per life (by game)')
    ax.set_xlabel('Life order within game')
    ax.set_ylabel('Positive events (brick hits)')
    # One integer tick per life position, up to the longest game.
    ax.set_xticks(range(int(life_seq['life_order'].max()) + 1))
    # Anchor the legend to the right of the axes so it does not cover the lines.
    ax.legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
    plt.tight_layout()
    plt.show()

# Inspect lives with high FIRE usage but zero reward
high_fire_zero = life_seq[
    (life_seq['positive_events'] == 0) & (life_seq['fire_presses'] >= HIGH_FIRE_THRESHOLD)
]
if not high_fire_zero.empty:
    display(high_fire_zero[['game_index', 'life_index', 'fire_presses', 'length']]
            .sort_values('fire_presses', ascending=False)
            .head(10))
else:
    print('No high-FIRE zero-reward lives found in this run.')
