In [3]:
from pathlib import Path
import re
import pandas as pd
import plotly.express as px
import plotly.io as pio
from IPython.display import display

pio.templates.default = "plotly_white"
RUN_ROOT = Path("../../run/20251203_203859")
assert RUN_ROOT.exists(), f"Missing run directory: {RUN_ROOT}"

In [4]:
# Choose which metric to visualize: 'train' or 'test'.
METRIC = "test"
VALID_METRICS = {"train", "test"}
METRIC_TITLES = {
    "train": "Best Training Reward",
    "test": "Best Test Reward",
}
assert METRIC in VALID_METRICS, f"METRIC must be one of {VALID_METRICS}"
Y_LABEL = METRIC_TITLES[METRIC]

In [5]:
TRAIN_PATTERN = re.compile(
    r"counter: (?P<gen>\d+).*?n_timesteps (?P<steps>[0-9.e+-]+),.*?best \(train\): (?P<best>-?[0-9.e+-]+)",
    flags=re.IGNORECASE,
)
TEST_PATTERN = re.compile(
    r"Test with best x \(max\): (?P<best>-?[0-9.e+-]+)",
    flags=re.IGNORECASE,
)

def parse_metadata(path: Path):
    env, sigma, lambda_val, seed = path.stem.split("_")
    return {
        "environment": env,
        "sigma": float(sigma),
        "lambda": float(lambda_val),
        "seed": int(seed),
    }

def parse_log(path: Path):
    meta = parse_metadata(path)
    records = []
    current_context = None
    with path.open(encoding="utf-8", errors="ignore") as log_file:
        for raw_line in log_file:
            line = raw_line.strip()
            train_match = TRAIN_PATTERN.search(line)
            if train_match:
                current_context = {
                    "generation": int(train_match.group("gen")),
                    "timesteps": float(train_match.group("steps")),
                }
                records.append({
                    **meta,
                    "run_name": f"sigma={meta['sigma']}, lambda={meta['lambda']}, seed={meta['seed']}",
                    "metric": "train",
                    "generation": current_context["generation"],
                    "timesteps": current_context["timesteps"],
                    "score": float(train_match.group("best")),
                    "log_path": str(path),
                })
                continue

            test_match = TEST_PATTERN.search(line)
            if test_match and current_context is not None:
                records.append({
                    **meta,
                    "run_name": f"sigma={meta['sigma']}, lambda={meta['lambda']}, seed={meta['seed']}",
                    "metric": "test",
                    "generation": current_context["generation"],
                    "timesteps": current_context["timesteps"],
                    "score": float(test_match.group("best")),
                    "log_path": str(path),
                })
    return records

log_files = []
all_envs = set()
for env_dir in sorted(RUN_ROOT.iterdir()):
    if not env_dir.is_dir():
        continue
    all_envs.add(env_dir.name)
    for log_file in sorted(env_dir.glob("*.log")):
        log_files.append(log_file)

if not log_files:
    raise FileNotFoundError(f"No .log files found under {RUN_ROOT}")

print(f"Scanning {len(log_files)} log file(s) under {RUN_ROOT}")
records = []
unmatched_logs = []
for log_file in log_files:
    parsed = parse_log(log_file)
    if parsed:
        records.extend(parsed)
    else:
        unmatched_logs.append(str(log_file))

if unmatched_logs:
    print(f"Skipped {len(unmatched_logs)} log(s) with no LM-MA-ES entries:")
    for path in unmatched_logs[:10]:
        print(f" - {path}")
    if len(unmatched_logs) > 10:
        print(f"   ... and {len(unmatched_logs) - 10} more")

df = pd.DataFrame(records)
if df.empty:
    sample_log = log_files[0]
    with sample_log.open(encoding="utf-8", errors="ignore") as fh:
        text = fh.read()
    train_matches = len(TRAIN_PATTERN.findall(text))
    test_matches = len(TEST_PATTERN.findall(text))
    print(
        "Parsed 0 rows — sample log",
        sample_log,
        f"had {train_matches} train matches and {test_matches} test matches",
    )
    raise ValueError("No LM-MA-ES log lines were parsed; see skipped log list above.")

summary = (
    df.groupby(["environment", "metric"])["run_name"].nunique().unstack("metric", fill_value=0)
)
display(summary.sort_index())

metric_df = df[df["metric"] == METRIC].copy()
if metric_df.empty:
    print(f"No records available for metric '{METRIC}'. Nothing will be plotted.")

covered_envs = set(df["environment"].unique())
missing_envs = sorted(all_envs - covered_envs)
if missing_envs:
    print("No valid data parsed for environments:")
    for env in missing_envs:
        print(f" - {env}")

missing_metric_envs = sorted(covered_envs - set(metric_df["environment"].unique()))
if missing_metric_envs:
    print(f"No '{METRIC}' records for:")
    for env in missing_metric_envs:
        print(f" - {env}")

metric_df.sort_values(["environment", "run_name", "timesteps"], inplace=True)
metric_df.head()

Scanning 3822 log file(s) under ../../run/20251203_203859
Skipped 790 log(s) with no LM-MA-ES entries:
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_0.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_108.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_12.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_24.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_36.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_48.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_60.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_72.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_84.log
 - ../../run/20251203_203859/Acrobot-v1/Acrobot-v1_0.01_12_96.log
   ... and 780 more


metric,test,train
environment,Unnamed: 1_level_1,Unnamed: 2_level_1
Ant-v4,210,210
Assault-v5,140,140
Atlantis-v5,210,210
BeamRider-v5,140,140
BipedalWalker-v3,210,210
Boxing-v5,140,140
CartPole-v1,210,210
CrazyClimber-v5,140,140
Enduro-v5,22,22
HalfCheetah-v4,210,210


No valid data parsed for environments:
 - Acrobot-v1
 - Breakout-v5


Unnamed: 0,environment,sigma,lambda,seed,run_name,metric,generation,timesteps,score,log_path
5,Ant-v4,0.01,108.0,0,"sigma=0.01, lambda=108.0, seed=0",test,5,537000.0,1014.188547,../../run/20251203_203859/Ant-v4/Ant-v4_0.01_1...
11,Ant-v4,0.01,108.0,0,"sigma=0.01, lambda=108.0, seed=0",test,10,1040000.0,1409.707457,../../run/20251203_203859/Ant-v4/Ant-v4_0.01_1...
17,Ant-v4,0.01,108.0,0,"sigma=0.01, lambda=108.0, seed=0",test,15,1540000.0,1477.608182,../../run/20251203_203859/Ant-v4/Ant-v4_0.01_1...
23,Ant-v4,0.01,108.0,0,"sigma=0.01, lambda=108.0, seed=0",test,20,2020000.0,1764.032041,../../run/20251203_203859/Ant-v4/Ant-v4_0.01_1...
29,Ant-v4,0.01,108.0,0,"sigma=0.01, lambda=108.0, seed=0",test,25,2470000.0,1783.219872,../../run/20251203_203859/Ant-v4/Ant-v4_0.01_1...


## Reward vs. Timesteps
Set `METRIC` in the configuration cell above to switch between training and test curves.

In [6]:
import numpy as np
import plotly.graph_objects as go

def hex_to_rgba(hex_color, opacity):
    hex_color = hex_color.lstrip('#')
    if len(hex_color) == 3:
        hex_color = ''.join([c*2 for c in hex_color])
    return f"rgba({int(hex_color[0:2], 16)}, {int(hex_color[2:4], 16)}, {int(hex_color[4:6], 16)}, {opacity})"

# Group by sigma and lambda to aggregate over seeds
# If you want to aggregate over sigmas as well, remove "sigma" from this list
GROUP_COLS = ["sigma", "lambda"]

for env, env_df in metric_df.groupby("environment"):
    title = f"{env} — {METRIC_TITLES[METRIC]} vs. Timesteps (Aggregated)"
    
    # Create a unique identifier for the group
    env_df = env_df.copy()
    env_df["group_id"] = env_df.apply(lambda row: ", ".join([f"{k}={row[k]}" for k in GROUP_COLS]), axis=1)
    
    # Define common x-axis
    min_step = env_df["timesteps"].min()
    max_step = env_df["timesteps"].max()
    common_timesteps = np.linspace(min_step, max_step, 200)
    
    agg_data = []
    
    for group_id, group_df in env_df.groupby("group_id"):
        interp_scores = []
        # Iterate over seeds
        for run_name, run_df in group_df.groupby("run_name"):
            run_df = run_df.sort_values("timesteps").drop_duplicates("timesteps")
            # Interpolate
            y = np.interp(common_timesteps, run_df["timesteps"], run_df["score"], left=np.nan, right=np.nan)
            interp_scores.append(y)
        
        interp_scores = np.array(interp_scores)
        # Compute stats
        mean = np.nanmean(interp_scores, axis=0)
        std = np.nanstd(interp_scores, axis=0)
        
        agg_data.append({
            "group_id": group_id,
            "timesteps": common_timesteps,
            "mean": mean,
            "std": std
        })
    
    # Plot
    fig = go.Figure()
    # Sort for consistent legend
    agg_data.sort(key=lambda x: x["group_id"])
    
    colors = px.colors.qualitative.Plotly
    
    for i, d in enumerate(agg_data):
        color_hex = colors[i % len(colors)]
        fill_color = hex_to_rgba(color_hex, 0.2)
        
        x = d["timesteps"]
        y = d["mean"]
        y_std = d["std"]
        
        # Filter NaNs
        mask = ~np.isnan(y)
        x_plot = x[mask]
        y_plot = y[mask]
        y_std_plot = y_std[mask]
        
        if len(x_plot) == 0:
            continue

        # Upper and Lower bounds
        y_upper = y_plot + y_std_plot
        y_lower = y_plot - y_std_plot
        
        # Shaded area
        fig.add_trace(go.Scatter(
            x=np.concatenate([x_plot, x_plot[::-1]]),
            y=np.concatenate([y_upper, y_lower[::-1]]),
            fill='toself',
            fillcolor=fill_color,
            line=dict(color='rgba(255,255,255,0)'),
            hoverinfo="skip",
            showlegend=False,
            name=d["group_id"] + " std"
        ))
        
        # Mean line
        fig.add_trace(go.Scatter(
            x=x_plot,
            y=y_plot,
            line=dict(color=color_hex),
            mode='lines',
            name=d["group_id"]
        ))

    fig.update_layout(
        title=title,
        xaxis_title="Environment Timesteps",
        yaxis_title=Y_LABEL,
        hovermode="x unified"
    )
    fig.show()

  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  mean = np.nanmean(interp_scores, axis=0)
  var = nanv


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slic


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.




Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice.


Mean of empty slice


Degrees of freedom <= 0 for slice

In [None]:
# Investigate Atari environments stuck at constant reward
atari_envs = [env for env in metric_df['environment'].unique() if 'v5' in env]
print(f"Found {len(atari_envs)} Atari environments")

for env in atari_envs:
    env_data = metric_df[metric_df['environment'] == env]
    unique_scores = env_data['score'].nunique()
    most_common_score = env_data['score'].mode()[0] if len(env_data) > 0 else None
    score_counts = env_data['score'].value_counts()
    
    print(f"\n{env}:")
    print(f"  Unique scores: {unique_scores}")
    print(f"  Most common score: {most_common_score}")
    print(f"  Top 3 score frequencies:")
    for score, count in score_counts.head(3).items():
        pct = 100 * count / len(env_data)
        print(f"    {score}: {count} times ({pct:.1f}%)")