## Equivariance Intuition

In [None]:
import numpy as np

a=np.array([[1,2,3],[2,2,2],[3,3,3],])
b=np.array([[2,2,2],[3,3,3],[1,2,3],])
c=np.array([[3,3,3],[1,2,3],[2,2,2],])
d=np.array([[1,2,3],[3,3,3],[2,2,2],])
lam = np.array([[1,2,1],[1,1,1],[1,2,1],])
gam = np.ones((3,3))
w = np.array([4,4.5,3.5])

def func(x):
    """This on is like their equivariant"""
    # return np.sum(x@( w*np.eye(3) + w*np.ones((3,3)) ) ,axis=0)
    # return np.sum(x@( w*np.ones((3,3)) ) ,axis=0)
    return np.sum(x@( w*np.ones((3,3)) ) ,axis=0)

print(func(a))
print(func(b))
print(func(c))
print(func(d))

## Data Analysis

In [1]:
import wandb
import itertools
import pandas as pd
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor
from typing import Optional
from scipy import stats

ENTITY = "no-organization-for-signup"
TRAINING = "hypergrid_v3"
EVALUATE = "hypergrid_v3_eval"
SUPPEVAL = "hypergrid_v3_eval_sup"

## Hardware Utilization

In [4]:
# Perf Ram Utilization
api = wandb.Api()
runs = api.runs(f'{ENTITY}/{TRAINING}')
hom = []
het = []
# Get the Runs
for run in runs:
    try:
        ram_util_series = run.history()["perf/ram_util_percent"]
        if run.config["induced_hom"]:
            hom.append(ram_util_series)
        else:
            het.append(ram_util_series)
    except KeyError:
        print(f"Key `perf/ram_util_percent` not found in run {run.id}")

# Cast to dataframes?
ram_df = pd.DataFrame({
    "homogeneous": pd.concat(hom).reset_index(drop=True),
    "heterogeneous": pd.concat(het).reset_index(drop=True),
})
ram_df["homogeneous"] = ram_df["homogeneous"].fillna(ram_df["homogeneous"].min())
ram_df["heterogeneous"] = ram_df["heterogeneous"].fillna(ram_df["heterogeneous"].min())

# Basic Stats
ram_stats = pd.DataFrame()
ram_stats["mean"] = ram_df.mean()
ram_stats["variance"] = ram_df.var()
print(ram_stats)

# 
samples = [ram_df["homogeneous"], ram_df["heterogeneous"]]
print("t = {:.3f}, p = {:.3g}".format(*stats.ttest_ind(*samples)))
print("U = {}, p = {:.3g}".format(*stats.mannwhitneyu(*samples)))
print("KS = {:.3f}, p = {:.3g}".format(*stats.ks_2samp(*samples)))

[34m[1mwandb[0m: Currently logged in as: [33mbhosley[0m ([33mno-organization-for-signup[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Key `perf/ram_util_percent` not found in run 5f6a8_00000
                   mean  variance
homogeneous    5.736991  1.526528
heterogeneous  5.628408  1.839968
t = 7.448, p = 9.71e-14
U = 129999044.0, p = 1.95e-08
KS = 0.066, p = 3.06e-30


Homogenation saves 0.019 % in system memory Probably can't use that. Despite per model improvement.
75% improvement on disk - 

## Training

In [2]:
def load_train_runs(
    entity: str = ENTITY,
    project: str = TRAINING,
    max_runs: Optional[int] = None,
) -> pd.DataFrame:
    """ """
    api = wandb.Api()
    runs = api.runs(f'{entity}/{project}')
    if max_runs is not None:
        runs = itertools.islice(runs, max_runs)
    rows = []

    for run in runs:
        hist = run.history()
        summ = run.summary._json_dict
        cfg = run.config
        row = {**cfg, **summ, "history":hist}
        rows.append(row)

    df = pd.DataFrame(rows)
    return df

df_tr = load_train_runs()

In [230]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import math

KEY = "env_runners/episode_return_mean"
TOP_X = 30   # examples: 5 (top 5 runs)

# ---- 1) Build long-form df from df_tr ----
records = []
for row in df_tr.itertuples():
    hist = getattr(row, "history", None)
    if KEY not in hist:
        continue
    series = hist[KEY] #
    # Make a unique ID per original training run/row
    trace_id = getattr(row, "run_id", row.Index)
    for step, val in enumerate(series):
        records.append({
            "step": step,
            "value": val,
            "sensor_conf": getattr(row, "sensor_conf"),
            "induced_hom": bool(getattr(row, "induced_hom")),
            "trace_id": trace_id,
        })

df_long = pd.DataFrame.from_records(records)

# # Stable facet order
cats_order = list(pd.unique(df_long["sensor_conf"]))
df_long["sensor_conf"] = pd.Categorical(df_long["sensor_conf"], categories=cats_order, ordered=True)

# ---- 2) Score each run within (sensor_conf, induced_hom) ----
def tail_mean(group: pd.DataFrame) -> float:
    # (sensor_conf, induced_hom, trace_id) subset across steps
    vals = group["value"].to_numpy()
    if len(vals) == 0:
        return -np.inf
    w = max(5, math.ceil(0.10 * len(vals)))  # last 10% (>=5)
    w = min(w, len(vals))
    return float(np.mean(vals[-w:]))

scores = (
    df_long
    .groupby(["sensor_conf", "induced_hom", "trace_id"], observed=True, sort=False)
    .apply(tail_mean)
    .rename("perf")
    .reset_index()
)

# ---- 3) Keep top X per (sensor_conf, induced_hom) ----
perf_series = scores.set_index(["sensor_conf", "induced_hom", "trace_id"])["perf"]

def select_top_x(s: pd.Series) -> pd.Index:
    n = len(s)
    if n == 0:
        return s.index[:0]
    if isinstance(TOP_X, float) and 0 < TOP_X <= 1:
        k = max(1, math.ceil(TOP_X * n))
    elif isinstance(TOP_X, int) and TOP_X >= 1:
        k = min(TOP_X, n)
    else:
        raise ValueError("TOP_X must be an int >=1 or float in (0,1].")
    return s.nlargest(k).index  # tie-breaks by value then by index order

winners_idx = (
    perf_series
    .groupby(level=[0, 1], observed=True)
    .apply(select_top_x)
    .explode()
)

winner_keys = set(winners_idx)
mask = df_long.set_index(["sensor_conf", "induced_hom", "trace_id"]).index.isin(winner_keys)
df_long_top = df_long[mask].copy()

# ---- 4) Aggregate mean + band (std or CI) over top-X only ----
g = (
    df_long_top
    .groupby(["sensor_conf", "induced_hom", "step"], observed=True)
    .agg(mean=("value", "mean"),
         n=("value", "size"),
         std=("value", "std"))
    .reset_index()
)
g["std"] = g["std"].fillna(0.0)
g["se"] = g["std"] / np.sqrt(g["n"].clip(lower=1))

# Choose your band: 1*std (wider, intuitive) or 1.96*se (95% CI)
USE_STD_BAND = True
g["band"] = g["std"] if USE_STD_BAND else 1.96 * g["se"]
g["lower"] = g["mean"] - g["band"]
g["upper"] = g["mean"] + g["band"]

# ---- 5) Base figure (mean lines) ----
fig = px.line(
    g,
    x="step",
    y="mean",
    color="induced_hom",
    facet_row="sensor_conf",
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

# ---- 6) Shaded ribbons (mean ± band) ----
colorway = fig.layout.colorway or px.colors.qualitative.Plotly
color_levels = list(g["induced_hom"].drop_duplicates())

def hex_to_rgba(hex_color, alpha=0.20):
    hex_color = hex_color.lstrip("#")
    r, gg, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
    return f"rgba({r},{gg},{b},{alpha})"

color_map = {lvl: colorway[i % len(colorway)] for i, lvl in enumerate(color_levels)}
fill_map  = {lvl: hex_to_rgba(color_map[lvl], 0.20) for lvl in color_levels}

for (conf, ih), df_grp in g.groupby(["sensor_conf", "induced_hom"], observed=True):
    row_idx = cats_order.index(conf) + 1
    df_grp = df_grp.sort_values("step")

    fig.add_trace(
        go.Scatter(
            x=df_grp["step"], y=df_grp["upper"],
            mode="lines", line=dict(width=0),
            showlegend=False, hoverinfo="skip", legendgroup=f"{ih}",
        ),
        row=row_idx, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=df_grp["step"], y=df_grp["lower"],
            mode="lines", line=dict(width=0),
            fill="tonexty", fillcolor=fill_map[ih],
            showlegend=False, hoverinfo="skip", legendgroup=f"{ih}",
        ),
        row=row_idx, col=1
    )

title_suffix = (
    f"top {TOP_X*100:.0f}% runs" if isinstance(TOP_X, float)
    else f"top {TOP_X} runs"
)
fig.update_layout(
    yaxis_title=f"mean(episode_return) — {title_suffix}",
    legend_title_text="Homogeneous",
    margin=dict(t=40, r=10, b=10, l=10),
)
fig.update_xaxes(tickangle=45)

fig.show()





## Evaluation

- [ ] Pull performance metrics

In [54]:
def load_eval_means(
    entity: str = ENTITY,
    project: str = EVALUATE,
    # projects: list = (EVALUATE, SUPPEVAL),
    max_runs: Optional[int] = None,
) -> pd.DataFrame:
    """ """
    api = wandb.Api()
    runs = api.runs(f'{entity}/{project}')
    runs = (
        list(api.runs(f'{ENTITY}/{EVALUATE}')) +
        list(api.runs(f'{ENTITY}/{SUPPEVAL}'))
    )
    if max_runs is not None:
        runs = itertools.islice(runs, max_runs)
    rows = []
    for run in runs:
        data = {
            "eval_type": run.config["eval_type"] or None,
            "policy_type": run.config["policy_type"] or "hetero",
            "sensor_config": run.config["sensor_config"],
            "returns/mean": run.summary._json_dict["metrics/returns/mean"],
            "returns/max": run.summary._json_dict["metrics/returns/max"],
            "returns/min": run.summary._json_dict["metrics/returns/min"],
        }
        for v in run.history()["metrics/returns/mean"]:
            rows.append(data|{"returns/mean":v})

    df = pd.DataFrame(rows)
    return df

In [None]:
# Training AUC
# - Split by train sensors perf

# IMPORTS

#['eval_type', 'policy_type', 'metrics/returns/mean', 'metrics/returns/max', 'metrics/returns/min', 'metrics/returns/policy_0', 'metrics/returns/policy_1', 'metrics/returns/policy_2', 'metrics/returns/policy_3'],


# df = load_eval_means(max_runs=400)

In [None]:
df = load_eval_means()

# Suppose the column is 'policy_type' and values are reversed
# fix_map = {
#     "default_het": "induced_hom",   # swap
#     "induced_hom": "default_het"
# }
# df["policy_type"] = df["policy_type"].map(fix_map)

df.to_csv("eval_metrics.csv", index=False)

In [228]:
df.head()

Unnamed: 0,eval_type,policy_type,sensor_config,returns/mean,returns/max,returns/min,metric_shifted
0,baseline,induced_hom,disjoint_span,-3.75,0.0,-5.0,3.75
1,baseline,induced_hom,disjoint_span,-3.7375,0.0,-5.0,3.7375
2,baseline,induced_hom,disjoint_span,-3.75,0.0,-5.0,3.75
3,baseline,induced_hom,disjoint_span,-3.475,0.0,-5.0,3.475
4,baseline,induced_hom,disjoint_span,-3.75,0.0,-5.0,3.75


In [226]:
import pandas as pd
import numpy as np
import plotly.express as px

SHIFT  = 1          # constant to add to the metric
MULT = 1
TOP_K  = 12          # keep top-K runs per (eval_type, policy_type, sensor_config)
# METRIC = "returns/max"
METRIC = "returns/mean"
FACTORS = ["eval_type","policy_type","sensor_config"]
df = df.copy()

# Sort by shifted metric and keep top-K per bucket
df_top = (
    df.sort_values(METRIC, ascending=False)
      .groupby(FACTORS, group_keys=False)
      .head(TOP_K)
)
df_top["metric_shifted"] = (pd.to_numeric(df_top[METRIC], errors="coerce") + SHIFT )* MULT 

eval_order = ['baseline', 'agent_loss', 'sensor_degradation',
       'sensor_improvement', 'degrade_coverage', 'improve_coverage',
       'shuffled_set', 'novel_span']
df_top["eval_type"] = pd.Categorical(df_top["eval_type"], categories=eval_order, ordered=True)

sens_order = ["complete", "intersecting_span", "disjoint_span", "incomplete"]
df_top["sensor_config"] = pd.Categorical(df_top["sensor_config"], categories=sens_order, ordered=True)

label_map = {
    "baseline": "Baseline", 
    "agent_loss": "Loss of Agent", 
    "sensor_degradation": "Sensor Degradation",
    "sensor_improvement": "Sensor Improvement", 
    "degrade_coverage": "Increased Coverage", 
    "improve_coverage": "Decreased Coverage",
    "shuffled_set": "Shuffled Policy Set", 
    "novel_span": "Novel Spanning Sensors"
}
df_top["eval_type"] = df_top["eval_type"].map(label_map)

g = df_top.groupby(FACTORS)["metric_shifted"]
summary = g.agg(mean="mean", n="count", sd=lambda x: x.std(ddof=1)).reset_index()
summary["se"]   = summary["sd"] / np.sqrt(summary["n"].clip(lower=1))
summary["ci95"] = np.where(summary["n"] > 1, 1.96 * summary["se"], np.nan)

fig = px.bar(
    summary,
    x="sensor_config",
    y="mean",
    color="policy_type",
    barmode="group",
    facet_row="eval_type",
    error_y="ci95",
    hover_data=["n","se"],
    title="returns/mean by eval_type × policy_type × sensor_config"
)
fig.update_layout(xaxis_title="sensor_config", yaxis_title="Mean returns/mean")
fig.for_each_annotation(lambda a: a.update(textangle=0))
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(
    legend=dict(
        x=1.02,   # just outside the plotting area
        y=1.12,
        xanchor="left",
        yanchor="top"
    )
)
fig.update_layout(width=800, height=800,)
fig.show()



