# pyident — Sparsity sweep (minimal notebook)
This notebook is a **thin wrapper** around the same core function the CLI uses:
`pyident.experiments.sparsity.sweep_sparsity`. It writes a CSV and shows a
couple of quick diagnostics/plots.

> Tip: If you open this in Colab, uncomment the first cell to install your repo.

In [None]:
# !pip install -U pip
# !pip install -U matplotlib pandas
# %pip install -e .  # if running within a cloned repo

In [3]:
# Setup
import os, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# JAX env (safe defaults; overridden by your local env/CLI)
os.environ.setdefault("JAX_PLATFORM_NAME", "cpu")
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")

from ..experiments.sparsity import sweep_sparsity

ImportError: attempted relative import with no known parent package

## Parameters (edit me)

In [4]:
# --- core experiment parameters ---
n = 12
m = 2
p_values = [1.0, 0.8, 0.6, 0.4, 0.3, 0.2, 0.1, 0.05]
T = 120
dt = 0.05
sparse_which = "both"       # "A", "B", or "both"
signal = "prbs"             # "prbs" or "multisine"
sigPE = 12
seeds = [0, 1, 2]
algs = ("dmdc", "dmdc_tls")  # tuple or list of estimator names

# options
x0_mode = None            # or "zero", "gaussian", ...
U_restr_dim = None        # e.g., 1 to restrict to 1D admissible input subspace
use_jax = False
jax_x64 = True

# output CSV path
out_csv = "runs/sparsity_notebook.csv"  # directories will be created

## Run

In [5]:
# Ensure output directory exists
import os
os.makedirs(os.path.dirname(out_csv) or ".", exist_ok=True)

# Run the exact same core routine as the CLI
sweep_sparsity(
    n=n,
    m=m,
    p_values=p_values,
    T=T,
    dt=dt,
    sparse_which=sparse_which,
    signal=signal,
    sigPE=sigPE,
    seeds=seeds,
    algs=algs,
    out_csv=out_csv,
    use_jax=use_jax,
    jax_x64=jax_x64,
    x0_mode=x0_mode,
    U_restr_dim=U_restr_dim,
)

print(f"Wrote {out_csv}")

NameError: name 'sweep_sparsity' is not defined

## Quick diagnostics

In [None]:
import pandas as pd, numpy as np
df = pd.read_csv(out_csv)
print("Unique env rows:\n", df[["env.accelerator","env.jax_x64"]].drop_duplicates())

g = df.groupby("p_density")["K_rank"].agg(["count","mean","std"]).reset_index()
print("\nK_rank by density:\n", g)

# slope of E[K_rank] vs density
slope = (g["mean"].iloc[-1] - g["mean"].iloc[0]) / (g["p_density"].iloc[-1] - g["p_density"].iloc[0])
print(f"\ndE[K_rank]/d(density) = {slope:.3f} (expect > 0)")

## Plots

In [None]:
# Plot 1: E[K_rank] vs density
df = pd.read_csv(out_csv)
g = df.groupby("p_density")["K_rank"].agg(["mean","std"]).reset_index()

fig, ax = plt.subplots(figsize=(5, 3.2))
ax.errorbar(g["p_density"], g["mean"], yerr=g["std"], lw=2, capsize=3)
ax.set_xlabel("p_density (A/B nonzero fraction)")
ax.set_ylabel("E[K_rank]")
ax.set_title("Sparsity: expected visible rank vs density")
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Plot 2: Visibility success vs density
n = int(df["n"].iloc[0])
vis = (df["K_rank"] >= n).groupby(df["p_density"]).mean().reset_index()

fig, ax = plt.subplots(figsize=(5, 3.2))
ax.plot(vis["p_density"], vis["K_rank"], marker="o", lw=2)
ax.set_xlabel("p_density")
ax.set_ylabel("P[K_rank >= n]")
ax.set_title("Visibility success vs density")
ax.grid(True, alpha=0.3)
plt.show()