# MCS MEA — Analyze Ready (Notebook)
Interactive exploration of already-processed IFR NPZ data following the standardized workflow:

- Build/inspect the readiness index (chem + NPZ by default)
- Filter by round/group/plate and pick recordings
- Load an NPZ and plot channels inline with the chemical timestamp
- Use the per-recording IFR NPZ summary (or compute it if missing)
- Optionally run NPZ stats and/or inspect the global catalog

This notebook does NOT re-open raw H5; it works from NPZ + annotations only.

In [None]:
# Pairings (CTZ vs VEH) utilities
import sys
from pathlib import Path
def _ensure_repo_on_path():
    """Put the directory that contains ``mcs_mea_analysis`` on ``sys.path``.

    The current working directory is checked first, then each of its parents
    from nearest to farthest. The first directory holding an entry named
    ``mcs_mea_analysis`` is inserted at the front of ``sys.path`` unless its
    string form is already listed there.

    Returns:
        The matching directory as a ``Path``, or ``None`` when neither the
        working directory nor any of its parents contains such an entry.
    """
    cwd = Path.cwd()
    repo_root = next(
        (folder for folder in (cwd, *cwd.parents) if (folder / 'mcs_mea_analysis').exists()),
        None,
    )
    # Only touch sys.path when a root was found and it is not listed yet.
    if repo_root is not None and str(repo_root) not in sys.path:
        sys.path.insert(0, str(repo_root))
    return repo_root
# Run the search now so the `mcs_mea_analysis` import below can resolve;
# the returned path (or None) is not needed here.
_ensure_repo_on_path()
from mcs_mea_analysis.pairings import PairingIndex

In [None]:
from pathlib import Path
import sys, json, csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# When the notebook runs from a subdirectory (e.g. notebooks/), the package is
# not in the working directory but in its parent: make that parent importable
# so `mcs_mea_analysis` can be imported below.
_cwd = Path.cwd()
if not (_cwd / 'mcs_mea_analysis').exists() and (_cwd.parent / 'mcs_mea_analysis').exists():
    sys.path.insert(0, str(_cwd.parent))

from mcs_mea_analysis.ready import ReadinessConfig, build_ready_index
from mcs_mea_analysis.ifr_processing import IFRProcessorConfig, process_ifr_npz
from mcs_mea_analysis.analysis_config import NPZAnalysisConfig

# Outputs root: use the external Manny2TB volume when it is present,
# otherwise fall back to a directory relative to the working directory.
_external_root = Path('/Volumes/Manny2TB/mcs_mea_outputs')
if _external_root.exists():
    OUTPUT_ROOT = _external_root
else:
    print('External output root not found; using local fallback _mcs_mea_outputs_local')
    OUTPUT_ROOT = Path('_mcs_mea_outputs_local')
# Annotation files are looked up in this subfolder of the outputs root.
ANNOTATIONS_ROOT = OUTPUT_ROOT / 'annotations'
OUTPUT_ROOT, ANNOTATIONS_ROOT

In [None]:
# Build CTZ vs VEH pairings by plate (optionally also by round)
import sys as _sys
from pathlib import Path as _Path
import pandas as pd
# The pairing step needs the readiness DataFrame `df`. If no earlier cell has
# defined it, build a readiness index here.
if 'df' not in globals():
    # Make the repo importable when the notebook runs from a subdirectory
    _cwd = _Path.cwd()
    if not (_cwd / 'mcs_mea_analysis').exists() and (_cwd.parent / 'mcs_mea_analysis').exists():
        _sys.path.insert(0, str(_cwd.parent))
    from mcs_mea_analysis.ready import ReadinessConfig as _ReadinessConfig, build_ready_index as _build_ready_index
    # Reuse an existing OUTPUT_ROOT; otherwise prefer the external volume and
    # fall back to the local outputs directory.
    if 'OUTPUT_ROOT' not in globals():
        _external_root = _Path('/Volumes/Manny2TB/mcs_mea_outputs')
        OUTPUT_ROOT = _external_root if _external_root.exists() else _Path('_mcs_mea_outputs_local')
    _fallback_cfg = _ReadinessConfig(output_root=OUTPUT_ROOT, require_ifr_npz=True)
    ready_csv, _, ready_rows = _build_ready_index(_fallback_cfg)
    df = pd.DataFrame(ready_rows)

# PairingIndex may already be imported by an earlier cell; import it here
# when that cell was not run.
if 'PairingIndex' not in globals():
    _cwd = _Path.cwd()
    _pkg_here = (_cwd / 'mcs_mea_analysis').exists()
    if not _pkg_here and (_cwd.parent / 'mcs_mea_analysis').exists():
        _sys.path.insert(0, str(_cwd.parent))
    from mcs_mea_analysis.pairings import PairingIndex

group_by_round = True  # set False to group only by plate

# Build the pairing index from the readiness rows (one dict per DataFrame row).
pair_index = PairingIndex.from_ready_rows(
    df.to_dict('records'),
    group_by_round=group_by_round,
)

# One summary row per group, ordered by plate and then round.
_summary_frame = pd.DataFrame(pair_index.summary_rows())
pairs_summary = _summary_frame.sort_values(['plate','round']).reset_index(drop=True)
print('Groups:', len(pairs_summary))
pairs_summary.head(12)

In [None]:
# Expanded pairs table (pairs + unpaired) and ready-only pairs
# Full table returned by the pairing index.
pairs_df = pd.DataFrame(pair_index.pairs_dataframe())
print('Pair rows:', pairs_df.shape[0])
display(pairs_df.iloc[:12])

# Keep only rows whose status marks a ready pair.
ready_pairs_df = pairs_df.query('pair_status=="ready_pair"')
print('Ready pairs:', ready_pairs_df.shape[0])
ready_pairs_df.head(12)

In [None]:
# Build readiness index (chem + NPZ present by default)
# Readiness requirements: an IFR NPZ must be present and the recording must
# not be ignored; the opto, eligibility and FR-summary flags are switched off.
ready_cfg = ReadinessConfig(
    output_root=OUTPUT_ROOT,
    require_ifr_npz=True,
    require_not_ignored=True,
    require_opto=False,
    require_eligible=False,
    require_fr_summary=False,
)
ready_csv, ready_jsonl, ready_rows = build_ready_index(ready_cfg)

# `df` holds every index row; `df_ready` is an independent copy of the rows
# flagged ready.
df = pd.DataFrame(ready_rows)
_is_ready = df['ready'].eq(True)
df_ready = df.loc[_is_ready].copy()
print(f'Ready rows: {len(df_ready)} (from {ready_csv})')
df_ready.head(3)

In [None]:
# Quick inventory by group/round
def _sorted_counts(column):
    """Return value counts of ``df_ready[column]`` ordered by the counted value."""
    return df_ready[column].value_counts().sort_index()


group_counts = _sorted_counts('group_label')
round_counts = _sorted_counts('round')
for _counts in (group_counts, round_counts):
    display(_counts.to_frame('count'))

In [None]:
# Optional filters — set any of these lists to narrow the ready set
want_groups = []   # e.g., ['CTZ']
want_rounds = []   # e.g., ['mea_blade_round5']
want_plates = []   # e.g., [1, 6]

# Start from a copy so df_ready itself is never narrowed; each non-empty list
# above restricts the rows further.
filtered = df_ready.copy()
if want_groups:
    filtered = filtered[filtered['group_label'].isin(want_groups)]
if want_rounds:
    filtered = filtered[filtered['round'].isin(want_rounds)]
if want_plates:
    import re

    # Match `plate_<n>` only when no further digit follows, so asking for
    # plate 1 does not also select plate_10, plate_11, ...
    _plate_alts = '|'.join(re.escape(str(p)) for p in want_plates)
    _plate_pattern = rf'plate_(?:{_plate_alts})(?!\d)'
    # A boolean Series mask (rather than a Python list) keeps the columns
    # intact when `filtered` is already empty: indexing a DataFrame with an
    # empty list is interpreted as "select no columns".
    _plate_mask = filtered['path'].astype(str).str.contains(_plate_pattern, regex=True, na=False)
    filtered = filtered[_plate_mask]
print(f'Filtered rows: {len(filtered)}')
filtered.head(5)

In [None]:
# Pick one recording by index in the filtered frame
row_idx = 0  # positional (0-based) row of `filtered`, not an index label
# .iloc raises IndexError when row_idx is out of range, e.g. when the filters
# above left `filtered` empty.
r = filtered.iloc[row_idx]
r

In [None]:
# Load IFR NPZ and basic info
npz_path = Path(r['npz_path'])
stem = r['recording_stem']
print(npz_path)
# np.load on an .npz returns a lazy NpzFile that keeps the archive open; the
# context manager closes it once the arrays below have been read into memory.
with np.load(npz_path) as d:
    time_s = np.asarray(d['time_s'], dtype=float)
    ifr = np.asarray(d['ifr_hz'], dtype=float)
    # Fall back to the raw IFR when no smoothed trace was stored. Testing
    # membership in `.files` avoids relying on a dict-style `.get()`, which
    # NpzFile does not offer on older NumPy releases.
    if 'ifr_hz_smooth' in d.files:
        ifr_s = np.asarray(d['ifr_hz_smooth'], dtype=float)
    else:
        ifr_s = ifr
n_ch, n_bins = ifr_s.shape
# Duration is taken as the last time stamp; NaN when the time axis is empty.
dur = float(time_s[-1]) if time_s.size else np.nan
chem_ts = r.get('chem_timestamp')
if chem_ts is None or (pd.isna(chem_ts)):
    # Fallback: read annotations to find the first chemical timestamp
    def chem_time_for_stem(stem: str, annotations_root: Path):
        """Return the first chemical-annotation timestamp stored for *stem*.

        Looks for ``<stem>.json`` and then ``<stem>.csv`` under
        *annotations_root*. Within a file, the first row whose ``category``
        (case-insensitive, defaulting to ``'manual'``) equals ``'chemical'``
        wins; its ``timestamp`` is returned as a float (0.0 when the key is
        absent).

        Returns:
            The timestamp as ``float``, or ``None`` when no file yields a
            chemical row.
        """
        for ext in ('.json', '.csv'):
            p = annotations_root / f'{stem}{ext}'
            if not p.exists():
                continue
            try:
                if p.suffix.lower() == '.json':
                    # assumes the JSON top level is a list of row dicts;
                    # any other shape raises below and the file is skipped
                    data = json.loads(p.read_text())
                else:
                    with p.open('r', newline='') as fh:
                        data = list(csv.DictReader(fh))
                for row in data:
                    if str(row.get('category', 'manual')).lower() == 'chemical':
                        return float(row.get('timestamp', 0.0))
            except Exception:
                # Deliberate best-effort: an unreadable or malformed file is
                # skipped so the next extension can still be tried.
                continue
        return None
    chem_ts = chem_time_for_stem(stem, ANNOTATIONS_ROOT)
print(f'Channels: {n_ch}  bins: {n_bins}  duration(s): {dur:.1f}  chem_ts: {chem_ts}')

In [None]:
# Plot a channels grid inline (decimated for speed), with chem marker if present
def plot_ifr_grid_inline(time_s, ifr_s, chem_ts=None, max_points=6000, ncols=6):
    """Draw one small IFR-vs-time panel per row of ``ifr_s`` in a grid.

    Args:
        time_s: Time axis; sliced with the same stride as the second axis of
            ``ifr_s``.
        ifr_s: 2-D array whose first axis is plotted one row per panel.
        chem_ts: When not ``None``, a dashed red vertical line is drawn at
            ``float(chem_ts)`` in every data panel.
        max_points: Decimation target; every ``n_bins // max_points``-th
            sample is kept (at least every sample).
        ncols: Number of grid columns.

    Returns:
        The matplotlib figure. Its title reads the module-level ``stem``.
    """
    n_ch, n_bins = ifr_s.shape
    # Plain strided decimation to keep plotting fast.
    stride = max(1, int(n_bins // max_points))
    t_dec = time_s[::stride]
    ifr_dec = ifr_s[:, ::stride]

    nrows = int(np.ceil(n_ch / ncols))
    fig, axes = plt.subplots(
        nrows,
        ncols,
        figsize=(ncols*3.0, nrows*1.8),
        sharex=True,
        sharey=False,
    )
    # Flatten so a single Axes, a 1-D row and a 2-D grid are handled alike.
    flat_axes = np.asarray(axes).reshape(-1)
    for ch, panel in enumerate(flat_axes):
        if ch >= n_ch:
            # Grid cells beyond the last channel stay blank.
            panel.axis('off')
            continue
        panel.plot(t_dec, ifr_dec[ch, :], lw=0.6)
        if chem_ts is not None:
            panel.axvline(float(chem_ts), color='r', linestyle='--', lw=0.8)
        panel.set_title(f'Ch {ch}', fontsize=8)

    fig.suptitle(f'IFR (smoothed) — {stem}')
    # Axis labels only on the bottom row and on the first column.
    for panel in flat_axes[-ncols:]:
        panel.set_xlabel('Time (s)')
    for panel in flat_axes[::ncols]:
        panel.set_ylabel('IFR (Hz)')
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    return fig

# Plot the smoothed IFR with the chemical timestamp; the trailing bare `fig`
# makes the figure the cell's displayed result.
fig = plot_ifr_grid_inline(time_s, ifr_s, chem_ts)
fig

In [None]:
# Per-recording IFR NPZ summary (compute if missing, then inspect)
# The summary CSV is looked up next to the NPZ file.
fr_dir = npz_path.parent
sum_csv = fr_dir / f'{stem}_ifr_npz_summary.csv'
if sum_csv.exists():
    print('Using existing:', sum_csv)
else:
    print('Summary CSV not found; computing with process_ifr_npz …')
    # NOTE(review): the read below relies on process_ifr_npz writing the
    # summary to `sum_csv` — confirm against its implementation.
    _ = process_ifr_npz(npz_path)
sum_df = pd.read_csv(sum_csv)
display(sum_df.head())

# Bar chart: number of rows per modulation label.
_modulation_counts = sum_df['modulation'].value_counts()
ax = _modulation_counts.plot(kind='bar', title=f'{stem} modulation counts')
ax.set_xlabel('modulation')
ax.set_ylabel('channels')
plt.show()

# Overlaid histograms of the fr_pre and fr_post columns.
_fr_columns = sum_df[['fr_pre','fr_post']]
_fr_columns.plot(kind='hist', bins=40, alpha=0.6, title='FR pre/post (Hz)')
plt.show()

In [None]:
# Optional: per-recording NPZ stats (writes *_npz_stats.csv next to NPZ)
run_npz_stats = False
# The stats run only when explicitly enabled and a chemical timestamp exists.
if not run_npz_stats or chem_ts is None:
    print('Skipping stats (set run_npz_stats=True and ensure chem_ts is available).')
else:
    from mcs_mea_analysis.npz_stats import analyze_npz
    stats_cfg = NPZAnalysisConfig()
    # The value returned by analyze_npz is read back as a CSV path.
    out_stats = analyze_npz(npz_path, float(chem_ts), stats_cfg)
    stats_df = pd.read_csv(out_stats)
    display(stats_df.head())

In [None]:
# Global catalog (if previously built via scripts.process_ifr_npz)
cat_dir = OUTPUT_ROOT / 'ifr_npz_catalog'
cat_csv = cat_dir / 'ifr_npz_catalog.csv'
status_csv = cat_dir / 'ifr_npz_status.csv'
if not cat_csv.exists():
    print('Catalog not found at', cat_csv)
    print('You can build it by running scripts/process_ifr_npz.py or the cell below.')
else:
    cat_df = pd.read_csv(cat_csv)
    display(cat_df.head())
    # The status table is optional; show it only when the file is present.
    if status_csv.exists():
        status_df = pd.read_csv(status_csv)
        display(status_df.head())
    # Example: top recordings by positive-modulation ratio.
    # Per recording: n = number of catalog rows, n_pos = rows labelled 'positive'.
    _flagged = cat_df.assign(pos=(cat_df['modulation']=='positive').astype(int))
    pos_ratio = _flagged.groupby('recording_stem').agg(
        n=('channel','size'),
        n_pos=('pos','sum'),
    )
    pos_ratio['ratio'] = pos_ratio['n_pos'] / pos_ratio['n']
    _ranked = pos_ratio.sort_values('ratio', ascending=False)
    display(_ranked.head(10))

In [None]:
# Optional: build/rebuild global catalog from all NPZs (may take time)
run_build_catalog = False
if not run_build_catalog:
    print('Skipping catalog rebuild (set run_build_catalog=True to run).')
else:
    from mcs_mea_analysis.ifr_processing import process_all_ifr_npz, find_ifr_npz
    # Collect the NPZ files found under the outputs root, then process them
    # all in one call.
    files = find_ifr_npz(OUTPUT_ROOT)
    print('Found NPZ:', len(files))
    _catalog_cfg = IFRProcessorConfig(output_root=OUTPUT_ROOT)
    _, out_cat_csv, out_status_csv = process_all_ifr_npz(files, _catalog_cfg)
    print('Catalog ->', out_cat_csv)
    print('Status  ->', out_status_csv)