# PCS‑HELIO v4.3 — 05 · Reading Analysis
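Fits reading models (OLS with errors clustered by subject, and MixedLM), applies BH‑FDR correction to the coefficient p‑values, and saves the coefficient tables and the F2 figure.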

In [None]:
from pathlib import Path; import pandas as pd, numpy as np, json, sys
# Put the directory that contains `notebooks/_fragments.py` on sys.path, so the
# shared fragments import whether the notebook is launched from that directory
# or from one level below it.
ROOT = Path.cwd()
for _candidate in (ROOT, ROOT.parent):
    if (_candidate / 'notebooks' / '_fragments.py').exists():
        sys.path.insert(0, str(_candidate))
        break

try:
    from notebooks._fragments import (
        apply_style,
        preflight_checks,
        print_contract,
        qa_assertions,
        save_manifest,
    )
except Exception as err:
    # Best-effort: report the failure and install stand-ins so the remaining
    # cells still run without the shared fragments.
    print('[preflight] Failed importing notebooks._fragments:', err)

    def apply_style():
        pass

    def preflight_checks():
        pass

    def print_contract():
        pass

    def qa_assertions(df, rules):
        pass

    def save_manifest(path, payload):
        # Minimal fallback: write the payload as indented JSON, creating the
        # parent directory when it does not exist yet.
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(json.dumps(payload, indent=2))

from pcs_toolbox.analysis import fit_ols_clustered, fit_mixedlm, apply_fdr

apply_style()
preflight_checks()
print_contract()

# NOTE(review): these paths are relative to the current working directory, not
# to the directory resolved for the import above — confirm that is intended
# when the notebook is started from inside `notebooks/`.
BASE = Path('.')
DATA = BASE / 'data'
PROC = DATA / 'processed'
RPTS = BASE / 'reports'
FIG = BASE / 'figures' / 'metrics'
for _out_dir in (PROC, RPTS, FIG):
    _out_dir.mkdir(parents=True, exist_ok=True)


In [None]:
# Choose the input table: the merged file when it exists, otherwise the
# ZuCo-only file; with neither present an empty frame is used so the
# following cells can still execute.
path_merged = PROC / 'zuco_kec_merged.csv'
path_zuco = PROC / 'zuco_aligned.csv'
if path_merged.exists():
    df = pd.read_csv(path_merged)
elif path_zuco.exists():
    df = pd.read_csv(path_zuco)
else:
    df = pd.DataFrame()

# Feature engineering: add a log1p-transformed copy of each of these columns
# that is actually present in the table.
if not df.empty:
    for raw_col in ('TRT', 'GPT'):
        if raw_col in df.columns:
            df[f'log_{raw_col}'] = np.log1p(df[raw_col])

print('Rows:', len(df))
display(df.head(3))


In [None]:
# OLS across outcomes with KEC predictors (+ optional controls).
# One model is fitted per available outcome; every non-intercept coefficient
# is recorded in `results` together with its p-value.
results = []
if not df.empty:
    outcomes = [c for c in ['FFD','GD','log_TRT','log_GPT'] if c in df.columns]
    predictors = [c for c in ['entropy','curvature','coherence'] if c in df.columns]
    controls = [c for c in ['length','log_freq','w_pos'] if c in df.columns]
    if outcomes and predictors and 'Subject' in df.columns:
        # The right-hand side is identical for every outcome, so build it once.
        rhs = predictors + controls
        for y in outcomes:
            formula = f"{y} ~ " + " + ".join(rhs)
            try:
                # Complete cases must cover the cluster label as well as the
                # model columns: a row without a Subject value cannot be
                # assigned to a cluster. (Presumably fit_ols_clustered uses the
                # `cluster` column as grouping labels — verify in pcs_toolbox.)
                complete = df.dropna(subset=[y] + rhs + ['Subject'])
                m = fit_ols_clustered(complete, formula, cluster='Subject')
                # Collect this outcome's rows first and add them in one step, so
                # a failure part-way through never leaves a partial set of terms
                # for the outcome in `results`.
                rows = [
                    {'outcome': y, 'term': term, 'estimate': float(est), 'p': float(m.pvalues.get(term, np.nan))}
                    for term, est in m.params.items()
                    if term != 'Intercept'
                ]
                results.extend(rows)
            except Exception as e:
                # Best-effort by design: report the failed outcome and continue
                # with the remaining ones.
                print('[warn] OLS failed for', y, e)
ols_df = pd.DataFrame(results) if results else pd.DataFrame()
print(ols_df.head())
# Save coeffs and FDR (only when at least one coefficient was collected)
if not ols_df.empty:
    fdr_df = apply_fdr(ols_df, p_col='p', group_col='outcome', alpha=0.05)
    out_csv = PROC/'models_reading_coeffs.csv'
    out_fdr = PROC/'models_reading_coeffs_fdr.csv'
    ols_df.to_csv(out_csv, index=False)
    fdr_df.to_csv(out_fdr, index=False)
    # The manifest records how many coefficient rows were written and for
    # which outcomes.
    save_manifest(RPTS/'models_reading_manifest.json', {'rows': int(len(ols_df)), 'outcomes': sorted(ols_df['outcome'].unique())})


In [None]:
# Figure F2: bar chart of the OLS coefficients for a single outcome, followed
# by the QA check on the coefficient table. Both steps are skipped when no
# coefficients were collected.
import matplotlib.pyplot as plt

agg = pd.DataFrame(results)
if agg.empty:
    print('[note] No coefficients to plot.')
else:
    # Plot log_TRT when it was modelled; otherwise the first outcome listed.
    available = agg['outcome'].unique()
    focus = 'log_TRT' if 'log_TRT' in available else available[0]
    sub = agg.loc[agg['outcome'] == focus].set_index('term')['estimate']

    fig, ax = plt.subplots(figsize=(5,3))
    sub.plot.bar(ax=ax)
    ax.set_title(f'F2 · OLS coefficients: {focus}')
    ax.set_ylabel('Estimate')
    fig.tight_layout()
    fig.savefig(FIG/'F2_reading_coeffs.png', dpi=150)
    plt.close(fig)

    # QA: hand the coefficient table to the shared check with its row rule.
    qa_assertions(agg, {'min_rows': 3})
