# 03 — Comparative Master

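Compare the drivers of and barriers to multigenerational living across data sources, using regression results (coefficient tables and standardized betas) from the AHS and ASEC analyses. NHGIS-based results are optionally included if `nhgis_analysis_ready.csv` and/or saved result tables exist. In this notebook a source is only used when its analysis-ready CSV is found under `data/processed`.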

In [None]:
import os
import sys
import pandas as pd

# Resolve the repository root: if the working directory is the "notebooks"
# folder the root is its parent, otherwise the working directory itself.
_cwd = os.getcwd()
if os.path.basename(_cwd) == "notebooks":
    REPO_ROOT = os.path.dirname(_cwd)
else:
    REPO_ROOT = _cwd

# Locations of the processed inputs and of the results written by this notebook.
DATA_PROCESSED = os.path.join(REPO_ROOT, "data", "processed")
OUTPUT = os.path.join(REPO_ROOT, "output")

# Put <repo>/scripts first on the import path so modules there take precedence
# (presumably where core_metrics lives -- confirm against the repo layout).
_scripts_dir = os.path.join(REPO_ROOT, "scripts")
sys.path.insert(0, _scripts_dir)

# Create the output folder up front; a pre-existing folder is not an error.
os.makedirs(OUTPUT, exist_ok=True)

In [None]:
# Find which analysis-ready datasets are present under DATA_PROCESSED.
# Only sources whose CSV exists are kept, as (label, path) pairs in the
# fixed order AHS, ASEC, NHGIS; missing files are skipped silently.
_candidate_files = (
    ("AHS", "ahs_analysis_ready.csv"),
    ("ASEC", "asec_analysis_ready.csv"),
    ("NHGIS", "nhgis_analysis_ready.csv"),
)
_candidate_paths = [
    (label, os.path.join(DATA_PROCESSED, filename))
    for label, filename in _candidate_files
]
sources = [
    (label, csv_path)
    for label, csv_path in _candidate_paths
    if os.path.isfile(csv_path)
]
print("Available:", [label for label, _ in sources])

In [None]:
from core_metrics import run_ols_pipeline

# Run the OLS pipeline once per available source and keep, for each one, the
# beta table, the coefficient table, R² and the observation count.
comparison = {}
for name, path in sources:
    df = pd.read_csv(path)

    # weight_col is "_total_hh" when the dataset has that column, else None.
    if "_total_hh" in df.columns:
        wcol = "_total_hh"
    else:
        wcol = None

    res = run_ols_pipeline(df, target_col="Multigen_Rate", weight_col=wcol)
    robust_fit = res["ols_robust"]

    comparison[name] = {
        "beta_table": res["beta_table"],
        "coef_table": res["coef_table"],
        "r2": robust_fit.rsquared,
        "n": int(robust_fit.nobs),
    }
    print(f"{name}: R²={robust_fit.rsquared:.4f}, N={robust_fit.nobs:.0f}")

## Compare standardized (beta) coefficients across sources

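Outer-merge the beta tables on `Feature` to compare effect sizes across AHS, ASEC, and NHGIS. Because the merge is an outer join, a feature that is absent from one source still appears in the result, with missing values (NaN) in that source's columns.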

In [None]:
# One slimmed-down beta table per source: keep Feature, the standardized
# coefficient and its p-value, and prefix the value columns with the source
# name so they stay distinguishable after merging.
beta_dfs = {
    name: comp["beta_table"][["Feature", "Beta_Coef", "Beta_pval"]].rename(
        columns={"Beta_Coef": f"{name}_Beta", "Beta_pval": f"{name}_pval"}
    )
    for name, comp in comparison.items()
}

if beta_dfs:
    # Start from the first source's table (its row order is kept when it is
    # the only source) and outer-merge the remaining ones on Feature.
    _beta_iter = iter(beta_dfs.values())
    merged = next(_beta_iter)
    for _beta_df in _beta_iter:
        merged = merged.merge(_beta_df, on="Feature", how="outer")
else:
    # Every source file is optional, so `comparison` can be empty. Fall back
    # to an empty table instead of failing with an IndexError.
    print("No sources available; nothing to compare.")
    merged = pd.DataFrame(columns=["Feature"])

# Last expression of the cell: shows the first 12 rows in the notebook.
merged.head(12)