In [14]:
# 02 — Rebate costs (2024 only; verbose)
import os, sys, time, numpy as np, pandas as pd, importlib.util

t0 = time.time()
print("Step 02 start.")

# Load vat_rebate
vat_path = os.path.abspath("../policy/vat_rebate.py")
print("Loading vat_rebate from:", vat_path)
spec = importlib.util.spec_from_file_location("vat_rebate", vat_path)
vr = importlib.util.module_from_spec(spec); spec.loader.exec_module(vr)
print("Loaded:", vr.__file__)

# Load panel
parq = "../intermediate/ca_panel_2024.parquet"
csv  = "../intermediate/ca_panel_2024.csv"
panel_path = parq if os.path.exists(parq) else (csv if os.path.exists(csv) else None)
if panel_path is None:
    raise FileNotFoundError("Missing panel; run Step 01: ca_panel_2024.(parquet|csv)")
print("Reading:", panel_path)
df = pd.read_parquet(panel_path) if panel_path.endswith(".parquet") else pd.read_csv(panel_path)
print("Panel shape:", df.shape)

# Normalize weight
if "weight" not in df.columns:
    wcol = [c for c in df.columns if c.lower() in ("household_weight","weight","hh_weight")]
    if not wcol:
        raise KeyError("No weight column found.")
    df["weight"] = pd.to_numeric(df[wcol[0]], errors="coerce").fillna(0.0)
else:
    df["weight"] = pd.to_numeric(df["weight"], errors="coerce").fillna(0.0)

# Ensure allowance & phaseout present
need_allow = "consumption_allowance" not in df.columns
need_phase = "rebate_after_phaseout" not in df.columns
if need_allow:
    must = {"size_bucket","is_married_couple"}
    missing = [m for m in must if m not in df.columns]
    if missing:
        raise KeyError(f"Missing {missing} required to compute allowance.")
    df = vr.compute_allowance(df)
if need_phase:
    if "household_agi" not in df.columns:
        raise KeyError("household_agi missing; cannot compute phaseout.")
    df = vr.apply_phaseout(df)

# Statewide totals
w = df["weight"].astype(float)
total_no = vr.weighted_sum(df["consumption_allowance"].astype(float), w)
total_ph = vr.weighted_sum(df["rebate_after_phaseout"].astype(float), w)
print(f"Totals — No phase: ${total_no:,.0f} | With phase: ${total_ph:,.0f}")

os.makedirs("../outputs/vat", exist_ok=True)
pd.DataFrame({"year":[2024],"no_phaseout_total":[total_no],"phaseout_total":[total_ph]}).to_csv(
    "../outputs/vat/rebate_cost_2024.csv", index=False
)
print("Saved ../outputs/vat/rebate_cost_2024.csv")

# Deciles by equivalized income (AGI / size)
if ("household_agi" not in df) or ("household_size" not in df):
    raise KeyError("Need household_agi and household_size for deciles.")
df["equiv_income"] = df["household_agi"].astype(float) / np.maximum(df["household_size"].astype(float), 1.0)
df = vr.add_weighted_deciles(df, income_col="equiv_income", weight_col="weight", label="decile")

by_dec = (df.groupby("decile", as_index=False)
            .apply(lambda g: pd.Series({
                "total_no_phaseout": vr.weighted_sum(g["consumption_allowance"], g["weight"]),
                "total_phaseout":    vr.weighted_sum(g["rebate_after_phaseout"], g["weight"]),
                "households_weighted": float(g["weight"].sum()),
            })))
by_dec.to_csv("../outputs/vat/rebate_cost_by_decile_2024.csv", index=False)
print("Saved ../outputs/vat/rebate_cost_by_decile_2024.csv")
print(by_dec.head().to_string(index=False))

# By filing status (if present)
if "filing_status" in df.columns:
    by_fs = (df.groupby("filing_status", as_index=False)
               .apply(lambda g: pd.Series({
                   "total_no_phaseout": vr.weighted_sum(g["consumption_allowance"], g["weight"]),
                   "total_phaseout":    vr.weighted_sum(g["rebate_after_phaseout"], g["weight"]),
               })))
    by_fs.to_csv("../outputs/vat/rebate_cost_by_status_2024.csv", index=False)
    print("Saved ../outputs/vat/rebate_cost_by_status_2024.csv")
    print(by_fs.to_string(index=False))
else:
    print("filing_status not in panel; skipping by-status table.")

# Checks
assert (by_dec["total_phaseout"] <= by_dec["total_no_phaseout"] + 1e-9).all()
assert np.isclose(by_dec["total_no_phaseout"].sum(), total_no)
assert np.isclose(by_dec["total_phaseout"].sum(),   total_ph)

print(f"✅ Step 02 complete. Elapsed {time.time()-t0:.2f}s")


Step 02 start.
Loading vat_rebate from: c:\Users\Ali.Melad\Dropbox\Ali Work\Kyle\California VAT\policy_engile_cali_v2\policy\vat_rebate.py
Loaded: c:\Users\Ali.Melad\Dropbox\Ali Work\Kyle\California VAT\policy_engile_cali_v2\policy\vat_rebate.py
Reading: ../intermediate/ca_panel_2024.csv
Panel shape: (1747, 15)
Totals — No phase: $439,892,827,841 | With phase: $345,335,469,784
Saved ../outputs/vat/rebate_cost_2024.csv
Saved ../outputs/vat/rebate_cost_by_decile_2024.csv
decile  total_no_phaseout  total_phaseout  households_weighted
     1       4.895187e+10    4.895187e+10         1.890135e+06
     2       5.310761e+10    5.310761e+10         1.568790e+06
     3       3.797165e+10    3.795165e+10         1.489727e+06
     4       5.255198e+10    5.243593e+10         1.611124e+06
     5       4.562152e+10    4.172668e+10         1.551894e+06
Saved ../outputs/vat/rebate_cost_by_status_2024.csv
filing_status  total_no_phaseout  total_phaseout
          mfj       3.106090e+11    2.556471e+1

  by_dec = (df.groupby("decile", as_index=False)
  .apply(lambda g: pd.Series({
  .apply(lambda g: pd.Series({


In [None]:
Step 02 start.
Loading vat_rebate from: c:\Users\Ali.Melad\Dropbox\Ali Work\Kyle\California VAT\policy_engile_cali_v2\policy\vat_rebate.py
Loaded: c:\Users\Ali.Melad\Dropbox\Ali Work\Kyle\California VAT\policy_engile_cali_v2\policy\vat_rebate.py
Reading: ../intermediate/ca_panel_2024.csv
Panel shape: (1747, 15)
Totals — No phase: $439,892,827,841 | With phase: $345,335,469,784
Saved ../outputs/vat/rebate_cost_2024.csv
Saved ../outputs/vat/rebate_cost_by_decile_2024.csv
decile  total_no_phaseout  total_phaseout  households_weighted
     1       4.895187e+10    4.895187e+10         1.890135e+06
     2       5.310761e+10    5.310761e+10         1.568790e+06
     3       3.797165e+10    3.795165e+10         1.489727e+06
     4       5.255198e+10    5.243593e+10         1.611124e+06
     5       4.562152e+10    4.172668e+10         1.551894e+06
Saved ../outputs/vat/rebate_cost_by_status_2024.csv
filing_status  total_no_phaseout  total_phaseout
          mfj       3.106090e+11    2.556471e+11
       single       1.292838e+11    8.968836e+10
✅ Step 02 complete. Elapsed 0.09s
C:\Users\Ali.Melad\AppData\Local\Temp\ipykernel_19264\4124014797.py:65: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  by_dec = (df.groupby("decile", as_index=False)
C:\Users\Ali.Melad\AppData\Local\Temp\ipykernel_19264\4124014797.py:66: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  .apply(lambda g: pd.Series({
C:\Users\Ali.Melad\AppData\Local\Temp\ipykernel_19264\4124014797.py:78: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  .apply(lambda g: pd.Series({