# 03 — MTR on labor (+$1 wages): current law vs no-income-tax + rebate
- Current: income taxes + payroll (no rebate)
- Reform: zero fed/state income taxes + rebate (phase-out)
- Compute Δtax from +$1 wages; report pop- and earnings-weighted averages
- Wage shares in phase-out bands:
  - Single: AGI ∈ [75k,125k)
  - MFJ:    AGI ∈ [150k,200k)


In [1]:
# 03 — Rebate-only MTR on labor (+$1), 2024  [fixed: normalize 'weight']
import os, numpy as np, pandas as pd, importlib.util

print("Step 03 start.")

# Load helpers
vat_path = os.path.abspath("../policy/vat_rebate.py")
spec = importlib.util.spec_from_file_location("vat_rebate", vat_path)
vr = importlib.util.module_from_spec(spec); spec.loader.exec_module(vr)
print("Loaded:", vr.__file__)

# Load panel
parq = "../intermediate/ca_panel_2024.parquet"
csv  = "../intermediate/ca_panel_2024.csv"
panel_path = parq if os.path.exists(parq) else (csv if os.path.exists(csv) else None)
if panel_path is None:
    raise FileNotFoundError("Missing panel; run Step 01.")
df = pd.read_parquet(panel_path) if panel_path.endswith(".parquet") else pd.read_csv(panel_path)
print("Panel shape:", df.shape)
print("Columns:", list(df.columns))

# --- Normalize weight column (fix for your earlier error) ---
if "weight" not in df.columns:
    w_aliases = [c for c in df.columns if c.lower() in ("household_weight","weight","hh_weight")]
    if not w_aliases:
        raise KeyError("No weight column found (looked for household_weight/weight/hh_weight).")
    df["weight"] = pd.to_numeric(df[w_aliases[0]], errors="coerce").fillna(0.0)
else:
    df["weight"] = pd.to_numeric(df["weight"], errors="coerce").fillna(0.0)

# Ensure required structural columns
need = ["household_agi","employment_income","size_bucket","is_married_couple"]
missing = [c for c in need if c not in df.columns]
if missing:
    raise KeyError(f"Missing required columns: {missing}")

# Compute base rebate if needed
if "consumption_allowance" not in df.columns:
    df = vr.compute_allowance(df)
if "rebate_after_phaseout" not in df.columns:
    df = vr.apply_phaseout(df)

base = df.copy()
plus = df.copy()

# +$1 for households with wages > 0
has_wages = plus["employment_income"].fillna(0) > 0
plus.loc[has_wages, "household_agi"] = plus.loc[has_wages, "household_agi"] + 1.0

# Recompute rebate for 'plus'
if "consumption_allowance" not in plus.columns:
    plus = vr.compute_allowance(plus)
plus = vr.apply_phaseout(plus)

# Δrebate = rebate_plus - rebate_base → MTR (tax) = −Δrebate (positive = higher tax burden from clawback)
d_rebate = plus["rebate_after_phaseout"] - base["rebate_after_phaseout"]
mtr_tax  = -d_rebate

# Weighting
w = df["weight"].astype(float)
wages = df["employment_income"].astype(float).clip(lower=0.0)
mask = has_wages.fillna(False)

# Averages over households with wages > 0
pop_wtd  = float((mtr_tax[mask] * w[mask]).sum() / w[mask].sum()) if w[mask].sum() > 0 else 0.0
earn_wtd = float((mtr_tax[mask] * wages[mask]).sum() / wages[mask].sum()) if wages[mask].sum() > 0 else 0.0

# Wage shares in phase-out bands (by filing_status if present; else derive from is_married_couple)
if "filing_status" in df.columns:
    fs = df["filing_status"].astype(str).str.lower()
else:
    fs = pd.Series(np.where(df["is_married_couple"].astype(int)==1, "mfj", "single"), index=df.index)

agi = df["household_agi"].astype(float)
wages_total = float(wages.sum())

# Bands: Singles $75k–$125k, MFJ $150k–$200k
single_band_mask = (fs.eq("single") & (agi >= 75_000) & (agi <= 125_000))
mfj_band_mask    = (fs.eq("mfj")    & (agi >= 150_000) & (agi <= 200_000))

share_single = float((wages[single_band_mask]).sum() / wages_total) if wages_total > 0 else 0.0
share_mfj    = float((wages[mfj_band_mask]).sum()   / wages_total) if wages_total > 0 else 0.0

# Save
os.makedirs("../outputs/vat", exist_ok=True)
pd.DataFrame([{
    "regime":"rebate_only",
    "population_weighted_MTR": pop_wtd,
    "earnings_weighted_MTR":   earn_wtd,
    "year": 2024,
}]).to_csv("../outputs/vat/mtr_summary_2024.csv", index=False)

pd.DataFrame([{
    "year": 2024,
    "share_wages_single_75k_125k": share_single,
    "share_wages_mfj_150k_200k":   share_mfj,
}]).to_csv("../outputs/vat/wage_phaseout_shares_2024.csv", index=False)

# Checks
assert np.isfinite([pop_wtd, earn_wtd]).all()
assert 0.0 <= share_single <= 1.0
assert 0.0 <= share_mfj    <= 1.0

print(f"✅ Step 03 done. Rebate-only MTR pop-wtd={pop_wtd:.4f}, earn-wtd={earn_wtd:.4f}")
print(f"   Wage shares in phase-out: Single={share_single:.2%}, MFJ={share_mfj:.2%}")


Step 03 start.
Loaded: c:\Users\Ali.Melad\Dropbox\Ali Work\Kyle\California VAT\policy_engile_cali_v2\policy\vat_rebate.py
Panel shape: (1747, 15)
Columns: ['state_code', 'household_size', 'household_weight', 'household_agi', 'employment_income', 'fed_income_tax', 'ca_income_tax', 'filing_status', 'is_married_couple', 'size_bucket', 'consumption_allowance', 'rebate_after_phaseout', 'excess_over_threshold', 'allowance_no_phaseout', 'allowance_phaseout']
✅ Step 03 done. Rebate-only MTR pop-wtd=0.0294, earn-wtd=0.0855
   Wage shares in phase-out: Single=4.63%, MFJ=9.91%
