In [None]:
from pathlib import Path
import pandas as pd

# Aggregate county-level energy-poverty shares to state level, weighting each
# county by `total_units_counted`, and save the resulting state table.

# ========= Configuration Area (Modify as needed) =========
INPUT_CSV   = Path("./poverty/energy_poverty_outputs/energy_poverty_share_2025.csv")   # County-level input CSV
# State-level output. The name is historical: the file format is chosen from the
# extension in the save step below (".csv" -> CSV, ".xlsx"/".xlsm"/".xls"/".ods" -> spreadsheet).
OUTPUT_XLSX = Path("./poverty/energy_poverty_outputs/energy_poverty_state_2025.csv")

# If you only want to calculate a specific year, e.g., 2025, set it here; set to None to keep all years
YEAR_FILTER = None   # or 2025

# ========= Read CSV =========
# Read county_fips as string to preserve leading zeros
df = pd.read_csv(
    INPUT_CSV,
    dtype={"county_fips": str},
)

# Ensure county_fips is 5 digits
df["county_fips"] = df["county_fips"].str.zfill(5)

# Extract State FIPS from County FIPS (first two digits)
df["state_fips"] = df["county_fips"].str[:2]

# Filter by year if needed
if YEAR_FILTER is not None:
    df = df.loc[df["year"] == YEAR_FILTER].copy()

# ========= Calculate Weighted State Proportions =========
# 1) County-level weighted counts: share * total_units_counted.
#    share_diff_gt6 / share_diff_gt10 are proportions, so the product is a count
#    in the same unit as total_units_counted.
df["extra_pop_gt6"]  = df["total_units_counted"] * df["share_diff_gt6"]
# If you don't need gt10 for now, comment out the line below
# (and drop the gt10 columns from the aggregation and ratio steps as well).
df["extra_pop_gt10"] = df["total_units_counted"] * df["share_diff_gt10"]

# 2) Aggregate by state (and by year when no single year was selected above)
group_cols = ["state_fips"]
if YEAR_FILTER is None:
    group_cols.append("year")  # Retain year info

# NOTE(review): sum() skips NaN, so a county with a missing share adds nothing to
# the numerator but its total_units_counted still enters the denominator.
# Confirm this is the intended treatment of missing shares.
state = (
    df
    .groupby(group_cols, as_index=False)[["total_units_counted", "extra_pop_gt6", "extra_pop_gt10"]]
    .sum()
    .rename(columns={"total_units_counted": "total_units_state"})
)

# 3) State-level weighted proportion:
#    sum of county weighted counts in the state / sum of county units in the state
state["share_diff_gt6_state"]  = state["extra_pop_gt6"]  / state["total_units_state"]
state["share_diff_gt10_state"] = state["extra_pop_gt10"] / state["total_units_state"]

# ========= Save results =========
# The configured path ends in ".csv". Passing such a path to to_excel either fails
# ("No engine for filetype") or writes an Excel workbook mislabeled as CSV,
# depending on the pandas version. Choose the writer from the suffix so the file
# contents always match the extension.
if OUTPUT_XLSX.suffix.lower() in {".xlsx", ".xlsm", ".xls", ".ods"}:
    state.to_excel(OUTPUT_XLSX, index=False)
else:
    state.to_csv(OUTPUT_XLSX, index=False)

print("State-level population-weighted results saved to:", OUTPUT_XLSX)
print(state.head())


In [None]:
# -*- coding: utf-8 -*-
from pathlib import Path
import pandas as pd

# Aggregate county-level energy-poverty shares to state level, weighting each
# county by `total_units_counted`, and save the resulting state table.

# ========= Configuration Area (Modify as needed) =========
INPUT_CSV   = Path("./poverty/energy_poverty_outputs/energy_poverty_share_2030.csv")   # County-level input CSV
# State-level output. The name is historical: the file format is chosen from the
# extension in the save step below (".csv" -> CSV, ".xlsx"/".xlsm"/".xls"/".ods" -> spreadsheet).
OUTPUT_XLSX = Path("./poverty/energy_poverty_outputs/energy_poverty_state_2030.csv")

# If you only want to calculate a specific year, e.g., 2030, set it here; set to None to keep all years
YEAR_FILTER = None   # or 2030

# ========= Read CSV =========
# Read county_fips as string to preserve leading zeros
df = pd.read_csv(
    INPUT_CSV,
    dtype={"county_fips": str},
)

# Ensure county_fips is 5 digits
df["county_fips"] = df["county_fips"].str.zfill(5)

# Extract State FIPS from County FIPS (first two digits)
df["state_fips"] = df["county_fips"].str[:2]

# Filter by year if needed
if YEAR_FILTER is not None:
    df = df.loc[df["year"] == YEAR_FILTER].copy()

# ========= Calculate Weighted State Proportions =========
# 1) County-level weighted counts: share * total_units_counted.
#    share_diff_gt6 / share_diff_gt10 are proportions, so the product is a count
#    in the same unit as total_units_counted.
df["extra_pop_gt6"]  = df["total_units_counted"] * df["share_diff_gt6"]
# If you don't need gt10 for now, comment out the line below
# (and drop the gt10 columns from the aggregation and ratio steps as well).
df["extra_pop_gt10"] = df["total_units_counted"] * df["share_diff_gt10"]

# 2) Aggregate by state (and by year when no single year was selected above)
group_cols = ["state_fips"]
if YEAR_FILTER is None:
    group_cols.append("year")  # Retain year info

# NOTE(review): sum() skips NaN, so a county with a missing share adds nothing to
# the numerator but its total_units_counted still enters the denominator.
# Confirm this is the intended treatment of missing shares.
state = (
    df
    .groupby(group_cols, as_index=False)[["total_units_counted", "extra_pop_gt6", "extra_pop_gt10"]]
    .sum()
    .rename(columns={"total_units_counted": "total_units_state"})
)

# 3) State-level weighted proportion:
#    sum of county weighted counts in the state / sum of county units in the state
state["share_diff_gt6_state"]  = state["extra_pop_gt6"]  / state["total_units_state"]
state["share_diff_gt10_state"] = state["extra_pop_gt10"] / state["total_units_state"]

# ========= Save results =========
# The configured path ends in ".csv". Passing such a path to to_excel either fails
# ("No engine for filetype") or writes an Excel workbook mislabeled as CSV,
# depending on the pandas version. Choose the writer from the suffix so the file
# contents always match the extension.
if OUTPUT_XLSX.suffix.lower() in {".xlsx", ".xlsm", ".xls", ".ods"}:
    state.to_excel(OUTPUT_XLSX, index=False)
else:
    state.to_csv(OUTPUT_XLSX, index=False)

print("State-level population-weighted results saved to:", OUTPUT_XLSX)
print(state.head())