## Compare EIA 930 demand imputation

### Extract Alicia/Tyler's imputation results

In [None]:
import zipfile
import requests
import pandas as pd
from io import BytesIO
import matplotlib.pyplot as plt
from pudl.analysis.timeseries_evaluation import plot_compare_timeseries

from pudl.etl import defs

def extract_baseline_imputation() -> pd.DataFrame:
    """Download the baseline cleaned hourly demand data and stack it into one table.

    Fetches the v1.4 "EIA Cleaned Hourly Electricity Demand Data" zip archive
    from Zenodo, reads every CSV found under
    ``subregions_and_balancing_authorities`` in each of the two listed
    releases, and concatenates them.

    The balancing authority and subregion codes come from each file name: the
    stem is split on ``"-"``; the first piece is used as the balancing
    authority code and the second piece (when present) as the subregion code.
    Files whose stem has no ``"-"`` get a subregion of ``None``.

    Returns:
        A dataframe with the columns ``datetime_utc``, ``baseline_demand_mwh``,
        ``balancing_authority_code_eia`` and
        ``balancing_authority_subregion_code_eia``.

    Raises:
        requests.HTTPError: If Zenodo answers with a 4xx/5xx status.
    """
    r = requests.get(
        "https://zenodo.org/records/14768167/files/truggles/EIA_Cleaned_Hourly_Electricity_Demand_Data-v1.4.zip?download=1",
        # Without a timeout a stalled connection would hang the notebook forever.
        timeout=60,
    )
    # Fail here with a clear HTTP error instead of later with a confusing
    # zip-parsing error when the body is an error page rather than an archive.
    r.raise_for_status()
    f = BytesIO(r.content)
    subregions = []
    base_path = "truggles-EIA_Cleaned_Hourly_Electricity_Demand_Data-5c959df/data/"
    archive = zipfile.Path(f, at=base_path)
    # NOTE(review): if the two releases cover overlapping time ranges, the
    # concatenated result will contain duplicate rows per timestamp/BA/subregion
    # -- confirm whether that is intended before merging on those keys.
    for release in ["release_2020_Oct_include_subregions", "release_2025_Jan_include_subregions"]:
        for path in (archive / release / "subregions_and_balancing_authorities").iterdir():
            if path.suffix != ".csv":
                continue
            # Close each archive member as soon as it has been parsed.
            with path.open() as csv_file:
                df = pd.read_csv(csv_file)

            # Get subregion/ba
            name = path.stem.split("-")
            ba = name[0]
            subregion = None if len(name) == 1 else name[1]
            df["balancing_authority_code_eia"] = ba
            df["balancing_authority_subregion_code_eia"] = subregion

            subregions.append(df)
    # NOTE(review): the source column is labelled "(MW)" but is renamed to an
    # "_mwh" column -- presumably hourly values make these equivalent; confirm.
    df = pd.concat(subregions).rename(columns={"cleaned demand (MW)": "baseline_demand_mwh"})
    # NOTE(review): "date_time" is parsed as-is with no timezone handling;
    # the "_utc" name assumes the source timestamps are already UTC -- confirm.
    df["datetime_utc"] = pd.to_datetime(df["date_time"])
    return df[["datetime_utc", "baseline_demand_mwh", "balancing_authority_code_eia", "balancing_authority_subregion_code_eia"]]

### Merge baseline imputed demand and new version

In [None]:
# Columns present in both tables that the comparison is joined on.
merge_keys = [
    "datetime_utc",
    "balancing_authority_code_eia",
    "balancing_authority_subregion_code_eia",
]

baseline_subregion_demand = extract_baseline_imputation()
new_subregion_demand = defs.load_asset_value("out_eia930__hourly_subregion_demand")

# Inner join: keep only rows that appear in both the new and baseline data.
df = new_subregion_demand.merge(
    baseline_subregion_demand,
    how="inner",
    on=merge_keys,
)

### Plot timeseries for comparison

In [None]:
# Selection passed to the comparison plot: which index columns/values to
# pick out, the date window, and the two series to draw against each other.
comparison_settings = {
    "idx_cols": ["balancing_authority_code_eia", "balancing_authority_subregion_code_eia"],
    "idx_vals": ("PJM", "PEP"),
    "start_date": "2022-05-15",
    "end_date": "2022-05-31",
    "timeseries_a": "baseline_demand_mwh",
    "timeseries_b": "demand_imputed_pudl_mwh",
}

plt.figure(figsize=(12, 6))
plot_compare_timeseries(df, **comparison_settings)