## Compare EIA 930 demand imputation

In [None]:
import pandas as pd
import matplotlib
# from pudl.analysis.timeseries_evaluation import plot_compare_imputation
from pudl.analysis.timeseries_evaluation import plot_compare_imputation, extract_baseline_eia930_imputation

from pudl.etl import defs

### Visualization settings

In [None]:
%matplotlib inline

In [None]:
from pudl.metadata.enums import IMPUTATION_CODES

In [None]:
# Bare expression as the last line of the cell: the notebook displays the
# imported IMPUTATION_CODES value for reference.
IMPUTATION_CODES

In [None]:
# Default figure geometry and resolution for every plot in this notebook.
matplotlib.rcParams.update(
    {
        "figure.figsize": (10, 6),
        "figure.dpi": 150,
    }
)

# Raise the pandas display limits so wide / long frames are shown in full.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 300
pd.options.display.max_colwidth = 1000

# Optional dark theme: applied only when matplotx is installed. If it is
# missing, the ImportError is ignored and the current style is kept.
try:
    import matplotx
    matplotlib.style.use(matplotx.styles.onedark)
except ImportError:
    pass

### Extract Alicia/Tyler's imputation results

In [None]:
# Baseline imputation results to compare against. This object is merged with
# the new PUDL output in a later cell on datetime_utc plus the balancing
# authority / subregion code columns, so it needs to carry those columns.
baseline_subregion_demand = extract_baseline_eia930_imputation()

### Merge baseline imputed demand and new version

In [None]:

# The new imputation output is read from a parquet file on S3. To use a local
# Dagster asset instead, swap in the commented-out line:
# new_subregion_demand = defs.load_asset_value("out_eia930__hourly_subregion_demand")
new_subregion_demand = pd.read_parquet(
    "s3://pudl.catalyst.coop/tmp/eia930/out_eia930__hourly_subregion_demand.parquet"
)

# Inner join: keep only the hour / BA / subregion rows present in BOTH the new
# output and the baseline, so the two series can be compared row by row.
df = pd.merge(
    new_subregion_demand,
    baseline_subregion_demand,
    how="inner",
    on=[
        "datetime_utc",
        "balancing_authority_code_eia",
        "balancing_authority_subregion_code_eia",
    ],
)

### Plot time series for comparison

In [None]:
# Column holding the reported demand values; reused by the plotting cells
# that follow.
reported_col = "demand_reported_mwh"

# SWPP / INDN subregion, 2024-12-14 to 2024-12-21:
# baseline series (a) against the PUDL-imputed series (b).
plot_compare_imputation(
    df,
    timeseries_a="baseline_demand_mwh",
    timeseries_b="demand_imputed_pudl_mwh",
    reported_col=reported_col,
    idx_cols=[
        "balancing_authority_code_eia",
        "balancing_authority_subregion_code_eia",
    ],
    idx_vals=("SWPP", "INDN"),
    start_date="2024-12-14",
    end_date="2024-12-21",
)

In [None]:
# SWPP / INDN subregion, 2019-12-01 to 2019-12-31:
# baseline series (a) against the PUDL-imputed series (b).
plot_compare_imputation(
    df,
    timeseries_a="baseline_demand_mwh",
    timeseries_b="demand_imputed_pudl_mwh",
    reported_col=reported_col,
    idx_cols=[
        "balancing_authority_code_eia",
        "balancing_authority_subregion_code_eia",
    ],
    idx_vals=("SWPP", "INDN"),
    start_date="2019-12-01",
    end_date="2019-12-31",
)

In [None]:
# CISO / PGAE subregion, 2019-02-05 to 2019-02-17:
# baseline series (a) against the PUDL-imputed series (b).
plot_compare_imputation(
    df,
    timeseries_a="baseline_demand_mwh",
    timeseries_b="demand_imputed_pudl_mwh",
    reported_col=reported_col,
    idx_cols=[
        "balancing_authority_code_eia",
        "balancing_authority_subregion_code_eia",
    ],
    idx_vals=("CISO", "PGAE"),
    start_date="2019-02-05",
    end_date="2019-02-17",
)

In [None]:
# CISO / VEA subregion, 2019-12-09 to 2019-12-31:
# baseline series (a) against the PUDL-imputed series (b).
plot_compare_imputation(
    df,
    timeseries_a="baseline_demand_mwh",
    timeseries_b="demand_imputed_pudl_mwh",
    reported_col=reported_col,
    idx_cols=[
        "balancing_authority_code_eia",
        "balancing_authority_subregion_code_eia",
    ],
    idx_vals=("CISO", "VEA"),
    start_date="2019-12-09",
    end_date="2019-12-31",
)