# Setup

Import libraries and specify some display configuration.

In [28]:
from pathlib import Path

import pandas as pd
import pyarrow.parquet as pq
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

In [18]:
# Render floats with two fixed decimal places in dataframe output
pd.options.display.float_format = "{:.2f}".format

# Load data

Here we load the `core_ferc714__yearly_planning_area_demand_forecast` asset.

At the bottom of this section you'll see a preview of the resulting dataframe.

In [2]:
# Put in the name of the file that you want to load.
# This is matched against file stems (file names without their extension) under PUDL_OUTPUT.
ferc714_yearly = "core_ferc714__yearly_planning_area_demand_forecast"

In [3]:
def path_finder(target_asset_name):
    """Return the path of the file under PUDL_OUTPUT whose stem is ``target_asset_name``.

    The PUDL output directory is read from the ``export PUDL_OUTPUT=...`` line of
    ``~/.bashrc``. When that file or line is missing, the ``PUDL_OUTPUT`` environment
    variable of the running kernel is used instead. The directory is then searched
    recursively and the first file whose name without its final suffix equals
    ``target_asset_name`` is reported.

    Args:
        target_asset_name: File stem to look for (e.g. the name of a PUDL asset).

    Returns:
        The matching file path as a string, or an empty string when PUDL_OUTPUT
        cannot be determined or no matching file exists. The outcome is also printed.
    """
    import os  # local import so this cell does not rely on the imports cell

    export_prefix = "export PUDL_OUTPUT="
    pudl_output_filepath = None

    # Find the PUDL_OUTPUT dir path, preferring the value declared in ~/.bashrc
    bashrc_path = Path.home() / ".bashrc"
    if bashrc_path.is_file():
        with bashrc_path.open("r") as file:
            for line in file:
                if line.startswith(export_prefix):
                    # Keep everything after the first "=" so that paths containing
                    # "=" are not truncated, then drop surrounding quotes.
                    pudl_output_filepath = line[len(export_prefix):].strip().strip("\"'")
                    break

    # Fall back to the environment of the running kernel
    if not pudl_output_filepath:
        pudl_output_filepath = os.environ.get("PUDL_OUTPUT")

    if not pudl_output_filepath:
        print("PUDL_OUTPUT not found in .bashrc or the environment")
        return ""

    # The shell would expand "$HOME" / "~" in the exported value, so do the same here
    pudl_output_path = Path(os.path.expandvars(pudl_output_filepath)).expanduser()

    # Using the PUDL_OUTPUT dir path, find the target file
    target_asset_filepath = ""
    for path in pudl_output_path.rglob("*"):
        if path.is_file() and path.stem == target_asset_name:
            target_asset_filepath = str(path)
            break

    print(f"Target asset filepath: {target_asset_filepath}")
    return target_asset_filepath


In [4]:
# Locate the file for the requested asset
ferc714_yearly_path = path_finder(ferc714_yearly)
if not ferc714_yearly_path:
    # path_finder reports failure with an empty string; stop here with a clear
    # message instead of handing an empty path to the parquet reader.
    raise FileNotFoundError(
        f"Could not locate a file for asset '{ferc714_yearly}' under PUDL_OUTPUT."
    )

# Read the parquet file and convert it to a pandas dataframe
ferc714_yearly_table = pq.read_table(ferc714_yearly_path)
ferc714_yearly_df = ferc714_yearly_table.to_pandas()
ferc714_yearly_df.head()

Target asset filepath: /Users/sam/Documents/pudl-data/pudl_output/parquet/core_ferc714__yearly_planning_area_demand_forecast.parquet


Unnamed: 0,respondent_id_ferc714,report_year,forecast_year,summer_peak_demand_mw,winter_peak_demand_mw,net_demand_mwh
0,2,2006,2007,1108.0,0.0,0.0
1,2,2006,2008,1141.0,0.0,0.0
2,2,2006,2009,1173.0,0.0,0.0
3,2,2006,2010,1261.0,0.0,0.0
4,2,2006,2011,1292.0,0.0,0.0


# Initial checks

## Is the data complete?

Did all respondents always file 10 years' worth of data?

In [19]:
# Number of forecast rows filed per respondent and report year, ordered by report year
counts = (
    ferc714_yearly_df
    .groupby(["respondent_id_ferc714", "report_year"])
    .size()
    .reset_index(name="forecast_year_count")
    .sort_values(by="report_year")
)
print("For this FERC 714 form, respondents were expected to provide 10 years' worth of forecasted demand.")
print("Here we can see the respondents who did not provide 10 years' worth for each report year:")
# Keep only the filings that do not contain exactly 10 forecast rows
counts.loc[counts["forecast_year_count"].ne(10)]

For this FERC 714 form, respondents were expected to provide 10 years' worth of forecasted demand.
Here we can see the respondents who did not provide 10 years' worth for each report year:


Unnamed: 0,respondent_id_ferc714,report_year,forecast_year_count
174,125,2006,9
1088,235,2009,9
1770,321,2010,9
849,211,2013,9
939,219,2013,3
787,201,2018,1


## Do we see any obvious anomalies?

### Big diffs

Do we see any years where there's a huge diff from one year to the next in terms of the predicted demand? Spoiler: yes, we do. Scroll down to the dataframe at the end of this section to see which respondents/years had big differences.

In [21]:
# Define threshold for big_diffs (e.g., more than 100% change).
# The value is in percent and is compared against the absolute year-over-year change.
threshold = 100.0

In [22]:
def big_diff_check(threshold, df=None):
    """Find large changes in predicted demand from one forecast year to the next.

    Rows are grouped by respondent and report year and ordered by forecast year.
    Within each group every metric is compared with the preceding forecast year,
    and changes whose absolute percentage exceeds ``threshold`` are collected.
    A preceding value of 0 produces an infinite percentage change, which is
    reported as well.

    Args:
        threshold: Cutoff in percent; a change is reported when
            ``abs(percentage change) > threshold``.
        df: Dataframe holding the columns ``respondent_id_ferc714``, ``report_year``,
            ``forecast_year`` and the three metric columns. Defaults to the
            notebook-level ``ferc714_yearly_df``.

    Returns:
        A dataframe with one row per flagged (forecast year, metric) pair and the
        columns ``respondent_id_ferc714``, ``report_year``, ``forecast_year``,
        ``metric_type``, ``percentage_change``, ``value_in_prior_year`` and
        ``value_in_this_forecast_year``. It is empty when nothing exceeds the threshold.
    """
    if df is None:
        df = ferc714_yearly_df

    metric_cols = ["summer_peak_demand_mw", "winter_peak_demand_mw", "net_demand_mwh"]
    output_cols = [
        "respondent_id_ferc714", "report_year", "forecast_year",
        "metric_type", "percentage_change", "value_in_prior_year", "value_in_this_forecast_year"
    ]

    # Collect plain records and build the dataframe once at the end; growing a
    # dataframe with pd.concat inside the loop is quadratic and triggers warnings.
    records = []

    # Group the respondents/report years
    for _, group in df.groupby(["respondent_id_ferc714", "report_year"]):
        # Sort by forecast_year so "previous row" means "previous forecast year"
        ordered = group.sort_values(by="forecast_year")

        # Percentage change per metric, and the value each change is measured against.
        # shift(1) follows the sorted row order, so it stays correct whatever the
        # original index labels are.
        pct_change = (ordered[metric_cols].pct_change() * 100).round(2)
        prior_values = ordered[metric_cols].shift(1)

        # NaN (first row of a group) compares as False and is therefore never flagged
        flagged = (pct_change.abs() > threshold).to_numpy()

        # nonzero() walks the mask row by row, then metric by metric
        for row_pos, col_pos in zip(*flagged.nonzero()):
            metric = metric_cols[col_pos]
            records.append({
                "respondent_id_ferc714": ordered["respondent_id_ferc714"].iloc[row_pos],
                "report_year": ordered["report_year"].iloc[row_pos],
                "forecast_year": ordered["forecast_year"].iloc[row_pos],
                "metric_type": metric,
                "percentage_change": pct_change.iloc[row_pos, col_pos],
                "value_in_prior_year": prior_values.iloc[row_pos, col_pos],
                "value_in_this_forecast_year": ordered[metric].iloc[row_pos],
            })

    big_diffs = pd.DataFrame(records, columns=output_cols)
    for id_col in ["respondent_id_ferc714", "report_year", "forecast_year"]:
        big_diffs[id_col] = big_diffs[id_col].astype(int)

    return big_diffs

In [24]:
# Run the check with the threshold defined above; the bare name on the last line
# displays the resulting table.
big_diffs = big_diff_check(threshold)
big_diffs

  big_diffs = pd.concat([big_diffs, pd.DataFrame({


Unnamed: 0,respondent_id_ferc714,report_year,forecast_year,metric_type,percentage_change,value_in_prior_year,value_in_this_forecast_year
0,120,2006,2013,net_demand_mwh,514.4,1928828.0,11850629.0
1,125,2017,2024,winter_peak_demand_mw,944.3,3384.0,35339.0
2,134,2007,2009,winter_peak_demand_mw,9786.79,106.0,10480.0
3,159,2006,2008,winter_peak_demand_mw,inf,0.0,2848.0
4,159,2006,2008,net_demand_mwh,inf,0.0,13399136.0
5,159,2008,2010,winter_peak_demand_mw,inf,0.0,3029.0
6,164,2008,2013,net_demand_mwh,913.07,13086800.0,132578224.0
7,201,2006,2009,winter_peak_demand_mw,270.0,40.0,148.0
8,211,2013,2015,summer_peak_demand_mw,102.38,16833.0,34066.0
9,211,2013,2015,winter_peak_demand_mw,100.47,12368.5,24795.0


# Trends

What did each respondent (planning area) predict each year per metric? Use the plot below to see the metrics for a specific respondent (or group of respondents by selecting multiple IDs in the dropdown).

Note that not all respondents filed with FERC each year, so you may have to play around with the plot a bit.

In [33]:
# Create dropdown widgets
# unique() keeps values in order of first appearance, so sort first to get
# ascending options and a predictable default selection.
respondent_ids = ferc714_yearly_df["respondent_id_ferc714"].sort_values().unique()
metrics = ["summer_peak_demand_mw", "winter_peak_demand_mw", "net_demand_mwh"]
report_years = ferc714_yearly_df["report_year"].sort_values().unique()

# Multi-select so that several respondents can be compared on one plot
respondent_dropdown = widgets.SelectMultiple(
    options=respondent_ids,
    value=[respondent_ids[0]],
    description="Respondent ID:",
    disabled=False,
)

metric_dropdown = widgets.Dropdown(
    options=metrics,
    value=metrics[0],
    description="Metric:",
    disabled=False,
)

report_year_dropdown = widgets.Dropdown(
    options=report_years,
    value=report_years[0],
    description="Report Year:",
    disabled=False,
)

# Define plot function
def plot_data(respondent_id, metric, report_year):
    """Plot one forecast metric against forecast year for the selected respondents.

    Args:
        respondent_id: Iterable of respondent IDs; one line is drawn per ID that
            has rows for ``report_year``.
        metric: Name of the ``ferc714_yearly_df`` column plotted on the y-axis.
        report_year: Report year whose forecasts are shown.
    """
    filtered_df = ferc714_yearly_df[ferc714_yearly_df["report_year"] == report_year]
    metric_label = metric.replace("_", " ").title()

    fig, ax = plt.subplots(figsize=(10, 6))
    for rid in respondent_id:
        # Order by forecast year so the line is drawn left to right
        respondent_data = filtered_df[
            filtered_df["respondent_id_ferc714"] == rid
        ].sort_values(by="forecast_year")
        if respondent_data.empty:
            # This respondent did not file for the selected report year
            continue
        # Markers keep filings with a single forecast year visible
        ax.plot(
            respondent_data["forecast_year"],
            respondent_data[metric],
            marker="o",
            label=f"Respondent {rid}",
        )

    ax.set_xlabel("Forecast Year")
    ax.set_ylabel(metric_label)
    ax.set_title(f"{metric_label} for Selected Respondent IDs in Report Year {report_year}")

    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend()
    else:
        # Nothing was drawn: say so instead of showing an empty legend and axes
        ax.text(
            0.5, 0.5,
            "No data for the selected respondent(s) in this report year",
            ha="center", va="center", transform=ax.transAxes,
        )
    ax.grid(True)
    plt.show()

# Create interactive plot
# Each widget is passed under the name of the plot_data argument it controls;
# as the last expression of the cell, the resulting widget is displayed.
widgets.interactive(plot_data, respondent_id=respondent_dropdown, metric=metric_dropdown, report_year=report_year_dropdown)


interactive(children=(SelectMultiple(description='Respondent ID:', index=(0,), options=(2, 101, 102, 103, 104,…