In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import calendar
import logging
import pathlib
import zipfile

from datetime import timedelta
import datetime

import geopandas
from geopandas import gpd
import numpy as np
import pandas as pd
import requests
from geopandas import GeoDataFrame
from shapely.geometry import GeometryCollection, MultiPolygon, Polygon
from shapely.ops import unary_union
from tqdm import tqdm
import seaborn as sns
import matplotlib as mpl
import sys

logger = logging.getLogger(__name__)

from shapely.geometry import Polygon, MultiPolygon

from pudl.analysis.demand_mapping import (corr_fig, error_heatmap, error_fig, compare_allocation,
                                          regional_demand_profiles, vec_error, uncovered_area_mismatch,
                                          layer_intersection, demand_allocation,
                                          sales_ratio_by_class_fips, DemandSpaceTimeSeries)

from pudl.output.ferc714 import Respondents
import matplotlib.pyplot as plt
import matplotlib as mpl

%matplotlib inline

import scipy
from collections import defaultdict
from IPython.display import clear_output
import pudl

from pudl.helpers import download_zip_url
from pudl.transform.ferc714 import OFFSET_CODES, TZ_CODES
import addfips

import sqlalchemy as sa

In [None]:
## Not set for map visualization
sns.set()
%matplotlib inline
mpl.rcParams["figure.figsize"] = (10,4)
mpl.rcParams["figure.dpi"] = 150
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [None]:
# Route INFO-and-above records from the root logger to stdout, using a single
# handler that replaces whatever handlers were attached before.
log_format = "%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s"
formatter = logging.Formatter(log_format)
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

logger = logging.getLogger()
logger.handlers = [handler]
logger.setLevel(logging.INFO)

# Define notebook parameters

In [None]:
# Default PUDL workspace settings; everything below is derived from them.
pudl_settings = pudl.workspace.setup.get_defaults()

# Directory for locally supplied inputs: <data_dir>/local
local_data = pathlib.Path(pudl_settings["data_dir"], "local")

# SQLAlchemy engine pointing at the PUDL database named in the settings.
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])

# Importing and Loading Relevant Dataframes

Major dataframes and tables being accessed:
- FERC-714 databases used to allocate demand annually at the county level (part of the PUDL database)
- Census databases for county shapefiles [Public]
- 2010 and 2012 ReEDS regions hourly demand data [Private]
- More coming...

## County Shapefiles and Population Data

In [None]:
%%time
county_gdf = (pudl.analysis.service_territory.get_census2010_gdf(pudl_settings, "county")
              .rename(columns={"GEOID10":"county_id_fips", "DP0010001": "population"}))

county_gdf["STATE_FIPS_int"] = pd.to_numeric(county_gdf["county_id_fips"].str[:2])

county_gdf =  (
    county_gdf
    # Remove all islands and non-mainland states and territories
    .query("STATE_FIPS_int<=56 & STATE_FIPS_int not in (2, 15, 44)")
    # Project to US Albers conic equal-area projection
    .to_crs("ESRI:102003")
    .drop("STATE_FIPS_int", axis=1).sort_values("county_id_fips").reset_index(drop=True)[
        ["county_id_fips", "population"]
    ]
)

## County-Planning Area Mapping Dataset & Planning Area Hourly Demand (FERC-714 Data)
- `ba_county_map` contains pairwise rows indicating which counties were served by which utility in each reporting year
- `pa_demand_ferc714_df` contains the hourly demand data for each planning area 
- One year of data is analyzed at a time

In [None]:
%%time

# PUDL output object built on the database engine; the tables below are
# pulled through it.
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)
# Hourly demand for each FERC-714 planning area.
pa_demand_ferc714_df = pudl_out.demand_hourly_pa_ferc714()
# FERC-714 respondent helper, used here only to obtain the county mapping.
ferc714_out = pudl.output.ferc714.Respondents(pudl_out)
# County <-> utility pairs per reporting year.
# NOTE(review): presumably georeferenced with county geometries, given the
# method name -- confirm against pudl.output.ferc714.
ba_county_map = ferc714_out.georef_counties()

## REEDS Data [OPTIONAL]
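- Some of the public ReEDS data is not readily accessible
- If that is the case, manually extract the time series data and the shapefile to `local/nrel/reeds_load_2012_est_time.csv` and `local/nrel/reeds_shp/` under the PUDL data directory, which is where the cells below expect them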

### REEDS Shapefiles

In [None]:
%%time
reeds_gdf_path = local_data / "nrel/reeds_shp"
if not reeds_gdf_path.is_dir():
    raise FileNotFoundError(
        f"ReEDS Balancing Area geometries not found."
        f"Expected them at {reeds_path}"
    )
reeds_gdf = gpd.read_file(reeds_gdf_path)

logger.info("Normalizing inconsistent geometries")
reeds_gdf["geometry"] = reeds_gdf["geometry"].buffer(0)

logger.info("Keeping Mainland US REEDS regions and dissolving to the PCA level")
reeds_gdf = (
    reeds_gdf.assign(pca_num=lambda x: pd.to_numeric(x.pca.replace("^p", value="", regex=True)))
    .query("pca_num<=134")
    .to_crs("ESRI:102003")
    .drop(["Shape_Leng", "Shape_Area", "OBJECTID", "gid", "demreg2", "pca_num"], axis=1)
    .dissolve(by=["pca"], as_index=False)
)

### 2012 REEDS Hourly Data

In [None]:
# Read the 2012 ReEDS hourly load table; "est_time" is parsed as a datetime
# and every other column is treated as one region's load series.
reeds_hourly_2012 = pd.read_csv(
    local_data / "nrel" / "reeds_load_2012_est_time.csv",
    infer_datetime_format=True,
    parse_dates=["est_time"],
)

logger.info("Converting local hourly demand data to UTC datetime for consistency")
# NOTE(review): OFFSET_CODES["EST"] is presumably the EST offset from UTC as
# a timedelta -- confirm in pudl.transform.ferc714.
reeds_hourly_2012 = reeds_hourly_2012.assign(
    utc_datetime=reeds_hourly_2012["est_time"] - OFFSET_CODES["EST"]
)

logger.info("adding utc_datetime and transforming dataset")
# Wide -> long: one row per (utc_datetime, pca) with its demand value.
reeds_hourly_2012 = (
    reeds_hourly_2012
    .drop(columns="est_time")
    .set_index("utc_datetime")
    .stack()
    .reset_index()
    .rename(columns={"level_1": "pca", 0: "demand_mwh"})
)

## Object Creation for Demand Allocation

In [None]:
# Build the allocation object from the FERC-714 planning-area demand and the
# county map, then register the ReEDS geometries under the name "reeds_gdf".
df_demand = DemandSpaceTimeSeries(
    pudl_engine,
    pa_demand_df=pa_demand_ferc714_df,
    ba_county_map=ba_county_map,
)
df_demand.add_gdf(reeds_gdf, "reeds_gdf")

# Allocate demand weighted by population and aggregate it by "pca".
# NOTE(review): (2012, 2013) presumably bound the years to allocate --
# confirm whether the end year is inclusive.
pca_demand, pca_layers = df_demand.allocate_demand(
    2012,
    2013,
    allocators={"population": 1},
    aggregators=["pca"],
)
# Move the index levels back into ordinary columns.
pca_demand = pca_demand.reset_index()