In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Standard libraries
import logging
import os
import pathlib
import sys

# 3rd party libraries
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
import sqlalchemy as sa

# Local libraries
import pudl
import pudl.output.ferc714

# Configure Display Parameters

In [3]:
sns.set()
%matplotlib inline
mpl.rcParams['figure.figsize'] = (10,4)
mpl.rcParams['figure.dpi'] = 150
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

# Use Python Logging facilities
* Using a logger from the beginning will make the transition into the PUDL package easier.
* Creating a logging handler here will also allow you to see the logging output coming from PUDL and other underlying packages.

In [4]:
# Emit bare log messages (no level/timestamp prefix) on stdout so they show
# up inline in the notebook output.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger so records from every package propagate here.
# The handler list is replaced rather than appended to, which keeps this
# cell idempotent: re-running it never produces duplicated log lines.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [handler]

# Define Functions

# Define Notebook Parameters

In [5]:
# Resolve the default PUDL workspace settings, then open one SQLAlchemy
# engine per database URL found in them.
pudl_settings = pudl.workspace.setup.get_defaults()
ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])
pudl_engine = sa.create_engine(pudl_settings['pudl_db'])

# Show the settings and both engines, in that order, as separate outputs.
display(pudl_settings, ferc1_engine, pudl_engine)

{'pudl_in': '/home/zane/code/catalyst/pudl-work',
 'data_dir': '/home/zane/code/catalyst/pudl-work/data',
 'settings_dir': '/home/zane/code/catalyst/pudl-work/settings',
 'pudl_out': '/home/zane/code/catalyst/pudl-work',
 'sqlite_dir': '/home/zane/code/catalyst/pudl-work/sqlite',
 'parquet_dir': '/home/zane/code/catalyst/pudl-work/parquet',
 'datapkg_dir': '/home/zane/code/catalyst/pudl-work/datapkg',
 'notebook_dir': '/home/zane/code/catalyst/pudl-work/notebook',
 'ferc1_db': 'sqlite:////home/zane/code/catalyst/pudl-work/sqlite/ferc1.sqlite',
 'pudl_db': 'sqlite:////home/zane/code/catalyst/pudl-work/sqlite/pudl.sqlite'}

Engine(sqlite:////home/zane/code/catalyst/pudl-work/sqlite/ferc1.sqlite)

Engine(sqlite:////home/zane/code/catalyst/pudl-work/sqlite/pudl.sqlite)

# Load Data

In [6]:
# Build the PUDL tabular output object on top of the PUDL database engine;
# it is passed to the FERC 714 respondents class in the next cell.
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)

In [7]:
%%time
# Build every FERC 714 respondent table inspected in the cells below.
# Per the log output saved with this cell, running it triggers the interim
# EIA 861 ETL (logged as "~2 minutes") and the extraction/transformation of
# the FERC 714 tables, so expect this cell to be slow.
# NOTE(review): the later outputs look like they build on the earlier ones
# (each adds columns to the previous table) -- confirm before reordering.
ferc714_out = pudl.output.ferc714.FERC714Respondents(pudl_out)
annualized = ferc714_out.annualize()
categorized = ferc714_out.categorize()
summarized = ferc714_out.summarize_demand()
fipsified = ferc714_out.fipsify()
counties_gdf = ferc714_out.georef_counties()

Running the interim EIA 861 ETL process! (~2 minutes)
Extracting eia861 spreadsheet data.


The data has not yet been validated, and the structure may change.


Transforming raw EIA 861 DataFrames for service_territory_eia861 concatenated across all years.
Assigned state FIPS codes for 100.00% of records.
Assigned county FIPS codes for 99.64% of records.
Transforming raw EIA 861 DataFrames for balancing_authority_eia861 concatenated across all years.
Started with 37622 missing BA Codes out of 38882 records (96.76%)
Ended with 12674 missing BA Codes out of 38882 records (32.60%)
Transforming raw EIA 861 DataFrames for sales_eia861 concatenated across all years.
Tidying the EIA 861 Sales table.
Dropped 0 duplicate records from EIA 861 Demand Response table, out of a total of 301045 records (0.0000% of all records). 
Performing value transformations on EIA 861 Sales table.
Transforming raw EIA 861 DataFrames for advanced_metering_infrastructure_eia861 concatenated across all years.
Tidying the EIA 861 Advanced Metering Infrastructure table.
Transforming raw EIA 861 DataFrames for demand_response_eia861 concatenated across all years.
Tidying the E

The data has not yet been validated, and the structure may change.


Extracting demand_monthly_ba_ferc714 from CSV into pandas DataFrame.
Extracting net_energy_load_ba_ferc714 from CSV into pandas DataFrame.
Extracting adjacency_ba_ferc714 from CSV into pandas DataFrame.
Extracting interchange_ba_ferc714 from CSV into pandas DataFrame.
Extracting lambda_hourly_ba_ferc714 from CSV into pandas DataFrame.
Extracting lambda_description_ferc714 from CSV into pandas DataFrame.
Extracting description_pa_ferc714 from CSV into pandas DataFrame.
Extracting demand_forecast_pa_ferc714 from CSV into pandas DataFrame.
Extracting demand_hourly_pa_ferc714 from CSV into pandas DataFrame.
Transforming respondent_id_ferc714.
Transforming id_certification_ferc714.
Transforming gen_plants_ba_ferc714.
Transforming demand_monthly_ba_ferc714.
Transforming net_energy_load_ba_ferc714.
Transforming adjacency_ba_ferc714.
Transforming interchange_ba_ferc714.
Transforming lambda_hourly_ba_ferc714.
Transforming lambda_description_ferc714.
Transforming description_pa_ferc714.
Transfor

In [8]:
# Print the column dtypes and non-null counts of the annualized table.
annualized.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2968 entries, 0 to 2967
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   respondent_id_ferc714    2968 non-null   Int64         
 1   respondent_name_ferc714  2968 non-null   string        
 2   eia_code                 2954 non-null   Int64         
 3   report_date              2968 non-null   datetime64[ns]
dtypes: Int64(2), datetime64[ns](1), string(1)
memory usage: 121.7 KB


In [9]:
# Spot-check 10 random rows; no random_state is passed, so the rows shown
# change between runs.
annualized.sample(10)

Unnamed: 0,respondent_id_ferc714,respondent_name_ferc714,eia_code,report_date
394,135,City of Burbank,2507,2008-01-01
77,106,Alliant Energy-West,9392,2013-01-01
198,115,"Arizona Electric Power Cooperative, Inc.",796,2008-01-01
1951,251,South Carolina Public Service Authority,17543,2011-01-01
2034,257,Southwest Power Pool (SPP),59504,2010-01-01
796,164,Entergy Corporation/Services (Entergy System),12506,2018-01-01
2594,299,Southern Power Company,16687,2010-01-01
65,105,Alliant Energy-East,20856,2015-01-01
1214,197,Louisville Gas & Electric and Kentucky Utilities,11249,2016-01-01
302,123,Boston Edison Company (NSTAR),1998,2014-01-01


In [10]:
# Print the column dtypes and non-null counts of the categorized table.
categorized.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2968 entries, 0 to 2967
Data columns (total 10 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   eia_code                      2954 non-null   Int64         
 1   respondent_type               2870 non-null   category      
 2   respondent_id_ferc714         2968 non-null   Int64         
 3   respondent_name_ferc714       2968 non-null   string        
 4   report_date                   2968 non-null   datetime64[ns]
 5   balancing_authority_id_eia    1806 non-null   Int64         
 6   balancing_authority_code_eia  1176 non-null   category      
 7   balancing_authority_name_eia  1806 non-null   string        
 8   utility_id_eia                1064 non-null   Int64         
 9   utility_name_eia              1064 non-null   string        
dtypes: Int64(4), category(2), datetime64[ns](1), string(3)
memory usage: 229.3 KB


In [11]:
# Spot-check 10 random rows; no random_state is passed, so the rows shown
# change between runs.
categorized.sample(10)

Unnamed: 0,eia_code,respondent_type,respondent_id_ferc714,respondent_name_ferc714,report_date,balancing_authority_id_eia,balancing_authority_code_eia,balancing_authority_name_eia,utility_id_eia,utility_name_eia
56,3285,utility,114,Aquila Networks (West Plains Energy - Kansas),2006-01-01,,,,3285.0,UtiliCorp United
730,40211,utility,271,"Wabash Valley Power Association, Inc.",2008-01-01,,,,40211.0,"Wabash Valley Power Assn, Inc"
716,19497,utility,268,United Illuminating Company,2008-01-01,,,,19497.0,United Illuminating Co
696,9996,balancing_authority,187,Kansas City Board of Public Utilities & Wyando...,2016-01-01,9996.0,KACY,Kansas City City of,,
1287,16868,balancing_authority,247,Seattle City Light,2019-01-01,16868.0,SCL,Seattle City of,,
586,40229,utility,295,"Old Dominion Elec. Coop., Inc.-Delmarva P&L Pl...",2018-01-01,,,,40229.0,Old Dominion Electric Coop
371,4045,balancing_authority,144,Columbia (MO) Water & Light,2013-01-01,4045.0,,Columbia City of,,
345,7601,utility,176,Green Mountain Power Corporation,2015-01-01,,,,7601.0,Green Mountain Power Corp
976,56456,utility,313,Plum Point Energy Associates (PLUM) -SERC REGION,2016-01-01,,,,56456.0,Amos Schwarzrock
829,21584,utility,286,Gen-Sys Energy,2009-01-01,,,,21584.0,GEN-SYS Energy


In [12]:
# Print the column dtypes and non-null counts of the demand summary table.
summarized.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2968 entries, 0 to 2967
Data columns (total 11 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   report_date                   2968 non-null   datetime64[ns]
 1   respondent_id_ferc714         2968 non-null   Int64         
 2   demand_annual_mwh             2968 non-null   float64       
 3   eia_code                      2954 non-null   Int64         
 4   respondent_type               2870 non-null   category      
 5   respondent_name_ferc714       2968 non-null   string        
 6   balancing_authority_id_eia    1806 non-null   Int64         
 7   balancing_authority_code_eia  1176 non-null   category      
 8   balancing_authority_name_eia  1806 non-null   string        
 9   utility_id_eia                1064 non-null   Int64         
 10  utility_name_eia              1064 non-null   string        
dtypes: Int64(4), category(2), date

In [13]:
# Spot-check 10 random rows; no random_state is passed, so the rows shown
# change between runs.
summarized.sample(10)

Unnamed: 0,report_date,respondent_id_ferc714,demand_annual_mwh,eia_code,respondent_type,respondent_name_ferc714,balancing_authority_id_eia,balancing_authority_code_eia,balancing_authority_name_eia,utility_id_eia,utility_name_eia
1783,2014-01-01,198,0.0,11479,balancing_authority,Madison Gas & Electric Company,11479.0,,Madison Gas & Electric Co,,
1628,2013-01-01,256,0.0,40580,balancing_authority,Southern Minnesota Municipal Power Agency,40580.0,,Southern Minnesota Mun P Agny,,
1116,2011-01-01,164,127655791.0,12506,balancing_authority,Entergy Corporation/Services (Entergy System),12506.0,,Entergy Electric System,,
2327,2016-01-01,326,0.0,10620,balancing_authority,City of Lake Worth,10620.0,,Lake Worth City of,,
2774,2019-01-01,120,0.0,1692,balancing_authority,Big Rivers Electric Corporation,1692.0,,Big Rivers Electric Corp,,
2360,2017-01-01,135,1135319.0,2507,utility,City of Burbank,,,,2507.0,City of Burbank Water and Power
2646,2018-01-01,214,0.0,13356,utility,Northeast Utilities Service Company,,,,13356.0,New England Hydro-Trans Corp
783,2009-01-01,259,29861920.0,17718,balancing_authority,Southwestern Public Service Company (Xcel),17718.0,SPS,Southwestern Public Service Co,,
2518,2017-01-01,300,0.0,13809,utility,NorthWestern Energy (South Dakota),,,,13809.0,NorthWestern Energy - (SD)
414,2007-01-01,321,0.0,56669,balancing_authority,MISO,56669.0,MISO,Midwest Indep System Operator,,


In [14]:
# Print the column dtypes and non-null counts of the FIPS-coded table.
fipsified.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100294 entries, 0 to 2967
Data columns (total 14 columns):
 #   Column                        Non-Null Count   Dtype         
---  ------                        --------------   -----         
 0   eia_code                      100280 non-null  Int64         
 1   respondent_type               100196 non-null  category      
 2   respondent_id_ferc714         100294 non-null  Int64         
 3   respondent_name_ferc714       100294 non-null  string        
 4   report_date                   100294 non-null  datetime64[ns]
 5   balancing_authority_id_eia    91877 non-null   Int64         
 6   balancing_authority_code_eia  82250 non-null   category      
 7   balancing_authority_name_eia  91877 non-null   string        
 8   utility_id_eia                8319 non-null    Int64         
 9   utility_name_eia              8319 non-null    string        
 10  state                         98818 non-null   string        
 11  county         

In [15]:
# Spot-check 10 random rows; no random_state is passed, so the rows shown
# change between runs.
fipsified.sample(10)

Unnamed: 0,eia_code,respondent_type,respondent_id_ferc714,respondent_name_ferc714,report_date,balancing_authority_id_eia,balancing_authority_code_eia,balancing_authority_name_eia,utility_id_eia,utility_name_eia,state,county,state_id_fips,county_id_fips
87582,56669,balancing_authority,321,MISO,2014-01-01,56669,MISO,Midwest Indep System Operator,,,MI,Calhoun,26,26025
35451,14725,balancing_authority,301,PJM Interconnection Eastern Hub,2017-01-01,14725,PJM,PJM Interconnection,,,WV,Lewis,54,54041
54629,14725,balancing_authority,305,PJM Interconnection Dominion Hub,2012-01-01,14725,PJM,PJM Interconnection,,,PA,Allegheny,42,42003
39100,14725,balancing_authority,302,PJM Interconnection Western Hub,2014-01-01,14725,PJM,PJM Interconnection,,,KY,Clay,21,21051
73771,59504,balancing_authority,257,Southwest Power Pool (SPP),2015-01-01,59504,SWPP,Southwest Power Pool,,,ND,Barnes,38,38003
65903,15466,balancing_authority,235,Public Service Company of Colorado,2013-01-01,15466,PSCO,Public Service Co of Colorado,,,CO,Alamosa,8,8003
36101,14725,balancing_authority,301,PJM Interconnection Eastern Hub,2018-01-01,14725,PJM,PJM Interconnection,,,PA,Susquehanna,42,42115
57428,14725,balancing_authority,305,PJM Interconnection Dominion Hub,2017-01-01,14725,PJM,PJM Interconnection,,,OH,Sandusky,39,39143
48323,14725,balancing_authority,304,PJM Interconnection North Illinois Hub,2010-01-01,14725,PJM,PJM Interconnection,,,PA,Sullivan,42,42113
22049,13501,balancing_authority,211,"New York Independent System Operator, Inc.",2006-01-01,13501,NYIS,ISO New York,,,NY,Jefferson,36,36045


In [16]:
# Print the column dtypes and non-null counts of the county-level
# GeoDataFrame (includes the geometry column).
counties_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 100294 entries, 0 to 100293
Data columns (total 16 columns):
 #   Column                        Non-Null Count   Dtype         
---  ------                        --------------   -----         
 0   county_id_fips                98800 non-null   string        
 1   county_name_census            98783 non-null   object        
 2   geometry                      98783 non-null   geometry      
 3   eia_code                      100280 non-null  Int64         
 4   respondent_type               100196 non-null  category      
 5   respondent_id_ferc714         100294 non-null  Int64         
 6   respondent_name_ferc714       100294 non-null  string        
 7   report_date                   100294 non-null  datetime64[ns]
 8   balancing_authority_id_eia    91877 non-null   Int64         
 9   balancing_authority_code_eia  82250 non-null   category      
 10  balancing_authority_name_eia  91877 non-null   string        
 11  utili

In [17]:
# Spot-check 10 random rows; no random_state is passed, so the rows shown
# change between runs.
counties_gdf.sample(10)

Unnamed: 0,county_id_fips,county_name_census,geometry,eia_code,respondent_type,respondent_id_ferc714,respondent_name_ferc714,report_date,balancing_authority_id_eia,balancing_authority_code_eia,balancing_authority_name_eia,utility_id_eia,utility_name_eia,state,county,state_id_fips
99030,,,,9273,balancing_authority,184,Indianapolis Power & Light Company,2015-01-01,9273.0,,Indianapolis Power & Light Co,,,,,
21891,24015.0,Cecil County,"MULTIPOLYGON (((-76.05719 39.42021, -76.05525 ...",14725,balancing_authority,305,PJM Interconnection Dominion Hub,2006-01-01,14725.0,PJM,PJM Interconnection,,,MD,Cecil,24.0
30130,31083.0,Harlan County,"MULTIPOLYGON (((-99.51785 40.00197, -99.52069 ...",28503,balancing_authority,273,Western Area Power Administration - Colorado-M...,2018-01-01,28503.0,WACM,Colorado Missouri/Loveland,,,NE,Harlan,31.0
33699,36101.0,Steuben County,"MULTIPOLYGON (((-77.09975 42.27422, -77.09974 ...",13501,balancing_authority,211,"New York Independent System Operator, Inc.",2009-01-01,13501.0,NYIS,ISO New York,,,NY,Steuben,36.0
59064,28115.0,Pontotoc County,"MULTIPOLYGON (((-89.09212 34.38020, -89.09200 ...",18642,balancing_authority,263,Tennessee Valley Authority,2013-01-01,18642.0,TVA,Tennessee Valley Authority,,,MS,Pontotoc,28.0
17885,36099.0,Seneca County,"MULTIPOLYGON (((-76.73721 42.95004, -76.73687 ...",13511,utility,212,New York State Electric & Gas Corporation,2017-01-01,,,,13511.0,New York State Elec & Gas Corp,NY,Seneca,36.0
64911,29025.0,Caldwell County,"MULTIPOLYGON (((-94.20669 39.61419, -94.20663 ...",19436,balancing_authority,108,Ameren Corporation Control Area,2009-01-01,19436.0,,Union Electric Co,,,MO,Caldwell,29.0
31402,13171.0,Lamar County,"MULTIPOLYGON (((-84.24837 33.07856, -84.24837 ...",7140,utility,296,Georgia Power Company,2013-01-01,,,,7140.0,Georgia Power Co,GA,Lamar,13.0
52483,21187.0,Owen County,"MULTIPOLYGON (((-85.00666 38.55752, -85.00770 ...",14725,balancing_authority,301,PJM Interconnection Eastern Hub,2018-01-01,14725.0,PJM,PJM Interconnection,,,KY,Owen,21.0
15031,4021.0,Pinal County,"MULTIPOLYGON (((-111.68604 33.20464, -111.6859...",25471,balancing_authority,275,Western Area Power Administration - Upper Miss...,2018-01-01,25471.0,WALC,USDOE-WAPA-Upper Missouri-West,,,AZ,Pinal,4.0


In [18]:
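# georef_respondents() takes about 45 minutes to run, so this cell is left
# commented out. Uncomment the lines below to build and inspect the result.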
#respondents_gdf = ferc714_out.georef_respondents()
#display(respondents_gdf.info())
#respondents_gdf.sample(10)