# Test Old Years

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Standard libraries
import logging
import os
import pathlib
import sys

# 3rd party libraries
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
import sqlalchemy as sa
from functools import reduce

# Local libraries
import pudl
import pudl.constants as pc

In [3]:
# Send log output to the notebook's stdout, showing only the bare message.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger: INFO and above, with this handler as the ONLY
# handler (replacing the list keeps re-running the cell from stacking
# duplicate handlers).
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [handler]

In [4]:
# Look up the default PUDL workspace settings and resolve the input
# directory ('pudl_in') as a Path.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_in = pathlib.Path(pudl_settings['pudl_in'])
# Datastore handed to the extractors in the cells below.
# NOTE(review): sandbox=True presumably targets a sandbox copy of the data
# archive rather than production -- confirm against the Datastore docs.
ds = pudl.workspace.datastore.Datastore(pudl_in, sandbox=True)
# Disabled: one-off EIA 861 extraction for 2019, kept for reference.
#eia861_raw_dfs = pudl.extract.eia861.Extractor(ds).extract([2019])

In [5]:
# Directory passed to the load functions below as the target for the
# generated datapackage CSVs.
# Set the PUDL_DATAPKG_DIR environment variable to point the notebook at your
# own workspace. The original author's local path is kept as the fallback so
# behavior is unchanged when the variable is not set.
datapkg_dir = os.environ.get(
    'PUDL_DATAPKG_DIR',
    '/Users/aesharpe/Desktop/Work/Catalyst_Coop/PUDL_DIR/datapkg/pudl-2008/eia-example/',
)

## Toggle Settings

In [6]:
# Years to process for each EIA form. An empty list means the form is
# skipped (EIA 861 is not extracted or transformed in this notebook).
eia860_years = [2009, 2008]
eia861_years = []
eia923_years = [2009]

# Table lists for each form, taken from the PUDL constants.
eia860_tables = pudl.constants.pudl_tables["eia860"]
eia861_tables = pudl.constants.pudl_tables["eia861"]
eia923_tables = pudl.constants.pudl_tables["eia923"]

# The same settings bundled into a single dict, keyed "<form>_years" and
# "<form>_tables".
eia_inputs = {
    "eia860_years": eia860_years,
    "eia860_tables": eia860_tables,
    "eia861_years": eia861_years,
    "eia861_tables": eia861_tables,
    "eia923_years": eia923_years,
    "eia923_tables": eia923_tables,
}

In [7]:
# Generate CSVs for the static EIA tables under datapkg_dir and keep the
# returned list of tables.
# NOTE(review): _load_static_tables_eia is a private pudl.etl helper (leading
# underscore); its signature may change between pudl versions.
static_tables = pudl.etl._load_static_tables_eia(datapkg_dir)

Loading Static EIA Tables fuel_type_eia923 dataframe into CSV
Loading Static EIA Tables prime_movers_eia923 dataframe into CSV
Loading Static EIA Tables fuel_type_aer_eia923 dataframe into CSV
Loading Static EIA Tables energy_source_eia923 dataframe into CSV
Loading Static EIA Tables transport_modes_eia923 dataframe into CSV


In [8]:
# Extract the raw EIA 860 and EIA 923 spreadsheets for the configured years.
# EIA 861 is not extracted here (its year list is empty).
eia860_extractor = pudl.extract.eia860.Extractor(ds)
eia860_raw_dfs = eia860_extractor.extract(eia860_years)

eia923_extractor = pudl.extract.eia923.Extractor(ds)
eia923_raw_dfs = eia923_extractor.extract(eia923_years)

Extracting eia860 spreadsheet data.
Columns for boiler_generator_assn are off: should be 4 but got 6
Columns for generator are off: should be 48 but got 50
Columns for generator_existing are off: should be 76 but got 77
Columns for generator_proposed are off: should be 55 but got 56
Columns for generator_retired are off: should be 75 but got 76
Columns for ownership are off: should be 14 but got 15
Columns for plant are off: should be 45 but got 50
Columns for utility are off: should be 20 but got 21
Extracting eia923 spreadsheet data.


In [9]:
# Show which raw EIA 860 tables the extractor returned.
eia860_raw_dfs.keys()

dict_keys(['boiler_generator_assn', 'generator', 'generator_existing', 'generator_proposed', 'generator_retired', 'ownership', 'plant', 'utility'])

In [10]:
# Transform the raw EIA 860 and EIA 923 tables, one form at a time.
# EIA 861 is not transformed here because it was never extracted.
eia860_transformed_dfs = pudl.transform.eia860.transform(
    eia860_raw_dfs,
    eia860_tables=eia860_tables,
)
eia923_transformed_dfs = pudl.transform.eia923.transform(
    eia923_raw_dfs,
    eia923_tables=eia923_tables,
)

Transforming raw EIA 860 DataFrames for ownership_eia860 concatenated across all years.
Transforming raw EIA 860 DataFrames for generators_eia860 concatenated across all years.
Transforming raw EIA 860 DataFrames for plants_eia860 concatenated across all years.
Transforming raw EIA 860 DataFrames for boiler_generator_assn_eia860 concatenated across all years.
Transforming raw EIA 860 DataFrames for utilities_eia860 concatenated across all years.
Transforming raw EIA 923 DataFrames for generation_fuel_eia923 concatenated across all years.
Transforming raw EIA 923 DataFrames for boiler_fuel_eia923 concatenated across all years.
Transforming raw EIA 923 DataFrames for generation_eia923 concatenated across all years.
Transforming raw EIA 923 DataFrames for coalmine_eia923 concatenated across all years.
Transforming raw EIA 923 DataFrames for fuel_receipts_costs_eia923 concatenated across all years.


In [11]:
#eia860_transformed_dfs['utilities_eia860'].columns.tolist()

In [12]:
# Combine the per-form transformed tables into a single new dict, leaving the
# per-form dicts untouched. On a key collision the EIA 923 entry wins.
eia_transformed_dfs = {
    **eia860_transformed_dfs,
    **eia923_transformed_dfs,
}

In [13]:
# Convert the dtypes of every table in the dict using PUDL's 'eia' settings.
# NOTE(review): this rebinds eia_transformed_dfs, so the unconverted dict is
# no longer reachable under this name after the cell runs.
eia_transformed_dfs = pudl.helpers.convert_dfs_dict_dtypes(eia_transformed_dfs, 'eia')

  mask = arr == x
  mask = arr == x


In [14]:
# Run the cross-form EIA transform over the combined tables for the
# configured 860/923 years. It returns the entity tables and an updated dict
# of per-form tables, which replaces the previous eia_transformed_dfs.
entities_dfs, eia_transformed_dfs = pudl.transform.eia.transform(
    eia_transformed_dfs,
    eia860_years=eia860_years,
    eia923_years=eia923_years,
)

Harvesting IDs & consistently static attributes for EIA plants
Average consistency of static plants values is 99.94%
Harvesting IDs & consistently static attributes for EIA generators
Average consistency of static generators values is 99.99%
Harvesting IDs & consistently static attributes for EIA utilities
Average consistency of static utilities values is 100.00%
Harvesting IDs & consistently static attributes for EIA boilers
Average consistency of static boilers values is 99.92%
Inferring complete EIA boiler-generator associations.
Multiple EIA unit codes:plant_id_eia=10725, unit_id_pudl=1, unit_id_eia=['F801' 'F802']
Multiple EIA unit codes:plant_id_eia=56309, unit_id_pudl=1, unit_id_eia=['G401' 'G402']


In [15]:
# Convert the dtypes of the entity tables using PUDL's 'eia' settings.
entities_dfs = pudl.helpers.convert_dfs_dict_dtypes(entities_dfs, 'eia')

  mask = arr == x


In [16]:
# Group the two dicts of tables under the labels that are passed to the CSV
# loader as the data source name.
transformed_dfs = {"Entities": entities_dfs, "EIA": eia_transformed_dfs}

In [17]:
# LOAD step: hand each group of tables (a dict of dataframes) to the CSV
# loader, labelled with its group name and targeting datapkg_dir.
for group_name, group_dfs in transformed_dfs.items():
    pudl.load.csv.dict_dump(group_dfs, group_name, datapkg_dir=datapkg_dir)

Loading Entities plants_entity_eia dataframe into CSV
Loading Entities generators_entity_eia dataframe into CSV
Loading Entities utilities_entity_eia dataframe into CSV
Loading Entities boilers_entity_eia dataframe into CSV
Loading EIA ownership_eia860 dataframe into CSV
Loading EIA generators_eia860 dataframe into CSV
Loading EIA plants_eia860 dataframe into CSV
Loading EIA boiler_generator_assn_eia860 dataframe into CSV
Loading EIA utilities_eia860 dataframe into CSV
Loading EIA generation_fuel_eia923 dataframe into CSV
Loading EIA boiler_fuel_eia923 dataframe into CSV
Loading EIA generation_eia923 dataframe into CSV
Loading EIA coalmine_eia923 dataframe into CSV
Loading EIA fuel_receipts_costs_eia923 dataframe into CSV


In [18]:
# Show the names of the tables in the 'EIA' group.
transformed_dfs['EIA'].keys()

dict_keys(['ownership_eia860', 'generators_eia860', 'plants_eia860', 'boiler_generator_assn_eia860', 'utilities_eia860', 'generation_fuel_eia923', 'boiler_fuel_eia923', 'generation_eia923', 'coalmine_eia923', 'fuel_receipts_costs_eia923'])