## Recreating the Master Unit List

#### setup/imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import pudl
import pudl.constants as pc
import pudl.extract.ferc1
import sqlalchemy as sa
import logging
import sys
import copy
from copy import deepcopy

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline

In [None]:
import sys
sys.path.append("../") # go to parent dir
from plant_part_agg_eia import *
from plant_parts import plant_parts

In [None]:
# Send every log record to stdout through a single handler that prints only
# the message text; any handlers already attached to the root logger are
# replaced rather than added to.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)
logger = logging.getLogger()
logger.handlers = [handler]
logger.setLevel(logging.INFO)

#### defining a table grabbing object

In [None]:
# Reporting window and aggregation frequency. No date bounds are set here;
# 'AS' is presumably the pandas annual (year-start) offset alias -- confirm
# against how CompileTables consumes it.
start_date = end_date = None
freq = 'AS'

# Connect to the PUDL database named in the workspace defaults and load the
# table metadata for that engine.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])
pt = pudl.output.pudltabl.get_table_meta(pudl_engine)

In [None]:
from plant_parts import plant_parts

In [None]:
# Instantiate the table compiler on the PUDL engine. It is bound to the name
# `self` -- presumably so that method bodies can be pasted into cells and run
# unchanged (NOTE(review): confirm this is the intent).
self = CompileTables(pudl_engine, freq='AS')

In [None]:
# Build all of the plant-part tables in one call. The result is used below as
# a dictionary keyed by plant-part name (e.g. 'plant', 'plant_gen',
# 'plant_unit').
compiled_plant_parts = self.generate_master_unit_list(plant_parts)

In [None]:
# Steps 1 and 2 of the compilation, run by hand:
#   1) aggregate the data points by generator
#   2) generate the proportional data by ownership percentage
plant_gen_df = self.slice_by_ownership(
    self.aggregate_plant_part(plant_parts['plant_gen'])
)

In [None]:
# 3) aggregate everything by each plant part
compiled_dfs = {}
for part_name, plant_part in plant_parts.items():
    logger.info(part_name)
    # Start from the generator-level table; parts that name a denormalization
    # table get that table's columns pulled in before aggregating.
    source_df = plant_gen_df
    if plant_part['denorm_table']:
        logger.info('   denormiiee')
        source_df = self.denoramlize_table(
            plant_gen_df,
            plant_part['id_cols'],
            plant_part['denorm_table'],
            plant_part['denorm_cols'],
        )
    compiled_dfs[part_name] = self.agg_cols(plant_part, source_df)

In [None]:
def plot_plant_vs_agg(compiled_plant_parts, field, xy_limits, scale):
    """
    Plot the plant-level value of a field against each other plant part.

    For every plant part in ``compiled_plant_parts`` other than ``'plant'``,
    the plant table is merged with that part's table on ``plant_id_eia`` and
    ``report_date``, and one scatter plot is drawn with the plant-level value
    of ``field`` on the x-axis and the part-level value on the y-axis.

    Plant parts that cannot be merged on those keys, or whose merged table
    does not hold ``field`` at both levels, are skipped without creating a
    figure.

    Args:
        compiled_plant_parts (dict): plant-part name -> DataFrame. Must hold
            a ``'plant'`` entry to compare against; if it does not, nothing
            is plotted.
        field (str): name of the column to compare.
        xy_limits (dict): field name -> ``(lower, upper)`` limits, applied to
            both axes. A field with no entry keeps matplotlib's automatic
            limits.
        scale (str): axis scale handed to ``set_xscale``/``set_yscale``,
            e.g. ``"log"``.

    Returns:
        None. The figures are created as a side effect.
    """
    plant_df = compiled_plant_parts.get('plant')
    if plant_df is None:
        # Without a plant-level table there is nothing to compare against.
        return

    merge_keys = ['plant_id_eia', 'report_date']
    field_plant = field + '_plant'
    limits = xy_limits.get(field)

    for plant_gran, df in compiled_plant_parts.items():
        if plant_gran == 'plant':
            # Comparing the plant table with itself is meaningless.
            continue
        field_gran = field + '_' + plant_gran
        try:
            merge_df = plant_df.merge(
                df, on=merge_keys, suffixes=('_plant', '_' + plant_gran))
        except KeyError:
            # One of the tables lacks a merge key; skip this plant part.
            continue
        # The suffixed columns only exist when both tables carry `field`.
        if (field_plant not in merge_df.columns
                or field_gran not in merge_df.columns):
            continue

        _, ax = plt.subplots(ncols=1, nrows=1, figsize=(8, 8))
        # alpha must be numeric; a string is rejected by matplotlib.
        ax.scatter(merge_df[field_plant],
                   merge_df[field_gran],
                   color='black', alpha=0.1, label=field)
        if limits is not None:
            ax.set_ylim(limits[0], limits[1])
            ax.set_xlim(limits[0], limits[1])
        ax.set_xscale(scale)
        ax.set_yscale(scale)
        ax.set_ylabel(f'{plant_gran} {field}')
        ax.set_xlabel(f'Plant {field}')
        ax.set_title(f"Plant vs {plant_gran}: {field}")

In [None]:
# Columns to compare between the plant level and every other plant part.
# Currently left out: 'opex_fuel', 'total_mmbtu' and 'capacity_factor'.
fields_to_plot = [
    # Simple Quantities
    'capacity_mw',
    'net_generation_mwh',
    # Derived values
    'heat_rate_mmbtu_mwh',
    'fuel_cost_per_mwh',
    'fuel_cost_per_mmbtu',
    'total_fuel_cost',
]

# (lower, upper) axis limits per field; the same pair is used for x and y.
xy_limits = dict(
    # Simple Quantities
    capacity_mw=(1e0, 1e4),
    net_generation_mwh=(1e3, 1e8),
    # Derived values
    capacity_factor=(0, 1.0),
    heat_rate_mmbtu_mwh=(6, 16),
    fuel_cost_per_mwh=(10, 80),
    fuel_cost_per_mmbtu=(1e0, 1e1),
    total_fuel_cost=(1e7, 1e10),
)

# One set of plant-vs-part scatter plots per field, on log-log axes.
for field in fields_to_plot:
    plot_plant_vs_agg(
        compiled_plant_parts=compiled_plant_parts,
        field=field,
        xy_limits=xy_limits,
        scale="log",
    )

## Playing with the compiled outputs 

In [None]:
# Show the keys of the compiled dictionary, i.e. the names of the plant parts
# that have a table available.
compiled_plant_parts.keys()

In [None]:
# Various tables to poke at for debugging.
# NOTE: despite its name, `generat860` holds the 'generation_eia923' table.
generat860, gens860, bga860, own860 = (
    self.grab_the_table(table_name)
    for table_name in (
        'generation_eia923',
        'generators_eia860',
        'boiler_generator_assn_eia860',
        'ownership_eia860',
    )
)
# Shortcuts to three of the compiled plant-part tables.
plant_unit, plant_gen, plant = (
    compiled_plant_parts[part]
    for part in ('plant_unit', 'plant_gen', 'plant')
)

In [None]:
# To look at an individual plant, filter on its plant_id_eia.
is_plant_6179 = plant_unit['plant_id_eia'] == 6179
plant_unit[is_plant_6179]

In [None]:
# Selecting on plant_id_eia only. To select on a second criterion as well
# (e.g. report_date), combine two parenthesised conditions with `&`.
is_plant_260 = plant_unit['plant_id_eia'] == 260
plant_unit[is_plant_260]

In [None]:
# Rows where a field is empty: here, units with no capacity_mw value.
capacity_missing = plant_unit['capacity_mw'].isnull()
plant_unit[capacity_missing]

In [None]:
# Rows where a field is filled in: here, units that do have a capacity_mw.
capacity_present = plant_unit['capacity_mw'].notnull()
plant_unit[capacity_present]