In [None]:
# Pick up edits to imported modules without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Standard libraries
import logging
import pathlib
import sys

# Third-party libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import sqlalchemy as sa

# Local / project libraries
import pudl
from pudl_ct.guts import *

# Notebook-wide display settings: show every DataFrame column (no truncation)
# and draw all figures with matplotlib's dark theme.
pd.set_option('display.max_columns', None)
mpl.style.use('dark_background')

In [None]:
# Send every log record of level INFO and above to stdout as a bare message,
# so library logging shows up inline in the cell output.
formatter = logging.Formatter(fmt='%(message)s')
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)

logger = logging.getLogger()  # no name given -> the root logger
logger.handlers = [handler]  # replace, rather than append to, any existing handlers
logger.setLevel(logging.INFO)

In [None]:
# Look up the default PUDL workspace settings and open a SQLAlchemy engine
# on the database URL stored under the 'pudl_db' key.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_db_url = pudl_settings['pudl_db']
pudl_engine = sa.create_engine(pudl_db_url)

In [None]:
# Options forwarded unchanged to PudlTabl; 'AS' is the pandas year-start
# frequency alias.
pudl_out_options = dict(
    freq='AS',
    fill_fuel_cost=True,
    roll_fuel_cost=True,
    fill_net_gen=False,
)
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, **pudl_out_options)

In [None]:
%%time
# this takes a few minutes to run... it is mostly the first two lines
gen = prep_gens_eia(pudl_out)
unit = agg_gen_eia(gen=gen, pudl_out=pudl_out)

steam_df = prep_plants_ferc(pudl_out)
steam_by_fuel = agg_plants_ferc_by_plant_fuel(steam_df)
gens_w_ferc1 = merge_eia_ferc(gen=gen, unit=unit, steam_df=steam_df, steam_by_fuel=steam_by_fuel, pudl_out=pudl_out)
gens_w_ferc1_nems = add_nems(gens_w_ferc1, pudl_out, path_nems).pipe(fill_in_opex_w_nems)

In [None]:
%%time
# this takes several minutes.. it is aggregating the hourly cems records
epacems_path = pathlib.Path(pudl_settings['parquet_dir']) / 'epacems'
cems_by_boiler = get_cems(epacems_path)

In [None]:
# NOTE(review): the keyword is `cems_byplant` while the frame passed in is
# named `cems_by_boiler` -- confirm the aggregation level `stuff` expects.
eia_cems_merge = stuff(
    gen=gen,
    pudl_out=pudl_out,
    cems_byplant=cems_by_boiler,
)

In [None]:
# Export both compiled tables to ../outputs as CSV files; pandas infers gzip
# compression from the '.gz' suffix.
outputs_dir = pathlib.Path.cwd().parent / 'outputs'
# to_csv does not create missing directories, so make sure the target exists
# before writing (otherwise a fresh checkout fails on this cell).
outputs_dir.mkdir(parents=True, exist_ok=True)

gens_w_ferc1_nems.to_csv(outputs_dir / 'carbon-tracker-plant-compilation.csv.gz')
eia_cems_merge.to_csv(outputs_dir / 'carbon-tracker-cems.csv.gz')

### Charting the different FERC methods

In [None]:
# Input for the per-fuel histogram in the next cell.
eia_ferc_fuel = merge_eia_ferc_simple(
    unit=unit,
    steam_df=steam_df,
    steam_by_fuel=steam_by_fuel,
)
# eia_ferc_unit = merge_eia_ferc_unit(gen)

In [None]:
# Generation-weighted distribution of non-fuel opex per MWh, one histogram per
# fuel type, for a single report year.
hist_year = 2019

fig, ax = plt.subplots()
for fuel in ['coal', 'gas']:
    fuel_df = eia_ferc_fuel[
        (eia_ferc_fuel.fuel_type_code_pudl == fuel)
        & (eia_ferc_fuel.report_date.dt.year == hist_year)
    ]
    ax.hist(
        fuel_df.opex_nonfuel_per_mwh,
        weights=fuel_df.net_generation_mwh,
        range=(-10, 50),
        bins=100,
        # Semi-transparent so the second histogram does not hide the first.
        alpha=0.6,
        label=fuel,
    )

# Each bar is the sum of the weights (net generation) falling in that bin.
ax.set_xlabel('Opex Non-fuel ($/MWh)')
ax.set_ylabel('Net generation (MWh)')
ax.set_title(f"Generation-weighted Non-Fuel Opex by fuel type ({hist_year})")
ax.legend()
plt.show()

In [None]:
# Keep only rows where both non-fuel opex estimates are present and differ.
# The not-null checks are required because NaN != NaN evaluates to True.
non_matching = gens_w_ferc1.loc[
    lambda df: (
        df.opex_nonfuel_per_mwh_plant_fuel.notna()
        & df.opex_nonfuel_per_mwh_unit.notna()
        & df.opex_nonfuel_per_mwh_plant_fuel.ne(df.opex_nonfuel_per_mwh_unit)
    )
]

In [None]:
# Log-log scatter of the plant_fuel estimate (x) against the unit estimate (y)
# for every record where the two disagree.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(
    non_matching.opex_nonfuel_per_mwh_plant_fuel,
    non_matching.opex_nonfuel_per_mwh_unit,
    alpha=0.1,
    color='aquamarine',
)

# Identical limits on both axes keep the plot square.
lims = (1e0, 1e5)
ax.set_ylim(lims)
ax.set_xlim(lims)

ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlabel('Opex Non-fuel ($/MWh) plant_fuel')
ax.set_ylabel('Opex Non-fuel ($/MWh) plant_unit')
ax.set_title(
    "Non-Fuel Opex plant_fuel vs. plant_unit (Non-matching)",
    fontdict={'fontsize': 18, 'fontweight': 'bold'},
)

In [None]:
# Log-log scatter of the plant_fuel estimate (x) against the unit estimate (y)
# for the non-matching records, one colour per fuel type.
fig, ax = plt.subplots(figsize=(10, 10))

# Shared axis limits; defined once up front rather than on every loop pass.
lims = (1e0, 1e3)

for fuel_type in ['gas', 'coal']:
    non_matching_fuel = non_matching[non_matching.fuel_type_code_pudl == fuel_type]
    ax.scatter(
        non_matching_fuel.opex_nonfuel_per_mwh_plant_fuel,
        non_matching_fuel.opex_nonfuel_per_mwh_unit,
        alpha=0.3,
        label=fuel_type,
    )

ax.set_ylim(lims)
ax.set_xlim(lims)
ax.legend()

ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Opex Non-fuel ($/MWh) plant_fuel')
ax.set_ylabel('Opex Non-fuel ($/MWh) plant_unit')
ax.set_title(
    "Non-Fuel Opex plant_fuel vs. plant_unit (Non-matching)",
    {'fontsize': 18, 'fontweight': 'bold'},
)

In [None]:
# Log-log scatter of the plant_fuel estimate (x) against the unit estimate (y)
# for the non-matching records, one colour per report year.
fig, ax = plt.subplots(figsize=(10, 10))

# Years to overlay. To plot every available year instead, use:
# non_matching.report_date.sort_values().dt.year.unique()
plot_years = [2017, 2018, 2019]

# Shared axis limits; defined once up front rather than on every loop pass.
lims = (1e0, 1e5)

for year in plot_years:
    non_matching_year = non_matching[non_matching.report_date.dt.year == year]
    ax.scatter(
        non_matching_year.opex_nonfuel_per_mwh_plant_fuel,
        non_matching_year.opex_nonfuel_per_mwh_unit,
        alpha=0.3,
        label=year,
    )

ax.set_ylim(lims)
ax.set_xlim(lims)
ax.legend()

ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Opex Non-fuel ($/MWh) plant_fuel')
ax.set_ylabel('Opex Non-fuel ($/MWh) plant_unit')
# The figure overlays every year in plot_years, so the title names the whole
# range instead of whichever value the loop variable held last.
ax.set_title(
    f"Non-Fuel Opex plant_fuel vs. plant_unit {plot_years[0]}-{plot_years[-1]} (Non-matching)",
    {'fontsize': 18, 'fontweight': 'bold'},
)