# MCOE Compilation

In [249]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [251]:
import pudl
import pandas as pd
import sqlalchemy as sa
import sys
import logging
import pathlib
import json

In [252]:
# Logging: send records at INFO and above to stdout, printing only the message text.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Assign (rather than append) so re-running this cell never stacks duplicate handlers.
logger.handlers = [handler]

# Display: never truncate DataFrame columns.
pd.set_option("display.max_columns", None)

In [253]:
# Default PUDL workspace settings: a dictionary of paths to the key pudl directories.
pudl_settings = pudl.workspace.setup.get_defaults()
# The "pudl_db" entry is what SQLAlchemy uses to connect to the PUDL database.
pudl_db_url = pudl_settings["pudl_db"]
pudl_engine = sa.create_engine(pudl_db_url)

In [254]:
# Build the PUDL output object that compiles derived tables such as mcoe.
# Re-run this cell to start over and wipe any tables it has already saved.
# NOTE(review): freq='AS' presumably requests annual (year-start) aggregation,
# and rolling=True presumably enables rolling-average fuel-cost filling -- confirm
# against the PudlTabl documentation.
pudl_out = pudl.output.pudltabl.PudlTabl(
    pudl_engine,
    freq='AS',
    rolling=True,
)

In [255]:
# Compile the MCOE table through the output object's mcoe() method.
# The underlying function is pudl.analysis.mcoe.mcoe().
mcoe = pudl_out.mcoe()

filling in fuel cost NaNs with rolling averages


## Part 1: Basic Plant & Unit Information

#### Plant Level Data:  *mcoe_plant_df*
One row per plant and broad fuel type (coal, gas, oil, waste), showing the most recent reporting date for each.

In [256]:
# One row per plant and broad fuel type; the plant attributes are included as
# grouping keys so they are carried through to the result.
plant_fuel_keys = [
    'plant_id_eia',
    'fuel_type_code_pudl',
    'plant_name_eia',
    'state',
    'city',
    'latitude',
    'longitude',
]
# Latest report_date within each group.
# NOTE(review): groupby drops rows with a missing value in any key column by
# default -- confirm that plants lacking a name/city/coordinates may be omitted.
mcoe_plant_df = mcoe.groupby(plant_fuel_keys, as_index=False)['report_date'].max()

In [257]:
# Display the plant-level table.
mcoe_plant_df

Unnamed: 0,plant_id_eia,fuel_type_code_pudl,plant_name_eia,state,city,latitude,longitude,report_date
0,3,coal,Barry,AL,Bucks,31.006900,-88.010300,2017-01-01
1,3,gas,Barry,AL,Bucks,31.006900,-88.010300,2017-01-01
2,7,coal,Gadsden,AL,Gadsden,34.012800,-85.970800,2014-01-01
3,7,gas,Gadsden,AL,Gadsden,34.012800,-85.970800,2018-01-01
4,8,coal,Gorgas,AL,Parrish,33.644344,-87.196486,2018-01-01
...,...,...,...,...,...,...,...,...
612,58503,gas,Carty Generating Station,OR,Boardman,45.698611,-119.813056,2018-01-01
613,58557,gas,Mesquite Generating Station Block 1,AZ,Arlington,33.345000,-112.864167,2018-01-01
614,58697,gas,L V Sutton Combined Cycle,NC,Wilmington,34.283056,-77.985278,2018-01-01
615,59220,gas,Wildcat Point Generation Facility,MD,Conowingo,39.719364,-76.161625,2018-01-01


#### Unit Level Data: *mcoe_unit_df*
One row per plant, generator, and broad fuel type, taken from the most recent reporting date for each.

In [258]:
# Columns kept for the unit-level table: reporting date, plant attributes,
# generator attributes, and the fuel / generation / heat-rate measures.
unit_columns = [
    'report_date',
    'plant_id_eia',
    'plant_name_eia',
    'state',
    'city',
    'latitude',
    'longitude',
    'generator_id',
    'nameplate_power_factor',
    'generator_age_years',
    'retirement_date',
    'planned_retirement_date',
    'technology_description',
    'fuel_type_code_pudl',
    'total_mmbtu',
    'net_generation_mwh',
    'heat_rate_mmbtu_mwh',
]
mcoe_unit_pre = mcoe[unit_columns]

In [261]:
# Sort so that, within each (plant, generator, fuel type) group, the most recent
# report_date comes first. A plain ['report_date'].max() as used for the plant
# table would return only the date and drop the other unit-level columns.
mcoe_unit1 = mcoe_unit_pre.sort_values(
    ['plant_id_eia', 'generator_id', 'fuel_type_code_pudl', 'report_date'],
    ascending=[True, False, False, False],
)
# Drop records that already have a retirement date. The mask is built from
# mcoe_unit1 itself; it previously referenced `mcoe_new1`, a name that is never
# defined in this notebook and raised NameError on a fresh kernel.
mcoe_unit2 = mcoe_unit1.loc[mcoe_unit1['retirement_date'].isnull()]
mcoe_unit2 = mcoe_unit2.drop('retirement_date', axis=1)
# Collapse to one row per plant / generator / fuel type.
# NOTE(review): GroupBy.first() returns the first *non-null* value of each column,
# so a value missing in the latest year is filled from an earlier year -- confirm
# this is intended rather than taking the latest row verbatim.
mcoe_unit_df = mcoe_unit2.groupby(
    ['plant_id_eia', 'generator_id', 'fuel_type_code_pudl'],
    as_index=False,
).first()

In [262]:
# Display the unit-level table.
mcoe_unit_df

Unnamed: 0,plant_id_eia,generator_id,fuel_type_code_pudl,report_date,plant_name_eia,state,city,latitude,longitude,nameplate_power_factor,generator_age_years,planned_retirement_date,technology_description,total_mmbtu,net_generation_mwh,heat_rate_mmbtu_mwh
0,3,1,coal,2012-01-01,Barry,AL,Bucks,31.0069,-88.010300,,20.0,2019-12-01,,1.743140e+06,152102.0,11.460336
1,3,1,gas,2017-01-01,Barry,AL,Bucks,31.0069,-88.010300,0.85,20.0,2019-12-01,Natural Gas Steam Turbine,1.966533e+05,7221.0,27.233530
2,3,2,coal,2012-01-01,Barry,AL,Bucks,31.0069,-88.010300,,20.0,2019-12-01,,1.493954e+06,123410.0,12.105616
3,3,2,gas,2017-01-01,Barry,AL,Bucks,31.0069,-88.010300,0.85,20.0,2019-12-01,Natural Gas Steam Turbine,1.855619e+05,7498.0,24.748185
4,3,3,coal,2012-01-01,Barry,AL,Bucks,31.0069,-88.010300,,20.0,,,5.284706e+06,496557.0,10.642697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1789,59338,CTG2,gas,2018-01-01,,AZ,Gila Bend,32.9750,-112.694444,0.85,17.0,,Natural Gas Fired Combined Cycle,3.375803e+06,462659.0,7.296524
1790,59338,ST9,gas,2018-01-01,,AZ,Gila Bend,32.9750,-112.694444,0.85,17.0,,Natural Gas Fired Combined Cycle,4.453222e+06,610321.0,7.296524
1791,59784,CTG5,gas,2017-01-01,Gila River Power Block 3,AZ,Gila Bend,32.9750,-112.694400,0.85,17.0,,Natural Gas Fired Combined Cycle,5.030924e+06,676536.0,7.436299
1792,59784,CTG6,gas,2017-01-01,Gila River Power Block 3,AZ,Gila Bend,32.9750,-112.694400,0.85,17.0,,Natural Gas Fired Combined Cycle,4.862373e+06,653870.0,7.436299
