In [1]:
import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import pudl, ferc1, eia923, settings, constants, analysis
from pudl import models, models_ferc1, models_eia923
from pudl import clean_eia923, clean_ferc1, clean_pudl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
pudl_engine  = pudl.db_connect_pudl()

### Pulling the required tables

In [3]:
# Convert the generation_eia923 table into a dataframe
g9 = analysis.simple_select('generation_eia923', pudl_engine)
# Get yearly net generation by plant_id, year and generator_id
g9_summed = analysis.yearly_sum_eia(g9 , 'net_generation_mwh')
g9_summed.reset_index(inplace=True)

In [4]:
# Convert the generators_eia860 table into a dataframe
g8 = analysis.simple_select('generators_eia860', pudl_engine)

# In order to use the energy source associated with each generator
g8_es = g8[['plant_id','generator_id', 'energy_source_1']]
g8_es = g8_es.rename(columns={'energy_source_1': 'energy_source'})
g8_es.drop_duplicates(['plant_id','generator_id'], inplace=True)

# In order to use the capacity of each generator
g8 = g8[['plant_id','generator_id','nameplate_capacity_mw','summer_capacity_mw','winter_capacity_mw','year']]
# Reformat the year column to a datetime object
g8['year'] = pd.to_datetime(g8['year'], format='%Y')
g8.rename(columns={'year': 'report_date'},inplace=True)
g8 = g8.set_index(pd.DatetimeIndex(g8['report_date']).year)
g8.drop('report_date',axis=1, inplace=True)
g8.reset_index(inplace=True)

In [5]:
# Convert the fuel_receipts_costs_eia923 table into a dataframe
frc9 = analysis.simple_select('fuel_receipts_costs_eia923', pudl_engine)
# Get yearly fuel cost by plant_id, year and energy_source
frc9_summed = analysis.yearly_sum_eia(frc9 , 'fuel_cost_per_mmbtu', columns=['plant_id','report_date','energy_source'])
frc9_summed = frc9_summed.reset_index()

In [6]:
frc9 = frc9.set_index(pd.DatetimeIndex(frc9['report_date']).year)
frc9.drop('report_date', axis=1, inplace=True)
frc9.reset_index(inplace=True)

In [7]:
frc9.drop_duplicates(subset=['report_date', 'plant_id','energy_source']).count()

report_date                      15815
fuel_receipt_id                  15815
plant_id                         15815
contract_type                    15801
contract_expiration_date          6904
energy_source                    15813
fuel_group                       15813
coalmine_id                       3924
supplier                         15815
fuel_quantity                    15815
average_heat_content             15815
average_sulfur_content           15815
average_ash_content              15815
average_mercury_content           6843
fuel_cost_per_mmbtu               8819
primary_transportation_mode      14029
secondary_transportation_mode     1179
natural_gas_transport             8260
dtype: int64

In [8]:
# Convert the boiler_fuel_eia923 table into a dataframe
bf9 = analysis.simple_select('boiler_fuel_eia923', pudl_engine)
bf9['fuel_consumed_mmbtu'] = bf9['fuel_qty_consumed']*bf9['fuel_mmbtu_per_unit']
# Get yearly fuel consumed by plant_id, year and boiler_id
bf9_summed = analysis.yearly_sum_eia(bf9 , 'fuel_consumed_mmbtu', columns=['plant_id','report_date','boiler_id'])
bf9_summed.reset_index(inplace=True)

In [9]:
# Convert the boiler_generator_assn_eia860 table into a dataframe
bga8 = analysis.simple_select('boiler_generator_assn_eia860', pudl_engine)
bga8.drop(['id','operator_id'],axis=1, inplace=True)

### Generating Capacity Factor

In [10]:
# merge the generation and capacity to calculate capacity factor
capacity_factor = g9_summed.merge(g8, on=['plant_id','generator_id','report_date'])
capacity_factor['capacity_factor'] =\
            capacity_factor['net_generation_mwh']/(capacity_factor['nameplate_capacity_mw']*8760)

In [11]:
# Remove unrealistic capacity factors: < 0 or > 1.5
capacity_factor = capacity_factor[capacity_factor.capacity_factor > 0]
capacity_factor = capacity_factor[capacity_factor.capacity_factor <= 1.5]

### Generating Fuel Cost

In [12]:
# Merge the generation table with the generator table to include energy_source
net_gen = g9_summed.merge(g8_es, how='left', on=['plant_id','generator_id'])
fuel_cost_per_mwh = net_gen.merge(frc9_summed,how='left',on=['plant_id','report_date','energy_source'])
fuel_cost_per_mwh['fuel_cost_per_mwh'] = fuel_cost_per_mwh['fuel_cost_per_mmbtu'] / fuel_cost_per_mwh['net_generation_mwh']

### Generating Heat Rate

In [13]:
bg = bf9_summed.merge(bga8,how='left',on=['plant_id','boiler_id'])

# Get yearly fuel_consumed_mmbtu by plant_id, year and generator_id
bg_gb = bg.groupby(by=['plant_id','report_date','generator_id'])
bg_summed = bg_gb.agg({'fuel_consumed_mmbtu':np.sum})
bg_summed.reset_index(inplace=True)

heat_rate = bg_summed.merge(g9_summed,how='left',on=['plant_id', 'report_date', 'generator_id'])
heat_rate['heat_rate_mmbtu_mwh'] = heat_rate['fuel_consumed_mmbtu'] / heat_rate['net_generation_mwh']

### Merge fields together

In [14]:
capacity_factor.count()

plant_id                 16000
report_date              16000
generator_id             16000
net_generation_mwh       16000
nameplate_capacity_mw    16000
summer_capacity_mw       15756
winter_capacity_mw       15750
capacity_factor          16000
dtype: int64

In [15]:
fuel_cost_per_mwh.count()

plant_id               27030
report_date            27030
generator_id           27030
net_generation_mwh     27030
energy_source          26476
fuel_cost_per_mmbtu     9997
fuel_cost_per_mwh       9997
dtype: int64

In [16]:
heat_rate.count()

plant_id               23450
report_date            23450
generator_id           23450
fuel_consumed_mmbtu    23450
net_generation_mwh     22459
heat_rate_mmbtu_mwh    21621
dtype: int64

In [17]:
mcoe_by_generator = fuel_cost_per_mwh.merge(heat_rate,on=['plant_id','report_date','generator_id'])

In [18]:
mcoe_by_generator = mcoe_by_generator.merge(capacity_factor, on=['plant_id','report_date','generator_id'])

In [19]:
mcoe_by_generator[mcoe_by_generator['plant_id'] == 3]

Unnamed: 0,plant_id,report_date,generator_id,net_generation_mwh_x,energy_source,fuel_cost_per_mmbtu,fuel_cost_per_mwh,fuel_consumed_mmbtu,net_generation_mwh_y,heat_rate_mmbtu_mwh,net_generation_mwh,nameplate_capacity_mw,summer_capacity_mw,winter_capacity_mw,capacity_factor
0,3,2011,1,312130.0,BIT,1398440.0,4.480313,7130630.0,312130.0,22.845065,312130.0,153.1,138.0,138.0,0.232732
1,3,2011,2,191475.0,BIT,1398440.0,7.303512,4572941.0,191475.0,23.882708,191475.0,153.1,137.0,137.0,0.142769
2,3,2011,3,710069.0,BIT,1398440.0,1.969442,15029960.0,710069.0,21.1669,710069.0,272.0,249.0,249.0,0.298008
3,3,2011,4,1175685.0,BIT,1398440.0,1.189468,23938650.0,1175685.0,20.361445,1175685.0,403.7,362.0,362.0,0.332451
4,3,2011,5,2264413.0,BIT,1398440.0,0.617573,46112000.0,2264413.0,20.363777,2264413.0,788.8,726.5,726.5,0.327706
5,3,2011,A1ST,1397495.0,NG,1028300.0,0.735817,402424.5,1397495.0,0.287961,1397495.0,195.2,173.0,192.0,0.817271
6,3,2011,A2ST,1381781.0,NG,1028300.0,0.744184,346711.6,1381781.0,0.250916,1381781.0,195.2,173.0,192.0,0.808082
7,3,2012,1,152102.0,BIT,1098330.0,7.22101,3486280.0,152102.0,22.920672,152102.0,153.1,138.0,138.0,0.113411
8,3,2012,2,123410.0,BIT,1098330.0,8.899846,2987908.0,123410.0,24.211231,123410.0,153.1,137.0,137.0,0.092018
9,3,2012,3,496557.0,BIT,1098330.0,2.211891,10569410.0,496557.0,21.285395,496557.0,272.0,249.0,249.0,0.208399


In [20]:
mcoe_by_generator.count()

plant_id                 12938
report_date              12938
generator_id             12938
net_generation_mwh_x     12938
energy_source            12938
fuel_cost_per_mmbtu       5032
fuel_cost_per_mwh         5032
fuel_consumed_mmbtu      12938
net_generation_mwh_y     12938
heat_rate_mmbtu_mwh      12938
net_generation_mwh       12938
nameplate_capacity_mw    12938
summer_capacity_mw       12836
winter_capacity_mw       12828
capacity_factor          12938
dtype: int64

In [21]:
del mcoe_by_generator['net_generation_mwh_y']
del mcoe_by_generator['net_generation_mwh_x']

In [None]:
mcoe_annotations = pd.read_csv('mcoe_field_annotations.csv')

In [24]:
xlsx_writer = pd.ExcelWriter('EIA_mcoe_2011-2015.xlsx')
mcoe_by_generator.to_excel(xlsx_writer, sheet_name='MCOE by Generator', index=False)
mcoe_annotations.to_excel(xlsx_writer, sheet_name='MCOE Annotations', index=False)
xlsx_writer.save()