In [1]:
import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import pudl, ferc1, eia923, settings, constants, analysis
from pudl import models, models_ferc1, models_eia923
from pudl import clean_eia923, clean_ferc1, clean_pudl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
pudl_engine  = pudl.db_connect_pudl()

In [3]:
# Convert the generation_eia923 table into a dataframe
g9 = analysis.simple_select('generation_eia923', pudl_engine)
# Get yearly net generation by plant_id, year and generator_id
g9_summed = analysis.yearly_sum_eia(g9 , 'net_generation_mwh')
g9_summed.reset_index(inplace=True)

In [4]:
# Convert the generators_eia860 table into a dataframe
g8 = analysis.simple_select('generators_eia860', pudl_engine)
g8_es = g8[['plant_id','generator_id', 'energy_source_1']]
g8_es = g8_es.rename(columns={'energy_source_1': 'energy_source'})
g8_es.drop_duplicates(['plant_id','generator_id'], inplace=True)

In [5]:
# Merge the generation table with the generator table to include energy_source
net_gen = g9_summed.merge(g8_es, how='left', on=['plant_id','generator_id'])

In [6]:
# Convert the fuel_receipts_costs_eia923 table into a dataframe
frc9 = analysis.simple_select('fuel_receipts_costs_eia923', pudl_engine)

In [7]:
frc9['fuel_cost'] = frc9['fuel_quantity'] * frc9['average_heat_content'] * frc9['fuel_cost_per_mmbtu']

In [8]:
frc9

Unnamed: 0,fuel_receipt_id,plant_id,report_date,contract_type,contract_expiration_date,energy_source,fuel_group,coalmine_id,supplier,fuel_quantity,average_heat_content,average_sulfur_content,average_ash_content,average_mercury_content,fuel_cost_per_mmbtu,primary_transportation_mode,secondary_transportation_mode,natural_gas_transport,fuel_cost
0,1,3,2009-01-01,C,2009-12-01,BIT,Coal,1.0,Alabama Coal,120393.0,24.000,1.00,12.0,,63110.0,RV,,,1.823521e+11
1,2,3,2009-01-01,C,2013-12-01,BIT,Coal,2.0,Interocean Coal,199388.0,23.000,1.00,9.5,,35030.0,RV,,,1.606449e+11
2,3,3,2009-01-01,C,2013-12-01,BIT,Coal,2.0,Interocean Coal,43105.0,22.785,0.46,5.7,,35570.0,RV,,,3.493498e+10
3,4,3,2009-01-01,C,2011-12-01,BIT,Coal,2.0,Interocean Coal,9458.0,23.790,1.00,13.3,,49800.0,RV,,,1.120529e+10
4,5,3,2009-01-01,C,2009-12-01,BIT,Coal,1.0,Alabama Coal,9094.0,24.000,1.00,12.0,,62900.0,RV,,,1.372830e+10
5,6,3,2009-01-01,C,,NG,Natural Gas,,Various,1902799.0,1.036,0.00,0.0,,68090.0,PL,,F,1.342258e+11
6,7,3,2009-01-01,C,,NG,Natural Gas,,Various,28469.0,1.045,0.00,0.0,,56800.0,PL,,F,1.689806e+09
7,8,7,2009-01-01,S,,BIT,Coal,3.0,Alabama Coal,21205.0,24.908,1.66,13.1,,39760.0,TR,,F,2.100020e+10
8,9,7,2009-01-01,S,,NG,Natural Gas,,Various,3189.0,1.014,0.00,0.0,,63810.0,,,F,2.063390e+08
9,10,7,2009-01-01,S,,NG,Natural Gas,,Various,11.0,1.009,0.00,0.0,,61210.0,,,F,6.793698e+05


In [9]:
# Get yearly fuel cost by plant_id, year and energy_source
frc9_summed = analysis.yearly_sum_eia(frc9 , 'fuel_cost', columns=['plant_id','report_date','energy_source'])
frc9_summed = frc9_summed.reset_index()

In [10]:
fuel_cost_per_mwh = net_gen.merge(frc9_summed,how='left',on=['plant_id','report_date','energy_source'])
fuel_cost_per_mwh['fuel_cost_per_mwh'] = fuel_cost_per_mwh['fuel_cost'] / fuel_cost_per_mwh['net_generation_mwh']

In [11]:
fuel_cost_per_mwh

Unnamed: 0,plant_id,report_date,generator_id,net_generation_mwh,energy_source,fuel_cost,fuel_cost_per_mwh
0,3,2009,1,221908.000,BIT,3.019373e+12,1.360642e+07
1,3,2009,2,394031.000,BIT,3.019373e+12,7.662779e+06
2,3,2009,3,1286393.000,BIT,3.019373e+12,2.347162e+06
3,3,2009,4,1626547.000,BIT,3.019373e+12,1.856308e+06
4,3,2009,5,4513101.000,BIT,3.019373e+12,6.690240e+05
5,3,2009,A1ST,1122697.000,NG,1.881205e+12,1.675612e+06
6,3,2009,A2ST,1033733.000,NG,1.881205e+12,1.819817e+06
7,3,2010,1,435334.000,BIT,3.124745e+12,7.177810e+06
8,3,2010,2,538495.000,BIT,3.124745e+12,5.802737e+06
9,3,2010,3,906322.000,BIT,3.124745e+12,3.447720e+06
