In [1]:
import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import pudl, ferc1, eia923, settings, constants
from pudl import models, models_ferc1, models_eia923
from pudl import clean_eia923, clean_ferc1, clean_pudl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
pudl_engine  = pudl.db_connect_pudl()

In [3]:
#Pull in the generation table
g_tbl = models.PUDLBase.metadata.tables['generation_eia923']
g_select = sa.sql.select([g_tbl,]) #creates a sql Select object
g = pd.read_sql(g_select, pudl_engine) #converts sql object to pandas dataframe

In [4]:
g = g.set_index(pd.DatetimeIndex(g['report_date']))
g_gb = g.groupby(by=['plant_id',pd.TimeGrouper(freq='A'),'generator_id'])
g_summed = g_gb.agg({'net_generation_mwh':np.sum})

In [5]:
g_summed.reset_index(inplace=True)

In [6]:
g_summed

Unnamed: 0,plant_id,report_date,generator_id,net_generation_mwh
0,3,2009-12-31,1,221908.000
1,3,2009-12-31,2,394031.000
2,3,2009-12-31,3,1286393.000
3,3,2009-12-31,4,1626547.000
4,3,2009-12-31,5,4513101.000
5,3,2009-12-31,A1ST,1122697.000
6,3,2009-12-31,A2ST,1033733.000
7,3,2010-12-31,1,435334.000
8,3,2010-12-31,2,538495.000
9,3,2010-12-31,3,906322.000


In [7]:
#Pull in the generator_eia860 table
g8_tbl = models.PUDLBase.metadata.tables['generators_eia860']
g8_select = sa.sql.select([g8_tbl,]) #creates a sql Select object
g8 = pd.read_sql(g8_select, pudl_engine) #converts sql object to pandas dataframe

In [8]:
g8 = g8[['plant_id','generator_id', 'energy_source_1']]
g8 = g8.rename(columns={'energy_source_1': 'energy_source'})
g8.drop_duplicates(['plant_id','generator_id'], inplace=True)

In [9]:
net_gen = g_summed.merge(g8, how='left', on=['plant_id','generator_id'])

In [10]:
net_gen

Unnamed: 0,plant_id,report_date,generator_id,net_generation_mwh,energy_source
0,3,2009-12-31,1,221908.000,BIT
1,3,2009-12-31,2,394031.000,BIT
2,3,2009-12-31,3,1286393.000,BIT
3,3,2009-12-31,4,1626547.000,BIT
4,3,2009-12-31,5,4513101.000,BIT
5,3,2009-12-31,A1ST,1122697.000,NG
6,3,2009-12-31,A2ST,1033733.000,NG
7,3,2010-12-31,1,435334.000,BIT
8,3,2010-12-31,2,538495.000,BIT
9,3,2010-12-31,3,906322.000,BIT


In [11]:
#Pull in the fuel_receipts table
frc_tbl = models.PUDLBase.metadata.tables['fuel_receipts_costs_eia923']
frc_select = sa.sql.select([frc_tbl,]) #creates a sql Select object
frc = pd.read_sql(frc_select, pudl_engine) #converts sql object to pandas dataframe

In [12]:
frc = frc.set_index(pd.DatetimeIndex(frc['report_date']))
frc_gb = frc.groupby(by=['plant_id',pd.TimeGrouper(freq='A'),'energy_source'])
frc_summed = frc_gb.agg({'fuel_cost':np.sum})

In [13]:
frc_summed = frc_summed.reset_index()

In [14]:
fuel_cost_per_mwh = net_gen.merge(frc_summed,how='left',on=['plant_id','report_date','energy_source'])

In [15]:
fuel_cost_per_mwh['fuel_cost_per_mwh'] = fuel_cost_per_mwh['fuel_cost'] / fuel_cost_per_mwh['net_generation_mwh']

In [16]:
fuel_cost_per_mwh

Unnamed: 0,plant_id,report_date,generator_id,net_generation_mwh,energy_source,fuel_cost,fuel_cost_per_mwh
0,3,2009-12-31,1,221908.000,BIT,17954.9,0.080911
1,3,2009-12-31,2,394031.000,BIT,17954.9,0.045567
2,3,2009-12-31,3,1286393.000,BIT,17954.9,0.013958
3,3,2009-12-31,4,1626547.000,BIT,17954.9,0.011039
4,3,2009-12-31,5,4513101.000,BIT,17954.9,0.003978
5,3,2009-12-31,A1ST,1122697.000,NG,10352.0,0.009221
6,3,2009-12-31,A2ST,1033733.000,NG,10352.0,0.010014
7,3,2010-12-31,1,435334.000,BIT,19198.2,0.044100
8,3,2010-12-31,2,538495.000,BIT,19198.2,0.035652
9,3,2010-12-31,3,906322.000,BIT,19198.2,0.021183
