In [8]:
import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import pudl, ferc1, eia923, settings, constants
from pudl import models, models_ferc1, models_eia923
from pudl import clean_eia923, clean_ferc1, clean_pudl
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
# Open the database connection that every later pd.read_sql call reuses.
pudl_engine = pudl.connect_db()

In [12]:
# Load every row of the generation_eia923 table into a dataframe.
g_tbl = models.PUDLBase.metadata.tables['generation_eia923']
g_select = sa.sql.select([g_tbl])  # SELECT over all columns of the table
g = pd.read_sql(sql=g_select, con=pudl_engine)
# Index the rows by report_date as a DatetimeIndex so later cells can bucket
# them by calendar period. The report_date column itself stays in the frame.
report_date_index = pd.DatetimeIndex(g['report_date'])
g = g.set_index(report_date_index)

In [14]:
# Group the generation records by report year, plant_id and generator_id.
# pd.Grouper replaces pd.TimeGrouper, which was deprecated and later removed
# from pandas. With no `key` given it buckets on the frame's DatetimeIndex,
# exactly as TimeGrouper did; freq='A' produces year-end labels.
g_yr = g.groupby([pd.Grouper(freq='A'), 'plant_id', 'generator_id'])
# Sum net_generation_mwh per (year, plant, generator), then move generator_id
# out of the index into a column so the frame is indexed by
# (report_date, plant_id) only -- the same index the plant-level totals use,
# which lets the two frames be merged on their indexes.
g_net_gen_gen = (
    g_yr.net_generation_mwh.sum()
    .to_frame()
    .reset_index(level=['generator_id'])
)
g_net_gen_gen

Unnamed: 0_level_0,Unnamed: 1_level_0,generator_id,net_generation_mwh
report_date,plant_id,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-12-31,3,1,221908.00
2009-12-31,3,2,394031.00
2009-12-31,3,3,1286393.00
2009-12-31,3,4,1626547.00
2009-12-31,3,5,4513101.00
2009-12-31,3,A1ST,1122697.00
2009-12-31,3,A2ST,1033733.00
2009-12-31,7,1,212068.00
2009-12-31,7,2,51262.00
2009-12-31,8,10,3933248.00


In [15]:
# Sum net_generation_mwh per (year, plant_id).
# pd.Grouper replaces pd.TimeGrouper, which was deprecated and later removed
# from pandas; with no `key` it buckets on the frame's DatetimeIndex.
# The result is built in one chain so that g_net_gen_plant is only ever bound
# to the final dataframe (never to the intermediate GroupBy object).
g_net_gen_plant = (
    g.groupby([pd.Grouper(freq='A'), 'plant_id'])
    .net_generation_mwh.sum()
    .to_frame()
)
g_net_gen_plant

Unnamed: 0_level_0,Unnamed: 1_level_0,net_generation_mwh
report_date,plant_id,Unnamed: 2_level_1
2009-12-31,3,1.019841e+07
2009-12-31,7,2.633300e+05
2009-12-31,8,5.207454e+06
2009-12-31,10,2.364684e+06
2009-12-31,26,8.033064e+06
2009-12-31,47,2.939031e+06
2009-12-31,50,4.688202e+06
2009-12-31,51,4.465132e+06
2009-12-31,56,3.026254e+06
2009-12-31,59,5.916670e+05


In [16]:
# Attach each plant's annual net generation to the rows of its generators.
# Both frames are indexed by (report_date, plant_id), so the join is on index.
g_merged = pd.merge(
    g_net_gen_gen,
    g_net_gen_plant,
    how="left",
    left_index=True,
    right_index=True,
)
# Both inputs carry a net_generation_mwh column, so merge applies its default
# suffixes: _x is the generator-level value, _y is the plant-level total.
g_merged['proportion_of_generation'] = (
    g_merged['net_generation_mwh_x'].div(g_merged['net_generation_mwh_y'])
)
# Only the share is needed from here on; drop the two raw generation columns.
g_merged = g_merged.drop(['net_generation_mwh_x', 'net_generation_mwh_y'], axis=1)
g_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,generator_id,proportion_of_generation
report_date,plant_id,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-12-31,3,1,0.021759
2009-12-31,3,2,0.038637
2009-12-31,3,3,0.126137
2009-12-31,3,4,0.159490
2009-12-31,3,5,0.442530
2009-12-31,3,A1ST,0.110085
2009-12-31,3,A2ST,0.101362
2009-12-31,7,1,0.805332
2009-12-31,7,2,0.194668
2009-12-31,8,10,0.755311


In [None]:
#Pull in the net_generation data in MMBtu from generation_fuel table

In [18]:
# Load every row of the generation_fuel_eia923 table into a dataframe.
gf_tbl = models.PUDLBase.metadata.tables['generation_fuel_eia923']
gf_select = sa.sql.select([gf_tbl,])
gf = pd.read_sql(gf_select, pudl_engine)
# Index by report_date as a DatetimeIndex so rows can be bucketed by year.
gf = gf.set_index(pd.DatetimeIndex(gf['report_date']))
# Sum fuel_consumed_total_mmbtu per (year, plant_id).
# pd.Grouper replaces pd.TimeGrouper, which was deprecated and later removed
# from pandas; with no `key` it buckets on the frame's DatetimeIndex.
gf_yr_fuel_mmbtu = (
    gf.groupby([pd.Grouper(freq='A'), 'plant_id'])
    .fuel_consumed_total_mmbtu.sum()
)
# Keep a dataframe version (same index, single column) for index-based merges.
gf_yr_fuel_mmbtu_df = gf_yr_fuel_mmbtu.to_frame()
gf_yr_fuel_mmbtu_df

Unnamed: 0_level_0,Unnamed: 1_level_0,fuel_consumed_total_mmbtu
report_date,plant_id,Unnamed: 2_level_1
2009-12-31,2,2758750.0
2009-12-31,3,122482205.0
2009-12-31,4,8427915.0
2009-12-31,7,3886265.0
2009-12-31,8,52697640.0
2009-12-31,9,225529.0
2009-12-31,10,25383733.0
2009-12-31,11,2609220.0
2009-12-31,12,2113616.0
2009-12-31,13,4169112.0


In [24]:
# Bring each plant's annual fuel_consumed_total_mmbtu onto its generator rows;
# both frames are indexed by (report_date, plant_id).
merged = pd.merge(
    g_merged,
    gf_yr_fuel_mmbtu_df,
    how="left",
    left_index=True,
    right_index=True,
)
# Allocate the plant's fuel to each generator in proportion to the generator's
# share of the plant's net generation.
merged['fuel_consumed_mmbtu'] = (
    merged['fuel_consumed_total_mmbtu'].mul(merged['proportion_of_generation'])
)
# The plant-level total is no longer needed once the allocation is computed.
merged = merged.drop(['fuel_consumed_total_mmbtu'], axis=1)


In [26]:
# Display the allocation: one row per (report_date, plant_id, generator_id)
# with the generator's proportion_of_generation and its fuel_consumed_mmbtu.
merged

Unnamed: 0_level_0,Unnamed: 1_level_0,generator_id,proportion_of_generation,fuel_consumed_mmbtu
report_date,plant_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009-12-31,3,1,0.021759,2.665100e+06
2009-12-31,3,2,0.038637,4.732285e+06
2009-12-31,3,3,0.126137,1.544949e+07
2009-12-31,3,4,0.159490,1.953472e+07
2009-12-31,3,5,0.442530,5.420203e+07
2009-12-31,3,A1ST,0.110085,1.348351e+07
2009-12-31,3,A2ST,0.101362,1.241506e+07
2009-12-31,7,1,0.805332,3.129732e+06
2009-12-31,7,2,0.194668,7.565326e+05
2009-12-31,8,10,0.755311,3.980311e+07
