In [1]:
import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import pudl, ferc1, eia923, settings, constants
from pudl import models, models_ferc1, models_eia923
from pudl import clean_eia923, clean_ferc1, clean_pudl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Open the database connection handle that the pd.read_sql calls in later cells use
pudl_engine = pudl.connect_db()

In [3]:
# Select every column of the generation_eia923 table
g_tbl = models.PUDLBase.metadata.tables['generation_eia923']
g_select = sa.sql.select([g_tbl,])
# Read the table and index the rows by report_date (as a DatetimeIndex)
# so the records can be grouped by time period in the cells below
g = (
    pd.read_sql(g_select, pudl_engine)
    .pipe(lambda df: df.set_index(pd.DatetimeIndex(df['report_date'])))
)

In [4]:
# Group the generation records by calendar year, plant and generator.
# pd.Grouper(freq=...) is the supported replacement for pd.TimeGrouper, which
# was deprecated and later removed from pandas. 'A' is the year-end alias
# (newer pandas releases spell it 'YE').
g_yr = g.groupby([pd.Grouper(freq='A'), 'plant_id', 'generator_id'])
# Annual net generation per generator. generator_id is moved out of the index
# into a column so the remaining (report_date, plant_id) index matches the
# index of a per-plant annual total.
g_net_gen_gen = (
    g_yr['net_generation_mwh']
    .sum()
    .to_frame()
    .reset_index(level=['generator_id'])
)
g_net_gen_gen

Unnamed: 0_level_0,Unnamed: 1_level_0,generator_id,net_generation_mwh
report_date,plant_id,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-12-31,3,1,221908.00
2009-12-31,3,2,394031.00
2009-12-31,3,3,1286393.00
2009-12-31,3,4,1626547.00
2009-12-31,3,5,4513101.00
2009-12-31,3,A1ST,1122697.00
2009-12-31,3,A2ST,1033733.00
2009-12-31,7,1,212068.00
2009-12-31,7,2,51262.00
2009-12-31,8,10,3933248.00


In [5]:
# Annual net generation per plant: group by calendar year and plant_id, sum
# net_generation_mwh, and keep the result as a one-column DataFrame.
# pd.Grouper(freq=...) is the supported replacement for pd.TimeGrouper, which
# was deprecated and later removed from pandas.
# The name is bound once, directly to the final DataFrame, rather than first
# to the intermediate GroupBy object.
g_net_gen_plant = (
    g.groupby([pd.Grouper(freq='A'), 'plant_id'])['net_generation_mwh']
    .sum()
    .to_frame()
)
g_net_gen_plant

Unnamed: 0_level_0,Unnamed: 1_level_0,net_generation_mwh
report_date,plant_id,Unnamed: 2_level_1
2009-12-31,3,1.019841e+07
2009-12-31,7,2.633300e+05
2009-12-31,8,5.207454e+06
2009-12-31,10,2.364684e+06
2009-12-31,26,8.033064e+06
2009-12-31,47,2.939031e+06
2009-12-31,50,4.688202e+06
2009-12-31,51,4.465132e+06
2009-12-31,56,3.026254e+06
2009-12-31,59,5.916670e+05


In [6]:
# Compute each generator's share of its plant's annual net generation.
# Both frames are indexed by (report_date, plant_id), so an index-on-index left
# merge attaches the plant total to every generator row. The shared column name
# picks up pandas' default suffixes: _x is the generator total, _y the plant total.
g_merged = (
    g_net_gen_gen
    .merge(g_net_gen_plant, how="left", left_index=True, right_index=True)
    .assign(
        proportion_of_generation=lambda df: (
            df['net_generation_mwh_x'] / df['net_generation_mwh_y']
        )
    )
    # Only the ratio is needed downstream; discard the two raw totals
    .drop(['net_generation_mwh_x', 'net_generation_mwh_y'], axis=1)
)
g_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,generator_id,proportion_of_generation
report_date,plant_id,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-12-31,3,1,0.021759
2009-12-31,3,2,0.038637
2009-12-31,3,3,0.126137
2009-12-31,3,4,0.159490
2009-12-31,3,5,0.442530
2009-12-31,3,A1ST,0.110085
2009-12-31,3,A2ST,0.101362
2009-12-31,7,1,0.805332
2009-12-31,7,2,0.194668
2009-12-31,8,10,0.755311


In [7]:
# Pull in the fuel_receipts_costs_eia923 table
frc_tbl = models.PUDLBase.metadata.tables['fuel_receipts_costs_eia923']
frc_select = sa.sql.select([frc_tbl,])
frc = pd.read_sql(frc_select, pudl_engine)
# Index by report_date (as a DatetimeIndex) so the rows can be grouped by year
frc = frc.set_index(pd.DatetimeIndex(frc['report_date']))
# Sum fuel_cost per calendar year and plant.
# pd.Grouper(freq=...) is the supported replacement for pd.TimeGrouper, which
# was deprecated and later removed from pandas.
# NOTE(review): if fuel_cost is a per-unit price rather than a total, a plain
# sum is not a total cost and would need weighting by quantity -- confirm
# against the table definition.
frc_yr_cost = frc.groupby([pd.Grouper(freq='A'), 'plant_id'])['fuel_cost'].sum()
# Same values as a one-column DataFrame
frc_yr_cost_df = frc_yr_cost.to_frame()
frc_yr_cost_df

Unnamed: 0_level_0,Unnamed: 1_level_0,fuel_cost
report_date,plant_id,Unnamed: 2_level_1
2009-12-31,3,28306.9
2009-12-31,7,63659.1
2009-12-31,8,53893.8
2009-12-31,9,4526.7
2009-12-31,10,16471.7
2009-12-31,26,49482.6
2009-12-31,47,41613.1
2009-12-31,50,60218.9
2009-12-31,51,10388.0
2009-12-31,54,15502.0
