In [1]:
%load_ext autoreload
%autoreload 1

import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import pudl, ferc1, eia923, settings, constants
from pudl import models, models_ferc1, models_eia923
from pudl import clean_eia923, clean_ferc1, clean_pudl
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
# Open the connection to the PUDL database; every read_sql call below goes through it.
pudl_engine = pudl.db_connect_pudl()

In [3]:
# Handle on the tables registered in the PUDL schema metadata.
pt = models.PUDLBase.metadata.tables

# Every generation_fuel_eia923 column, plus the plant name and PUDL plant id
# taken from plants_eia for the row with the matching plant_id.
plants_eia_select = (
    sa.sql.select([
        pt['generation_fuel_eia923'],
        pt['plants_eia'].c.plant_name,
        pt['plants_eia'].c.plant_id_pudl,
    ])
    .where(pt['generation_fuel_eia923'].c.plant_id == pt['plants_eia'].c.plant_id)
)

# Run the query and load the result into a DataFrame.
plants_df = pd.read_sql(plants_eia_select, pudl_engine)

In [4]:
# Index the frame by report_date as a DatetimeIndex so it can be grouped by
# time frequency later. The report_date column itself is left in place.
report_dates = pd.DatetimeIndex(plants_df['report_date'])
plants_df = plants_df.set_index(report_dates)

In [5]:
# Same data with plant_id appended as a second index level after the current index.
plants_df_indexed = plants_df.set_index('plant_id', append=True)

In [6]:
# Group the generation_fuel records by calendar year and by plant.
# pd.Grouper(freq='A') buckets rows on the DatetimeIndex at year end; it replaces
# pd.TimeGrouper, which was deprecated and later removed from pandas.
# NOTE(review): recent pandas releases spell the annual alias 'YE' instead of 'A';
# confirm against the pandas version in use before changing it.
plants_gb = plants_df.groupby(
    [pd.Grouper(freq='A'), 'plant_id', 'plant_name', 'plant_id_pudl'])

# Sum fuel consumed and net generation per plant per year; the result is a
# DataFrame indexed by (report_date, plant_id, plant_name, plant_id_pudl).
# The string 'sum' selects the built-in groupby sum without passing a NumPy callable.
plants = plants_gb.agg({'fuel_consumed_total_mmbtu': 'sum',
                        'net_generation_mwh': 'sum'})

In [7]:
# Heat rate in Btu per kWh: fuel is scaled from MMBtu to Btu and generation from
# MWh to kWh before dividing. Rows with zero net generation come out as inf or NaN.
fuel_btu = plants['fuel_consumed_total_mmbtu'] * 1000000
net_gen_kwh = plants['net_generation_mwh'] * 1000
plants['heat_rate_btu_per_kwh'] = fuel_btu / net_gen_kwh

In [8]:
# Preview the first rows only instead of rendering the whole annual table.
plants.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,fuel_consumed_total_mmbtu,net_generation_mwh,heat_rate_btu_per_kwh
report_date,plant_id,plant_name,plant_id_pudl,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,2,Bankhead Dam,851,2758750.0,282659.0,9759.993490
2009-12-31,3,Barry,32,122482205.0,14086902.0,8694.758081
2009-12-31,4,Walter Bouldin Dam,850,8427915.0,863516.0,9759.998657
2009-12-31,7,Gadsden,204,3886265.0,263330.0,14758.155167
2009-12-31,8,Gorgas,227,52697640.0,5207454.0,10119.655402
2009-12-31,9,Copper,129,225529.0,17415.0,12950.272753
2009-12-31,10,Greene County,245,25383733.0,2491524.0,10188.034713
2009-12-31,11,H Neely Henry Dam,842,2609220.0,267338.0,9760.004189
2009-12-31,12,Holt Dam,852,2113616.0,216559.0,9760.000739
2009-12-31,13,Jordan Dam,853,4169112.0,427163.0,9760.002622


In [21]:
# Load the full plants_eia860 table into a DataFrame.
p8_tbl = models.PUDLBase.metadata.tables['plants_eia860']
p8_select = sa.sql.select([p8_tbl])  # SELECT of every column in the table
p8 = pd.read_sql(sql=p8_select, con=pudl_engine)
# p8 keeps its default index: it is joined to the annual plant table on the
# plant_id and year columns further down, so no DatetimeIndex is set here.

In [24]:
# Move the group keys (report_date, plant_id, plant_name, plant_id_pudl) out of
# the index into ordinary columns so they can serve as merge keys.
# The guard makes the cell safe to re-run: an unguarded second reset_index()
# would push the new integer index into a stray 'index' column.
if 'report_date' in plants.index.names:
    plants = plants.reset_index()

In [25]:
# Integer calendar year taken from report_date; used as a merge key with p8.
plants['year'] = pd.to_datetime(plants['report_date']).dt.year

In [26]:
# Inner-join the annual heat-rate table to the plants_eia860 attributes on
# plant and year. Columns present in both frames get the default _x/_y suffixes.
pd.merge(plants, p8, how='inner', on=['plant_id', 'year'])

Unnamed: 0,index,report_date,plant_id,plant_name_x,plant_id_pudl,fuel_consumed_total_mmbtu,net_generation_mwh,heat_rate_btu_per_kwh,year,id,...,longitude,balancing_authority_code,balancing_authority_name,grid_voltage_kv,grid_voltage_2_kv,grid_voltage_3_kv,ash_impoundment,ash_impoundment_lined,ash_impoundment_status,natural_gas_pipeline
0,11346,2011-12-31,2,Bankhead Dam,851,1253705.0,1.290350e+05,9.716007e+03,2011,33,...,,,,115.00,,,,,,
1,11347,2011-12-31,3,Barry,32,102443454.0,1.235423e+07,8.292176e+03,2011,34,...,,,,230.00,,,,,,
2,11348,2011-12-31,4,Walter Bouldin Dam,850,4738434.0,4.876940e+05,9.715998e+03,2011,35,...,,,,115.00,,,,,,
3,11349,2011-12-31,7,Gadsden,204,3632868.0,2.302000e+05,1.578136e+04,2011,36,...,,,,115.00,,,,,,
4,11350,2011-12-31,8,Gorgas,227,50694113.0,4.936430e+06,1.026939e+04,2011,37,...,,,,230.00,,,,,,
5,11351,2011-12-31,9,Copper,129,687879.0,3.899700e+04,1.763928e+04,2011,1111,...,,,,69.00,,,,,,
6,11352,2011-12-31,10,Greene County,245,25131048.0,2.475757e+06,1.015085e+04,2011,38,...,,,,230.00,,,,,,
7,11353,2011-12-31,11,H Neely Henry Dam,842,1453603.0,1.496090e+05,9.716013e+03,2011,39,...,,,,115.00,,,,,,
8,11354,2011-12-31,12,Holt Dam,852,778476.0,8.012300e+04,9.716012e+03,2011,40,...,,,,115.00,,,,,,
9,11355,2011-12-31,13,Jordan Dam,853,2187081.0,2.251010e+05,9.715999e+03,2011,41,...,,,,115.00,,,,,,


In [27]:
# NOTE: the cells below are unused scratch work and have not been cleaned up.

In [None]:
# Load the full generation_fuel_eia923 table into a DataFrame.
gf_tbl = models.PUDLBase.metadata.tables['generation_fuel_eia923']
gf_select = sa.sql.select([gf_tbl])  # SELECT of every column in the table
gf = pd.read_sql(sql=gf_select, con=pudl_engine)

# Index by report_date as a DatetimeIndex so rows can be grouped by time frequency.
gf_report_dates = pd.DatetimeIndex(gf['report_date'])
gf = gf.set_index(gf_report_dates)

In [None]:
# Group the generation_fuel records by calendar year and by plant_id.
# pd.Grouper(freq='A') buckets rows on the DatetimeIndex at year end; it replaces
# pd.TimeGrouper, which was deprecated and later removed from pandas.
# NOTE(review): recent pandas releases spell the annual alias 'YE' instead of 'A';
# confirm against the pandas version in use before changing it.
gf_plant_gb = gf.groupby([pd.Grouper(freq='A'), 'plant_id'])

# Sum fuel consumed and net generation per plant per year; the result is a
# DataFrame indexed by (report_date, plant_id).
# The string 'sum' selects the built-in groupby sum without passing a NumPy callable.
gf_plant = gf_plant_gb.agg({'fuel_consumed_total_mmbtu': 'sum',
                            'net_generation_mwh': 'sum'})

In [None]:
# Heat rate in Btu per kWh: (MMBtu * 1,000,000) / (MWh * 1,000).
# Rows with zero net generation come out as inf or NaN.
gf_plant['heat_rate_btu_per_kwh'] = (
    gf_plant['fuel_consumed_total_mmbtu'].mul(1000000)
    .div(gf_plant['net_generation_mwh'].mul(1000))
)

In [None]:
# Flat table of the heat rate only, with the index levels turned back into columns.
gf_heat_rate = gf_plant[['heat_rate_btu_per_kwh']].reset_index()

In [None]:
# Preview the first rows only instead of rendering the whole annual table.
gf_plant.head(10)