In [2]:
import os

import numpy as np
import pandas as pd
from scipy.stats import linregress as lm

In [37]:
# Independent variable: the column that the label totals are divided by in
# calc_aefs (presumably gross load in MWh, going by the column name).
XCOL = 'gload_mwh'

# Label columns evaluated against XCOL, grouped below by column-name suffix.
# NOTE(review): the suffixes suggest emitted mass (_kg) and damage estimates
# (_dam_ap2 / _dam_eas / co2_dam) -- confirm against the data dictionary.
LABELS = [
    # *_kg
    'so2_kg',
    'nox_kg',
    'pm25_kg',
    'co2_kg',
    # *_dam_ap2
    'so2_dam_ap2',
    'nox_dam_ap2',
    'pm25_dam_ap2',
    # *_dam_eas
    'so2_dam_eas',
    'nox_dam_eas',
    'pm25_dam_eas',
    # co2_dam
    'co2_dam',
]

In [38]:
# Label the df with the year, month, hour, and season 
def label_temporal_groups(df):
    """Return a copy of ``df`` with 'year', 'month', 'hour' and 'season' columns.

    All four columns are derived from ``df.index``, which therefore has to
    expose ``.year``, ``.month`` and ``.hour`` (e.g. a ``DatetimeIndex``).
    The frame passed in is not modified.
    """
    # Calendar month -> season label: Jan-Mar and Nov-Dec are 'winter',
    # Apr and Oct are 'trans', May-Sep are 'summer'.
    season_by_month = {
        1: 'winter', 2: 'winter', 3: 'winter',
        4: 'trans',
        5: 'summer', 6: 'summer', 7: 'summer', 8: 'summer', 9: 'summer',
        10: 'trans',
        11: 'winter', 12: 'winter',
    }

    labelled = df.copy()
    stamps = labelled.index

    labelled['year'] = stamps.year
    labelled['month'] = stamps.month
    labelled['hour'] = stamps.hour
    labelled['season'] = stamps.map(lambda ts: season_by_month[ts.month])
    return labelled

In [39]:
def calc_aefs(data, cols):
    grouped = data.groupby(cols)
    result_dict = {}

    for name, group in grouped: 
        sums = group[[XCOL]+LABELS].dropna().sum()
        result_dict[name] = sums[LABELS] / sums[XCOL]

    return pd.DataFrame.from_dict(result_dict, orient='index')

In [40]:
# Load the ISO/RTO CEMS table; CSV column 0 becomes the index and is parsed as dates.
isorto_data = pd.read_csv("../data/formatted_data/cems_isorto.csv", index_col=0, parse_dates=[0])
# Preview the first rows.
isorto_data.head()

Unnamed: 0_level_0,isorto,gload_mwh,so2_kg,nox_kg,pm25_kg,co2_kg,so2_dam_ap2,nox_dam_ap2,pm25_dam_ap2,so2_dam_eas,nox_dam_eas,pm25_dam_eas,co2_dam
DATE_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2006-01-01 03:00:00,CAISO,2514.91,5.551512,718.588611,70.034682,1136695.0,499.36,10054.94,17458.62,138.49,7384.93,5675.11,45467.79
2006-01-01 03:00:00,ERCOT,17710.3,44127.953546,10068.080439,782.537781,15207820.0,833587.32,89664.62,35242.08,757298.01,32103.4,42088.0,608312.79
2006-01-01 03:00:00,ISONE,4550.0,14001.659746,3160.450341,228.973494,3459118.0,388440.65,1631.24,17717.05,497104.83,45598.34,28926.09,138364.73
2006-01-01 03:00:00,MISO,36537.0,123951.250153,57380.768734,3372.823112,35255810.0,3893357.94,559888.28,189205.23,3320000.05,553499.55,276582.38,1410232.51
2006-01-01 03:00:00,NYISO,5935.0,10856.093574,3726.122202,553.292132,3849458.0,354881.33,17074.63,122390.37,346545.95,60452.59,74153.65,153978.32


### Example: Calculate AEFs from the ISO/RTO data, grouped by SeasonalTOD (year, season, hour)

In [42]:
# Add the year / month / hour / season columns derived from the datetime index.
# label_temporal_groups works on a copy, so the name is rebound to the labelled frame.
isorto_data = label_temporal_groups(isorto_data)
isorto_data.head()

Unnamed: 0_level_0,isorto,gload_mwh,so2_kg,nox_kg,pm25_kg,co2_kg,so2_dam_ap2,nox_dam_ap2,pm25_dam_ap2,so2_dam_eas,nox_dam_eas,pm25_dam_eas,co2_dam,year,month,hour,season
DATE_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2006-01-01 03:00:00,CAISO,2514.91,5.551512,718.588611,70.034682,1136695.0,499.36,10054.94,17458.62,138.49,7384.93,5675.11,45467.79,2006,1,3,winter
2006-01-01 03:00:00,ERCOT,17710.3,44127.953546,10068.080439,782.537781,15207820.0,833587.32,89664.62,35242.08,757298.01,32103.4,42088.0,608312.79,2006,1,3,winter
2006-01-01 03:00:00,ISONE,4550.0,14001.659746,3160.450341,228.973494,3459118.0,388440.65,1631.24,17717.05,497104.83,45598.34,28926.09,138364.73,2006,1,3,winter
2006-01-01 03:00:00,MISO,36537.0,123951.250153,57380.768734,3372.823112,35255810.0,3893357.94,559888.28,189205.23,3320000.05,553499.55,276582.38,1410232.51,2006,1,3,winter
2006-01-01 03:00:00,NYISO,5935.0,10856.093574,3726.122202,553.292132,3849458.0,354881.33,17074.63,122390.37,346545.95,60452.59,74153.65,153978.32,2006,1,3,winter


In [43]:
# Group by year, season and hour of day. 'isorto' is not one of the grouping
# columns here, so each row of the result pools every ISO/RTO present in the data.
cols = ['year', 'season', 'hour']
name = "SeasonalTOD"  # NOTE(review): not read anywhere in this cell -- confirm whether a later cell uses it
calc_aefs(isorto_data, cols).head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,so2_kg,nox_kg,pm25_kg,co2_kg,so2_dam_ap2,nox_dam_ap2,pm25_dam_ap2,so2_dam_eas,nox_dam_eas,pm25_dam_eas,co2_dam
2006,summer,0,3.2153,0.793416,0.127138,811.715738,103.098118,6.137391,12.254877,83.538128,6.486368,11.768868,32.46863
2006,summer,1,3.270685,0.799097,0.128688,820.118451,104.706013,6.1691,12.369908,84.882355,6.526919,11.899845,32.804738
2006,summer,2,3.306521,0.802835,0.129888,826.015993,105.791848,6.203096,12.488798,85.76563,6.560792,12.013103,33.04064
2006,summer,3,3.319053,0.804984,0.130688,829.517282,106.373475,6.209789,12.592012,86.1563,6.599358,12.101994,33.180691
2006,summer,4,3.327806,0.806283,0.131637,829.378087,107.092167,6.188648,12.771747,86.587027,6.651966,12.240988,33.175123


### Calculate AEFs for ISO/RTO and NERC

In [44]:
# Parallel lists: grouping_names[i] is the label (and output file stem) for the
# group-by columns listed in grouping_cols[i].
grouping_names = [
    "SeasonalTOD",
    "MonthTOD",
    "TOD",
    "YearOnly",
    "Month",
]
grouping_cols = [
    ['year', 'season', 'hour'],   # SeasonalTOD
    ['year', 'month', 'hour'],    # MonthTOD
    ['year', 'hour'],             # TOD
    ['year'],                     # YearOnly
    ['year', 'month'],            # Month
]

In [45]:
# Load ISO/RTO data (CSV column 0 becomes the index and is parsed as dates).
isorto_data = pd.read_csv("../data/formatted_data/cems_isorto.csv", index_col=0, parse_dates=[0])

# Build fresh lists instead of appending to the entries of grouping_cols:
# appending in place would also change grouping_cols itself (same list objects)
# and would add another 'isorto' every time this cell is re-run.
cols_isorto = [c + ['isorto'] for c in grouping_cols]

In [None]:
# The temporal labels do not depend on the grouping, so add them once up front
# rather than copying and relabelling the whole frame on every iteration.
isorto_data = label_temporal_groups(isorto_data)

# to_csv does not create missing parent directories; make sure the target exists.
output_dir = "results/isorto/"
os.makedirs(output_dir, exist_ok=True)

# One CSV per grouping, named after the grouping.
for grouping_name, grouping in zip(grouping_names, cols_isorto):
    results_df = calc_aefs(isorto_data, grouping)
    results_df.to_csv(output_dir + grouping_name + ".csv")