In [1]:
import pandas as pd
import numpy as np

from datetime import datetime, timedelta

In [2]:
df = pd.read_csv("diff_data/cems_diffs_isorto.csv", index_col='DATE_UTC')

In [3]:
# Convert to UTC (as opposed to UTC-5)
df.index = pd.DatetimeIndex(df.reset_index().DATE_UTC) + timedelta(hours=5)

# Remove columns with damage data 
df = df[df.columns[~df.columns.str.endswith('ap2')]]
df = df[df.columns[~df.columns.str.endswith('dam')]]
df = df[df.columns[~df.columns.str.endswith('eas')]]

In [4]:
# calculate MEF (assumed to be emissions / generation)
df['co2_mef'] = df['co2_kg']/df['gload_mwh']
df['so2_mef'] = df['so2_kg']/df['gload_mwh']
df['nox_mef'] = df['nox_kg']/df['gload_mwh']
df['pm25_mef'] = df['pm25_kg']/df['gload_mwh']
df = df[df.replace([np.inf, -np.inf], np.nan).notnull().all(axis=1)]
df = df.dropna() 

In [5]:
df.describe()

Unnamed: 0,gload_mwh,so2_kg,nox_kg,pm25_kg,co2_kg,co2_mef,so2_mef,nox_mef,pm25_mef
count,736057.0,736057.0,736057.0,736057.0,736057.0,736057.0,736057.0,736057.0,736057.0
mean,0.11473,-0.439396,-0.118684,0.003811,55.77672,591.2107,0.649025,0.540102,0.069971
std,1467.368385,3195.501563,1085.100576,187.39382,978047.5,38701.06,283.239321,124.029999,11.022804
min,-10404.73,-128718.8527,-20854.566238,-6594.509202,-7671466.0,-18000640.0,-176188.604575,-48955.437468,-6822.031204
25%,-571.53,-372.91023,-232.13251,-34.47303,-320123.8,418.4159,0.00189,0.039139,0.038689
50%,-1.66,-0.021319,-3.57748,0.0,867.2689,557.0117,0.402684,0.344181,0.058396
75%,564.37,335.336937,214.166184,33.112253,305285.9,738.1082,1.511817,0.80024,0.094521
max,10664.38,72387.984792,26475.95412,2430.076459,7822484.0,24515400.0,60355.442914,69533.022803,5624.547003


### Why are MEFs negative? 

In [6]:
df['year'] = pd.DatetimeIndex(df.reset_index().DATE_UTC).year
df_pjm = df.loc[df['isorto'] == 'PJM']

# Keep only negative MEFs
mef_columns = ['co2_mef', 'so2_mef', 'nox_mef', 'pm25_mef']
df_pjm_neg = df_pjm[(df_pjm[mef_columns]<0).any(axis='columns')]

# Look at only 2017 data to compare with PJM data miner 
df_pjm_neg = df_pjm_neg.loc[df_pjm_neg['year']==2017]

df_pjm_neg

Unnamed: 0_level_0,isorto,gload_mwh,so2_kg,nox_kg,pm25_kg,co2_kg,co2_mef,so2_mef,nox_mef,pm25_mef,year
DATE_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-01-01 00:00:00,PJM,-345.67,978.663749,-318.302743,24.493995,-8.134274e+04,235.319070,-2.831208,0.920828,-0.070859,2017
2017-01-01 01:00:00,PJM,-1433.47,26.728816,-728.121301,-86.182575,-9.157642e+05,638.844377,-0.018646,0.507943,0.060122,2017
2017-01-01 13:00:00,PJM,649.87,-329.797218,388.223043,47.355057,4.421747e+05,680.404803,-0.507482,0.597386,0.072869,2017
2017-01-01 19:00:00,PJM,-352.71,-1354.056488,292.249779,-19.413759,-4.175773e+05,1183.911019,3.839008,-0.828584,0.055042,2017
2017-01-01 20:00:00,PJM,-96.62,-485.576133,390.376244,-7.166762,-2.535201e+05,2623.888488,5.025628,-4.040325,0.074175,2017
2017-01-01 21:00:00,PJM,1597.33,235.937693,-708.521590,78.652939,5.757522e+05,360.446631,0.147708,-0.443566,0.049240,2017
2017-01-02 01:00:00,PJM,-739.34,2000.196024,606.823996,-44.361346,-5.688295e+05,769.374698,-2.705381,-0.820764,0.060001,2017
2017-01-02 16:00:00,PJM,897.49,-448.795265,663.962526,65.770913,5.220931e+05,581.725849,-0.500056,0.739799,0.073283,2017
2017-01-02 18:00:00,PJM,-155.81,416.959456,165.688993,1.451496,-8.942758e+04,573.952736,-2.676076,-1.063404,-0.009316,2017
2017-01-02 19:00:00,PJM,-194.34,1063.741279,79.799533,-14.514960,-3.567777e+04,183.584294,-5.473610,-0.410618,0.074688,2017


A negative MEF means that either: 
- Generation decreased while emissions increased 
    - is there a situation where this can happen?
- Generation increased while emissions decreased 
    - perhaps generation shifted from coal to gas 

In [7]:
# Percent where generation increased while emission decreased 
df_pjm_neg.gload_mwh[df_pjm_neg.gload_mwh > 0].count() / df_pjm_neg.gload_mwh.count()

0.49636363636363634

##### Are MEFs negative because generation shifted from coal to gas? 

In [8]:
# Get data from PJM data miner 
df_pjm_gen = pd.read_csv("pjm_data/pjm_gen.csv", index_col='datetime_beginning_utc')
df_pjm_gen.index = pd.to_datetime(df_pjm_gen.index)
del df_pjm_gen['datetime_beginning_ept']
del df_pjm_gen['is_renewable']

# Keep only fossil fuel rows 
fossil_fuel = ['Coal', 'Gas', 'Oil']
df_pjm_gen = df_pjm_gen.loc[df_pjm_gen['fuel_type'].isin(fossil_fuel)]
df_pjm_gen.head()

Unnamed: 0_level_0,fuel_type,mw,fuel_percentage_of_total
datetime_beginning_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01 05:00:00,Coal,33866.5,0.41
2017-01-01 05:00:00,Gas,10516.4,0.13
2017-01-01 05:00:00,Oil,175.7,0.0
2017-01-01 06:00:00,Coal,33164.4,0.41
2017-01-01 06:00:00,Gas,10203.0,0.13


In [10]:
# Get dates where MEFs are negative and generation increased 
df_neg_increased_gen = df_pjm_neg[df_pjm_neg.gload_mwh > 0]
neg_dates = df_neg_increased_gen.index.values
yesterday = lambda x: pd.to_datetime(x) - timedelta(hours=1) 
yesterday_arr = yesterday(neg_dates)
yesterday_arr = np.delete(yesterday_arr, 0) # remove first elem as it's in 2016


# Show dates with negative index 
gen_mix_df = df_pjm_gen[df_pjm_gen.index.isin(df_neg_increased_gen.index) | df_pjm_gen.index.isin(yesterday_arr)]

# save as csv 
df.to_csv('gen_mix.csv')

gen_mix_df

Unnamed: 0_level_0,fuel_type,mw,fuel_percentage_of_total
datetime_beginning_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01 13:00:00,Coal,33596.5,0.41
2017-01-01 13:00:00,Gas,10663.5,0.13
2017-01-01 13:00:00,Oil,178.6,0.00
2017-01-01 20:00:00,Coal,31422.8,0.40
2017-01-01 20:00:00,Gas,11352.3,0.14
2017-01-01 20:00:00,Oil,173.2,0.00
2017-01-01 21:00:00,Coal,31520.7,0.39
2017-01-01 21:00:00,Gas,12701.3,0.16
2017-01-01 21:00:00,Oil,174.1,0.00
2017-01-02 15:00:00,Coal,36449.9,0.41


##### Check if generation data from CEMS matches data from PJM 

In [15]:
# Get total generation data from CEMS
df_isorto = pd.read_csv("../implement-emissions-assumptions/data/formatted_data/cems_isorto.csv", index_col='DATE_UTC')
df_isorto.index = pd.DatetimeIndex(df_isorto.reset_index().DATE_UTC) + timedelta(hours=5)
df_isorto = df_isorto.loc[df_isorto['isorto'] == 'PJM']
df_isorto = df_isorto.loc['2017-01-01':'2017-12-31']

In [14]:
# Find total fossil fuel generation at each time 
df_pjm_gen = df_pjm_gen[['mw']]
df_pjm_gen = df_pjm_gen.sum(level='datetime_beginning_utc')

# Dataframe to compare PJM's published total with CEMS published totals 
df_isorto = df_isorto[['gload_mwh']]
df_compare = pd.merge(df_isorto,df_pjm_gen, how='inner', left_index=True, right_index=True)
df_compare.columns = ['cems', 'pjm_data_miner']

# find difference between two columns, where calculated MEF is negative 
df_compare = df_compare.diff()
df_compare = df_compare.loc[(df_compare.index).isin(df_pjm.index)]
df_compare

Unnamed: 0,cems,pjm_data_miner
2017-01-01 05:00:00,,
2017-01-01 06:00:00,-1014.45,-1013.9
2017-01-01 07:00:00,-693.55,-1458.5
2017-01-01 08:00:00,-292.89,-686.2
2017-01-01 09:00:00,-137.00,24.1
2017-01-01 10:00:00,322.00,91.6
2017-01-01 11:00:00,420.95,503.9
2017-01-01 12:00:00,1384.18,1605.1
2017-01-01 13:00:00,649.87,813.9
2017-01-01 14:00:00,669.80,1019.2


### Compare with PJM's Published MEFs 

In [16]:
df_pjm = df_pjm.loc['2014-01-01':'2018-12-31']

# Keep only MEF cols 
df_pjm = df_pjm[mef_columns]

# convert to lbs/MWh
LBS_CONVERSION = 2.20462
df_pjm = df_pjm.applymap(lambda x: x * LBS_CONVERSION)

# Aggregate over month
df_pjm['year'] = pd.DatetimeIndex(df_pjm.reset_index().DATE_UTC).year
df_pjm['month'] = pd.DatetimeIndex(df_pjm.reset_index().DATE_UTC).month
group_by = df_pjm.groupby([(df_pjm['year']),(df_pjm['month'])]).mean()

group_by

Unnamed: 0_level_0,Unnamed: 1_level_0,co2_mef,so2_mef,nox_mef,pm25_mef,year,month
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014,1,1524.858319,0.154153,0.939685,0.1053,2014,1
2014,2,1402.895206,2.929072,0.966043,0.19046,2014,2
2014,3,1808.642317,1.144955,1.899357,0.204651,2014,3
2014,4,3021.934188,2.548116,6.120119,0.116401,2014,4
2014,5,1163.368044,0.971269,0.52364,0.096302,2014,5
2014,6,1369.913979,1.853775,1.09531,0.183909,2014,6
2014,7,1530.170236,1.519082,0.611457,0.188622,2014,7
2014,8,3174.174295,20.847987,4.09639,0.443954,2014,8
2014,9,-458.668066,-15.412292,-3.524204,0.137363,2014,9
2014,10,1442.801143,1.191122,-0.75636,0.186829,2014,10


### Try removing values 3 standard deviations away 

In [17]:
# Remove outliers 
df_noOutliers = df[np.abs(df.co2_mef-df.co2_mef.mean()) <= (3*df.co2_mef.std())]

In [18]:
df_noOutliers[mef_columns].describe()

Unnamed: 0,co2_mef,so2_mef,nox_mef,pm25_mef
count,735764.0,735764.0,735764.0,735764.0
mean,577.061642,0.907487,0.482052,0.073546
std,2355.481363,46.670653,13.896778,1.055602
min,-115448.716549,-11145.934779,-3306.603601,-301.099021
25%,418.476288,0.001891,0.039206,0.038695
50%,557.011366,0.402649,0.344187,0.058396
75%,738.026919,1.511223,0.799959,0.094498
max,115333.452999,30612.637749,5280.981714,235.212304


In [19]:
# Find monthly MEF for PJM (to compare with their published data )
df_noOutlier_pjm = df_noOutliers.loc[df_noOutliers['isorto'] == 'PJM']
df_noOutlier_pjm = df_noOutlier_pjm.loc['2014-01-01':'2018-12-31']

# Keep MEF columns only
df_noOutlier_pjm = df_noOutlier_pjm[mef_columns]

# convert to lbs/MWh
LBS_CONVERSION = 2.20462
df_noOutlier_pjm = df_noOutlier_pjm.applymap(lambda x: x * LBS_CONVERSION)

# Year and month 
df_noOutlier_pjm['year'] = pd.DatetimeIndex(df_noOutlier_pjm.reset_index().DATE_UTC).year
df_noOutlier_pjm['month'] = pd.DatetimeIndex(df_noOutlier_pjm.reset_index().DATE_UTC).month

# Aggregate over months 
group_by = df_noOutlier_pjm.groupby([(df_noOutlier_pjm['year']),(df_noOutlier_pjm['month'])]).mean()

group_by

Unnamed: 0_level_0,Unnamed: 1_level_0,co2_mef,so2_mef,nox_mef,pm25_mef,year,month
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014,1,1524.858319,0.154153,0.939685,0.1053,2014,1
2014,2,1402.895206,2.929072,0.966043,0.19046,2014,2
2014,3,1808.642317,1.144955,1.899357,0.204651,2014,3
2014,4,1771.781748,3.070639,2.088509,0.200012,2014,4
2014,5,1739.809923,4.130349,2.01144,0.22634,2014,5
2014,6,1369.913979,1.853775,1.09531,0.183909,2014,6
2014,7,1530.170236,1.519082,0.611457,0.188622,2014,7
2014,8,1567.546105,2.577871,1.611748,0.195268,2014,8
2014,9,1717.402121,2.981439,1.03793,0.22859,2014,9
2014,10,1442.801143,1.191122,-0.75636,0.186829,2014,10
