In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("diff_data/cems_diffs_isorto.csv", index_col='DATE_UTC')

In [4]:
# Remove columns with damage data 
df = df[df.columns[~df.columns.str.endswith('ap2')]]
df = df[df.columns[~df.columns.str.endswith('dam')]]
df = df[df.columns[~df.columns.str.endswith('eas')]]
df.head()

Unnamed: 0_level_0,isorto,gload_mwh,so2_kg,nox_kg,pm25_kg,co2_kg
DATE_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006-01-01 04:00:00,CAISO,-457.19,-0.896751,-43.825605,-8.436821,-189575.356635
2006-01-01 04:00:00,ERCOT,-445.7,-1853.107025,-138.292036,-20.5931,-341468.969925
2006-01-01 04:00:00,ISONE,-142.25,-525.355244,4.911948,-13.607775,-64021.85982
2006-01-01 04:00:00,MISO,-169.0,-211.712705,-208.724441,-30.753571,-161730.220245
2006-01-01 04:00:00,NYISO,10.0,-180.070127,0.725747,-3.084429,10160.472


In [5]:
# calculate MEF (assumed to be emissions / generation)
df['co2_mef'] = df['co2_kg']/df['gload_mwh']
df['so2_mef'] = df['so2_kg']/df['gload_mwh']
df['nox_mef'] = df['nox_kg']/df['gload_mwh']
df['pm25_mef'] = df['pm25_kg']/df['gload_mwh']
df = df[df.replace([np.inf, -np.inf], np.nan).notnull().all(axis=1)]
df = df.dropna() 

In [6]:
df.co2_mef.describe()

count    7.360570e+05
mean     5.912107e+02
std      3.870106e+04
min     -1.800064e+07
25%      4.184159e+02
50%      5.570117e+02
75%      7.381082e+02
max      2.451540e+07
Name: co2_mef, dtype: float64

### Why are MEFs negative? 

In [27]:
df_pjm = df.loc[df['isorto'] == 'PJM']

# Keep only negative MEFs
mef_columns = ['co2_mef', 'so2_mef', 'nox_mef', 'pm25_mef']
df_pjm_neg = df_pjm[(df_pjm[mef_columns]<0).any(axis='columns')]
df_pjm_neg

Unnamed: 0_level_0,isorto,gload_mwh,so2_kg,nox_kg,pm25_kg,co2_kg,co2_mef,so2_mef,nox_mef,pm25_mef
DATE_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2006-01-01 05:00:00,PJM,-4.00,-1804.548640,13.971541,78.743658,-6.214217e+04,15535.543125,451.137160,-3.492885,-19.685915
2006-01-01 06:00:00,PJM,702.00,-631.334293,705.651260,160.027434,2.559178e+05,364.555265,-0.899337,1.005201,0.227959
2006-01-01 11:00:00,PJM,-131.70,120.039948,211.556216,56.154752,-3.495475e+04,265.411885,-0.911465,-1.606349,-0.426384
2006-01-01 13:00:00,PJM,38.00,2451.936915,-202.981059,14.424242,3.674009e+04,966.844350,64.524656,-5.341607,0.379585
2006-01-02 03:00:00,PJM,-261.80,128.591971,-749.742670,-38.283207,-1.357103e+05,518.374103,-0.491184,2.863799,0.146231
2006-01-02 14:00:00,PJM,-62.45,555.491443,362.554725,-90.718500,-9.449239e+04,1513.088705,-8.894979,-5.805520,1.452658
2006-01-02 16:00:00,PJM,-82.84,-1425.488610,-1333.061529,67.222408,-2.197774e+05,2653.034279,17.207733,16.092003,-0.811473
2006-01-02 19:00:00,PJM,-580.80,4152.033751,934.626316,-140.250801,1.756927e+05,-302.501213,-7.148818,-1.609205,0.241479
2006-01-02 20:00:00,PJM,-453.67,-1949.864095,412.268408,-124.193627,-3.691944e+05,813.794956,4.297979,-0.908741,0.273753
2006-01-03 03:00:00,PJM,-384.64,-2613.872434,-91.851926,7.892509,-3.076446e+05,799.824712,6.795633,0.238800,-0.020519


In [14]:
# Get data from PJM data miner 
df_pjm_gen = pd.read_csv("pjm_data/pjm_gen.csv", index_col='datetime_beginning_utc')
df_pjm_gen.index = pd.to_datetime(df_pjm_gen.index)
del df_pjm_gen['datetime_beginning_ept']
del df_pjm_gen['is_renewable']

# keep only fossil fuel rows 
fossil_fuel = ['Coal', 'Gas', 'Oil']
df_pjm_gen = df_pjm_gen.loc[df_pjm_gen['fuel_type'].isin(fossil_fuel)]
df_pjm_gen.head()

Unnamed: 0_level_0,fuel_type,mw,fuel_percentage_of_total
datetime_beginning_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01 05:00:00,Coal,33866.5,0.41
2017-01-01 05:00:00,Gas,10516.4,0.13
2017-01-01 05:00:00,Oil,175.7,0.0
2017-01-01 06:00:00,Coal,33164.4,0.41
2017-01-01 06:00:00,Gas,10203.0,0.13


In [15]:
# Find total fossil fuel generation at each time 
df_pjm_gen = df_pjm_gen[['mw']]
df_pjm_gen = df_pjm_gen.sum(level='datetime_beginning_utc')
df_pjm_gen.head()

Unnamed: 0_level_0,mw
datetime_beginning_utc,Unnamed: 1_level_1
2017-01-01 05:00:00,44558.6
2017-01-01 06:00:00,43544.7
2017-01-01 07:00:00,42086.2
2017-01-01 08:00:00,41400.0
2017-01-01 09:00:00,41424.1


In [16]:
# Get total generation data from CEMS
df_isorto = pd.read_csv("../implement-emissions-assumptions/data/formatted_data/cems_isorto.csv", index_col='DATE_UTC')
df_isorto = df_isorto.loc[df_isorto['isorto'] == 'PJM']
df_isorto = df_isorto.loc['2017-01-01':'2017-12-31']
df_isorto.head()

Unnamed: 0_level_0,isorto,gload_mwh,so2_kg,nox_kg,pm25_kg,co2_kg,so2_dam_ap2,nox_dam_ap2,pm25_dam_ap2,so2_dam_eas,nox_dam_eas,pm25_dam_eas,co2_dam
DATE_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-01-01 00:00:00,PJM,44536.89,29126.29399,20943.820443,4556.971692,33800180.0,1219018.13,143250.87,419959.73,803371.99,155062.78,400937.5,1352007.31
2017-01-01 01:00:00,PJM,43522.44,27995.556719,20138.871544,4363.650569,33102450.0,1182566.21,138098.6,402719.31,772161.15,149520.53,381819.5,1324098.2
2017-01-01 02:00:00,PJM,42828.89,27274.991354,19403.799399,4230.566529,32448700.0,1142891.16,134688.57,389884.11,751741.45,144036.2,369406.81,1297948.01
2017-01-01 03:00:00,PJM,42536.0,27072.351396,19046.430138,4200.720142,32114890.0,1140297.18,132821.65,390347.97,747395.45,141525.75,368723.21,1284595.73
2017-01-01 04:00:00,PJM,42399.0,25851.675904,18707.736621,4188.200989,32079240.0,1087919.14,129517.44,388768.51,712626.55,139261.36,368199.26,1283169.64


In [25]:
# Compare PJM's published total with CEMS published totals 
df_isorto = df_isorto[['gload_mwh']]
df_compare = pd.merge(df_isorto,df_pjm_gen, how='inner', left_index=True, right_index=True)

df_compare.columns = ['cems', 'pjm_data_miner']
df_compare.head()

Unnamed: 0,cems,pjm_data_miner
2017-01-01 05:00:00,42721.0,44558.6
2017-01-01 06:00:00,43141.95,43544.7
2017-01-01 07:00:00,44526.13,42086.2
2017-01-01 08:00:00,45176.0,41400.0
2017-01-01 09:00:00,45845.8,41424.1


In [26]:
# find difference between two columns 
df_compare = df_compare.diff()
df_compare
# Keep rows where one of the MEF estimates is negative 
# df_compare = df_compare.loc[(df_compare.index).isin(df_pjm.index)]
# df_compare

Unnamed: 0,cems,pjm_data_miner
2017-01-01 05:00:00,,
2017-01-01 06:00:00,420.95,-1013.9
2017-01-01 07:00:00,1384.18,-1458.5
2017-01-01 08:00:00,649.87,-686.2
2017-01-01 09:00:00,669.80,24.1
2017-01-01 10:00:00,36.20,91.6
2017-01-01 11:00:00,-531.38,503.9
2017-01-01 12:00:00,-321.62,1605.1
2017-01-01 13:00:00,-480.00,813.9
2017-01-01 14:00:00,-352.71,1019.2


### Compare with PJM's Published MEFs 

In [29]:
df_pjm = df_pjm.loc['2014-01-01':'2018-12-31']

# Keep only MEF cols 
df_pjm = df_pjm[mef_columns]

# convert to lbs/MWh
LBS_CONVERSION = 2.20462
df_pjm = df_pjm.applymap(lambda x: x * LBS_CONVERSION)

# Aggregate over month
df_pjm['year'] = pd.DatetimeIndex(df_pjm.reset_index().DATE_UTC).year
df_pjm['month'] = pd.DatetimeIndex(df_pjm.reset_index().DATE_UTC).month
group_by = df_pjm.groupby([(df_pjm['year']),(df_pjm['month'])]).mean()

group_by

Unnamed: 0_level_0,Unnamed: 1_level_0,co2_mef,so2_mef,nox_mef,pm25_mef,year,month
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014,1,1526.791105,0.104546,0.929027,0.10386,2014,1
2014,2,1399.334895,2.963419,0.969641,0.191083,2014,2
2014,3,1810.982844,1.176831,1.906775,0.205165,2014,3
2014,4,3020.689142,2.544008,6.122572,0.115926,2014,4
2014,5,1164.66528,0.980404,0.522258,0.096738,2014,5
2014,6,1369.223151,1.845258,1.093549,0.183488,2014,6
2014,7,1535.536745,1.515477,0.608796,0.188721,2014,7
2014,8,3172.059165,20.8674,4.101899,0.444553,2014,8
2014,9,-460.853515,-15.426093,-3.523708,0.137173,2014,9
2014,10,1446.063315,1.271309,-0.723539,0.187407,2014,10


### Try removing values 3 standard deviations away 

In [42]:
# Remove outliers 
df_noOutliers = df[np.abs(df.co2_mef-df.co2_mef.mean()) <= (3*df.co2_mef.std())]

In [43]:
df_noOutliers[mef_columns].describe()

Unnamed: 0,co2_mef,so2_mef,nox_mef,pm25_mef
count,735764.0,735764.0,735764.0,735764.0
mean,577.061642,0.907487,0.482052,0.073546
std,2355.481363,46.670653,13.896778,1.055602
min,-115448.716549,-11145.934779,-3306.603601,-301.099021
25%,418.476288,0.001891,0.039206,0.038695
50%,557.011366,0.402649,0.344187,0.058396
75%,738.026919,1.511223,0.799959,0.094498
max,115333.452999,30612.637749,5280.981714,235.212304


In [44]:
# Find monthly MEF for PJM (to compare with their published data )
df_noOutlier_pjm = df_noOutliers.loc[df_noOutliers['isorto'] == 'PJM']
df_noOutlier_pjm = df_noOutlier_pjm.loc['2014-01-01':'2018-12-31']

# Keep MEF columns only
df_noOutlier_pjm = df_noOutlier_pjm[mef_columns]

# convert to lbs/MWh
LBS_CONVERSION = 2.20462
df_noOutlier_pjm = df_noOutlier_pjm.applymap(lambda x: x * LBS_CONVERSION)

# Year and month 
df_noOutlier_pjm['year'] = pd.DatetimeIndex(df_noOutlier_pjm.reset_index().DATE_UTC).year
df_noOutlier_pjm['month'] = pd.DatetimeIndex(df_noOutlier_pjm.reset_index().DATE_UTC).month

# Aggregate over months 
group_by = df_noOutlier_pjm.groupby([(df_noOutlier_pjm['year']),(df_noOutlier_pjm['month'])]).mean()

group_by

Unnamed: 0_level_0,Unnamed: 1_level_0,co2_mef,so2_mef,nox_mef,pm25_mef,year,month
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014,1,1526.791105,0.104546,0.929027,0.10386,2014,1
2014,2,1399.334895,2.963419,0.969641,0.191083,2014,2
2014,3,1810.982844,1.176831,1.906775,0.205165,2014,3
2014,4,1770.53497,3.066526,2.090967,0.199536,2014,4
2014,5,1741.108905,4.139497,2.010057,0.226776,2014,5
2014,6,1369.223151,1.845258,1.093549,0.183488,2014,6
2014,7,1535.536745,1.515477,0.608796,0.188721,2014,7
2014,8,1565.428128,2.59731,1.617265,0.195869,2014,8
2014,9,1715.213633,2.96762,1.038425,0.228399,2014,9
2014,10,1446.063315,1.271309,-0.723539,0.187407,2014,10
