In [None]:
%reload_ext autoreload
%autoreload 2

# import packages
import pandas as pd
import numpy as np
#from pathlib import Path
import plotly.express as px
import plotly
from os import path
import missingno
from statsmodels.formula.api import ols

# Initialise plotly's offline notebook mode so that figures are rendered inside
# the notebook and remain visible when it is exported as an HTML report.
plotly.offline.init_notebook_mode()

# Description
The purpose of this notebook is to examine 5-minute operational data published by CAISO to determine whether sub-hourly accounting is a significant improvement over hourly accounting, and how much emission rates vary within a single hour.

Steps:
- download the data from https://raw.githubusercontent.com/grgmiller/CAISO_data/master/CAISOdata.csv
- calculate the 5-minute emission rate
- explore variance, etc

In [None]:
# Load the CAISO 5-minute operational data straight from GitHub. The separate
# 'date' and '5min_ending' columns are combined and parsed into a single
# 'datetime_local' timestamp column.
data = pd.read_csv(
    'https://raw.githubusercontent.com/grgmiller/CAISO_data/master/CAISOdata.csv',
    parse_dates={'datetime_local': ['date', '5min_ending']},
)
# preview the first rows
data.head(5)

In [None]:
# keep data for 2019; take an explicit copy so the columns added below are written
# to an independent frame instead of a slice of the full dataset
# (assigning onto the filtered slice triggers SettingWithCopyWarning)
data = data[data['datetime_local'].dt.year == 2019].copy()
data['generation_MW'] = (
    data.renewable_MW + data.natgas_MW + data.lg_hydro_MW + data.imports_MW
    + data.nuclear_MW + data.coal_MW + data.other_MW
)

# total CO2 from every emitting source column in the data
total_co2 = (
    data.natgas_co2 + data.imports_co2 + data.coal_co2
    + data.biogas_co2 + data.biomass_co2 + data.geothermal_co2
)
# calculate the consumed emissions (per unit of demand) and the generated emissions
# (per unit of generation), converting from mT/MWh to lb/MWh
data['ef'] = total_co2 / data.demand_actual * 2204.62
data['ef_gen'] = total_co2 / data.generation_MW * 2204.62

ef = data[['datetime_local', 'ef']].set_index('datetime_local')

# resample to 5min to make sure we have a complete timeseries index
# (intervals missing from the source become NaN rows)
ef = ef.resample('5min').mean()

# screen out super high and low values: anything <= 0 or > 1000 lb/MWh becomes NaN
ef['ef'] = ef['ef'].where((ef['ef'] > 0) & (ef['ef'] <= 1000))

# calculate different averages
ef = ef.rename(columns={'ef': '5min'})

resolutions = ['15min', '30min', 'H', 'MS', 'QS', 'AS']
resolution_name = {'15min': '15min', '30min': '30min', 'H': 'hourly', 'MS': 'monthly', 'QS': 'quarterly', 'AS': 'annual'}

# For every coarser resolution, give each 5-minute row the mean of the period it
# falls in. transform('mean') broadcasts the period mean back onto the 5-minute
# index, so no merge + forward-fill is needed. The previous forward-fill ran over
# the whole frame and therefore also overwrote the NaNs in the '5min' column
# (screened-out and missing readings) with the preceding value.
for res in resolutions:
    ef[resolution_name[res]] = ef.groupby(pd.Grouper(freq=res))['5min'].transform('mean')

ef

In [None]:
def cov(x):
    """Return the coefficient of variation of ``x``: population std (ddof=0) divided by the mean."""
    return x.std(ddof=0) / x.mean()


# Summarise how much the 5-minute emission factor deviates from each coarser average.
five_min = ef['5min']

period_labels = []
records = []
for res in resolutions:
    label = resolution_name[res]
    # group the 5-minute series into periods of this resolution
    by_period = five_min.resample(res)
    period_labels.append(label)
    records.append({
        # mean absolute percentage error of the period average relative to the 5-minute value
        'MAPE [%]': round(((five_min - ef[label]) / five_min).abs().mean() * 100, 1),
        # within-period standard deviation, averaged over all periods
        'STDEV [lbCO2/MWh]': round(by_period.std(ddof=0).mean(), 1),
        # within-period coefficient of variation, averaged over all periods
        'COV [%]': round(by_period.apply(cov).mean() * 100, 1),
    })

# Build the table in one step (numeric dtypes, no cell-by-cell growth). The index is
# left unnamed on purpose so that reset_index() yields a column called 'index'.
variation = pd.DataFrame(
    records,
    index=period_labels,
    columns=['MAPE [%]', 'STDEV [lbCO2/MWh]', 'COV [%]'],
)

variation

In [None]:
# Reshape the summary table for plotting: move the resolution labels out of the
# index into a 'Period' column and swap the adverb labels for plain period names.
variation_plot_data = (
    variation
    .reset_index()
    .rename(columns={'index':'Period'})
    .replace({'hourly':'hour','monthly':'month','quarterly':'quarter','annual':'year'})
)

In [None]:
# Bar chart of the coefficient of variation for each emission factor resolution,
# ordered from the coarsest period (year) to the finest (15min).
caiso_5min_cov = px.bar(
    variation_plot_data,
    x='Period',
    y='COV [%]',
    text='COV [%]',
    template='plotly_white',
    labels={'COV [%]':'Coefficient of Variation (%)','Period':'Emission Factor Resolution'},
    category_orders={'Period':['year','quarter','month','hour','30min','15min']},
)
# print each value above its bar and fix the y-axis range
caiso_5min_cov.update_traces(textposition='outside')
caiso_5min_cov.update_yaxes(range=[0,35])

# export the figure to the results folder, then render it in the notebook
caiso_5min_cov.write_image("../results/figures/caiso_5min_cov.jpeg")
caiso_5min_cov.show()

In [None]:
# Average within-period standard deviation of the 5-minute emission factor,
# one row per averaging resolution.
# NOTE(review): this cell was left unfinished (`stdev_df = ` had no right-hand side,
# which is a SyntaxError, and the loop discarded its result). The version below
# collects the value the loop was computing; the original heading mentioned a
# monthly COV, which was never implemented here - confirm the intended output.
# The duplicate `cov` definition was dropped; it is already defined in an earlier cell.
stdev_df = pd.DataFrame(
    {'STDEV [lbCO2/MWh]': [ef['5min'].resample(res).std(ddof=0).mean() for res in resolutions]},
    index=[resolution_name[res] for res in resolutions],
)
stdev_df

In [None]:
# Plot the 5-minute emission factor time series. The column is called '5min'
# (it was renamed from 'ef' when the averages were calculated), so y='ef' would
# fail because no such column exists in the frame.
px.line(ef, x=ef.index, y='5min')