## Marginal Operating Emissions Rate (MOER) data

This notebook gives some examples of using the `MOERLoader` included in SustainGym, which loads marginal operating emissions rate (MOER) data from [SGIPSIGNAL](https://sgipsignal.com/). SGIPSIGNAL provides real-time and forecasted MOER data for California.

This notebook also shows how to use the raw SGIPSIGNAL API.

In [None]:
# Reload imported modules automatically before each cell runs, so edits to local
# source files take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Move the working directory up one level.
# NOTE(review): presumably to the repository root, so that the `sustaingym` import and
# the relative `plots/` output paths resolve -- confirm.
# This is not idempotent: re-running this cell moves up another directory.
%cd ..

## Load MOER data

Then plot the correlation between the actual and forecasted values.

In [None]:
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
from scipy.stats import pearsonr
import seaborn as sns

from sustaingym.data.load_moer import load_monthly_moer, MOERLoader

# Five-minute step; used further down to step back from a month's end time when slicing.
FIVEMINS = timedelta(seconds=5 * 60)

# Apply seaborn's 'darkgrid' style to the figures drawn in this notebook.
sns.set_style(style='darkgrid')

In [None]:
# Build the timezone-aware bounds with `localize()`. Passing a pytz timezone through
# `tzinfo=` attaches the zone's historical local-mean-time offset (about -7:53 for
# US/Pacific) instead of PST/PDT, which shifts both bounds by several minutes.
pacific = pytz.timezone('US/Pacific')
starttime = pacific.localize(datetime(2019, 5, 1))
endtime = pacific.localize(datetime(2019, 12, 1))

# Loader for the 'SGIP_CAISO_PGE' MOER data between the two bounds.
ml = MOERLoader(
    starttime=starttime, endtime=endtime, ba='SGIP_CAISO_PGE')

In [None]:
# Ask the loader for the MOER data at `starttime` and print the shape of what comes back.
moer_array = ml.retrieve(starttime)
print(moer_array.shape)

In [None]:
# Inspect the loader's underlying table: number of rows, then its first and last rows.
df = ml.df
print(df.shape[0])
display(df.head(), df.tail())

### Analyze the quality of the forecasts

In [None]:
def check_forecast_quality(df: pd.DataFrame, ax: plt.Axes, max_steps: int = 36) -> None:
    """Plot forecast correlation and RMSE as a function of forecast horizon.

    For each horizon ``i`` in ``1..max_steps``, the forecast column ``f{i}`` at row
    ``t`` is paired with the ``moer`` column at row ``t + i``.

    Args:
        df: table with a ``moer`` column and forecast columns ``f1`` ... ``f{max_steps}``.
            Rows are assumed to be in chronological order at a fixed step -- TODO confirm.
        ax: axes that receives the correlation curve (left y-axis); a twin axes is
            created on it for the RMSE curve (right y-axis).
        max_steps: largest forecast horizon (in rows) to evaluate.
    """
    horizons = range(1, max_steps + 1)
    rs, rmses = [], []
    for i in horizons:
        # Convert to NumPy so the pairing is positional. Subtracting the two pandas
        # Series directly aligns them on index labels, which compares each forecast
        # with the value at the time it was issued instead of the time it targets.
        true = df['moer'].iloc[i:].to_numpy()
        pred = df[f'f{i}'].iloc[:-i].to_numpy()
        rs.append(pearsonr(true, pred)[0])
        # nanmean keeps the NaN-skipping behaviour that a pandas mean would have had
        rmses.append(np.sqrt(np.nanmean((true - pred)**2)))

    ax.plot(horizons, rs, color='tab:blue', label='r')
    ax.set(xlabel='timesteps ahead', ylabel='correlation $r$')
    ax2 = ax.twinx()
    ax2.plot(horizons, rmses, color='tab:orange', label='rmse')
    ax2.set(ylabel='RMSE')

In [None]:
# Build the timezone-aware bounds with `localize()`. Passing a pytz timezone through
# `tzinfo=` attaches the zone's historical local-mean-time offset (about -7:53 for
# US/Pacific) instead of PST/PDT.
pacific = pytz.timezone('US/Pacific')
starttime = pacific.localize(datetime(2019, 5, 1))
# NOTE(review): endtime equals starttime; this presumably relies on MOERLoader loading
# the whole month that contains the requested range -- confirm.
endtime = pacific.localize(datetime(2019, 5, 1))

# One loader (and its underlying table) per region being compared.
ml_pge = MOERLoader(
    starttime=starttime, endtime=endtime, ba='SGIP_CAISO_PGE')
df_pge = ml_pge.df

ml_sce = MOERLoader(
    starttime=starttime, endtime=endtime, ba='SGIP_CAISO_SCE')
df_sce = ml_sce.df

In [None]:
# Side-by-side forecast-quality panels: PGE on the left, SCE on the right.
fig, axs = plt.subplots(1, 2, figsize=(9,4), tight_layout=True)

axs[0].set_title('PGE')
check_forecast_quality(df_pge, axs[0])
# The figure-level legend is built here, before the SCE panel is drawn, so at this point
# only the PGE panel has labelled lines to contribute. Keep this call ahead of the SCE
# plot; moving it after would change which lines the legend collects.
fig.legend(bbox_to_anchor=(0.9, 0.7))

axs[1].set_title('SCE')
check_forecast_quality(df_sce, axs[1])

plt.show()

In [None]:
def get_forecast_quality(year: int, month: int, ba: str, max_steps: int = 36
                         ) -> tuple[list[float], list[float]]:
    """Compute forecast correlation and RMSE per horizon for one month of data.

    For each horizon ``i`` in ``1..max_steps``, the forecast column ``f{i}`` at row
    ``t`` is paired with the ``moer`` column at row ``t + i``.

    Args:
        year: year passed to ``load_monthly_moer``.
        month: month passed to ``load_monthly_moer``.
        ba: region identifier passed to ``load_monthly_moer``.
        max_steps: largest forecast horizon (in rows) to evaluate.

    Returns:
        ``(rs, rmses)``: two lists of length ``max_steps`` holding the Pearson
        correlation and the RMSE for horizons ``1..max_steps``.
    """
    df = load_monthly_moer(year, month, ba)

    rs, rmses = [], []
    for i in range(1, max_steps + 1):
        # Convert to NumPy so the pairing is positional. Subtracting the two pandas
        # Series directly aligns them on index labels, which compares each forecast
        # with the value at the time it was issued instead of the time it targets.
        true = df['moer'].iloc[i:].to_numpy()
        pred = df[f'f{i}'].iloc[:-i].to_numpy()
        rs.append(pearsonr(true, pred)[0])
        # nanmean keeps the NaN-skipping behaviour that a pandas mean would have had
        rmses.append(np.sqrt(np.nanmean((true - pred)**2)))
    return rs, rmses

# Overlay the RMSE-vs-horizon curve for each (region, year) pair on a single axes:
# colour encodes the region, a dotted line marks the 2021 curves.
fig, ax = plt.subplots(1, 1, figsize=(4, 4), tight_layout=True)

horizons = range(1, 36+1)
curve_specs = [
    # (year, region, line keyword arguments) -- drawn in this order
    (2019, 'SGIP_CAISO_PGE', dict(color='tab:blue', label='PGE, May 2019')),
    (2021, 'SGIP_CAISO_PGE', dict(color='tab:blue', linestyle=':', label='PGE, May 2021')),
    (2019, 'SGIP_CAISO_SCE', dict(color='tab:orange', label='SCE, May 2019')),
    (2021, 'SGIP_CAISO_SCE', dict(linestyle=':', color='tab:orange', label='SCE, May 2021')),
]
for curve_year, curve_ba, line_kwargs in curve_specs:
    _, rmses = get_forecast_quality(year=curve_year, month=5, ba=curve_ba)
    ax.plot(horizons, rmses, **line_kwargs)

ax.legend()
ax.set(xlabel='timesteps ahead', ylabel='MOER forecast RMSE')

# Save the same figure in raster and vector form.
for out_path in ('plots/moer_forecast_rmse.png', 'plots/moer_forecast_rmse.pdf'):
    fig.savefig(out_path, dpi=300, bbox_inches='tight')

In [None]:
AM_LA = pytz.timezone('America/Los_Angeles')

def plot_moer(day: datetime, ba: str, forecast_freq: int = 36) -> None:
    """Plot actual MOER values for one day with periodic forecast traces overlaid.

    Args:
        day: timezone-aware start of the window; rows from ``day`` through
            ``day`` + 1 day + 10 minutes (inclusive) are shown.
        ba: region identifier passed to ``load_monthly_moer``.
        forecast_freq: a forecast trace is drawn starting from every
            ``forecast_freq``-th row of the window.
    """
    window_end = day + timedelta(days=1, seconds=600)
    df = load_monthly_moer(day.year, day.month, ba)
    # Sort without `inplace=True`: the filtered frame is derived from another frame, and
    # mutating it in place can trigger pandas' SettingWithCopyWarning.
    df = df[(day <= df.index) & (df.index <= window_end)].sort_index()

    fig, ax = plt.subplots(figsize=(5, 4), tight_layout=True)

    # every so often, plot emissions forecast
    n_rows = len(df)
    for i in range(0, n_rows, forecast_freq):
        # all non-'moer' columns of row i; presumably [f1, f2, ..., f36] -- confirm
        forecasts = df.iloc[i].drop('moer').values
        # skip traces whose horizon runs past the end of the plotted window
        if i + len(forecasts) + 1 > n_rows:
            continue
        # the k-th forecast of row i is drawn at the timestamp of row i + k
        indices = df.index[i+1:i+1+len(forecasts)]

        # label only the first trace so the legend shows a single 'forecasts' entry
        label_kwargs = {'label': 'forecasts'} if i == 0 else {}
        ax.plot(indices, forecasts, color='red', alpha=0.3, **label_kwargs)

    dt_str = day.strftime('%Y-%m-%d')
    ax.plot(df['moer'], linewidth=2, label='actual')
    ax.set(xlabel='time (UTC)', ylabel='MOER (kg CO2 / kWh)',
           title=f'MOER vs. Forecasted MOER: {dt_str}')
    ax.tick_params(axis='x', rotation=30)
    ax.legend()

# Use `localize()` rather than `tzinfo=AM_LA`: with pytz, the `tzinfo=` form attaches the
# zone's historical local-mean-time offset (about -7:53) instead of PST/PDT.
plot_moer(AM_LA.localize(datetime(2021, 6, 3)), 'SGIP_CAISO_SCE')

## Plot typical monthly MOER values

In [None]:
def plot_avg_moer(starttime: datetime, ba: str, ax: plt.Axes, color: str, label: str) -> None:
    """Plot the mean MOER by time of day over one month, with a +/- 1 std band.

    Args:
        starttime: timezone-aware start of the range; data is taken from here up to
            (but not including) the first day of the following month, US/Pacific.
        ba: region identifier passed to ``load_monthly_moer``.
        ax: axes to draw on.
        color: colour used for both the mean line and the shaded band.
        label: legend label for the mean line.
    """
    df = load_monthly_moer(starttime.year, starttime.month, ba)

    # First day of the following month. Use `localize()` rather than `tzinfo=`: with
    # pytz, the `tzinfo=` form attaches the zone's historical local-mean-time offset
    # (about -7:53 for US/Pacific) instead of PST/PDT.
    if starttime.month == 12:
        next_month = datetime(starttime.year + 1, 1, 1)
    else:
        next_month = datetime(starttime.year, starttime.month + 1, 1)
    endtime = pytz.timezone('US/Pacific').localize(next_month)

    # Label-based slicing includes both endpoints, so step back five minutes from the
    # month boundary. Copy so that adding a column below does not write into a slice
    # of `df`.
    df_month = df.loc[starttime:endtime - FIVEMINS, ['moer']].copy()
    # NOTE(review): the time of day is taken in the index's own timezone -- confirm
    # that this is the intended clock for the x-axis.
    df_month['time_of_day'] = df_month.index.strftime('%H:%M')
    moer_by_tod = df_month.groupby('time_of_day').agg(['mean', 'std'])
    mean = moer_by_tod[('moer', 'mean')]
    std = moer_by_tod[('moer', 'std')]

    ax.plot(moer_by_tod.index, mean, label=label, color=color)
    ax.fill_between(moer_by_tod.index, mean - std, mean + std, color=color, alpha=0.5)
    # one tick every 36 time-of-day bins keeps the x-axis readable
    ax.set(xticks=moer_by_tod.index[::36])
    ax.tick_params(axis='x', rotation=30)

In [None]:
# Typical daily MOER profile, May 2019 vs. May 2021, one panel per region.
fig, axs = plt.subplots(1, 2, figsize=(9, 4), tight_layout=True, sharey=True)

# Build the month starts with `localize()`. Passing a pytz timezone through `tzinfo=`
# attaches the zone's historical local-mean-time offset (about -7:53 for US/Pacific)
# instead of PST/PDT, which would shift the start of each averaged range.
pacific = pytz.timezone('US/Pacific')
may2019 = pacific.localize(datetime(2019, 5, 1))
may2021 = pacific.localize(datetime(2021, 5, 1))

ax = axs[0]
plot_avg_moer(may2019, 'SGIP_CAISO_PGE', ax, color='tab:blue', label='May 2019')
plot_avg_moer(may2021, 'SGIP_CAISO_PGE', ax, color='tab:orange', label='May 2021')
ax.set(xlabel='time of day', ylabel='MOER', title='PGE')
ax.legend()

# The y-axis is shared between the panels, so only the left panel carries a y-label.
ax = axs[1]
plot_avg_moer(may2019, 'SGIP_CAISO_SCE', ax, color='tab:blue', label='May 2019')
plot_avg_moer(may2021, 'SGIP_CAISO_SCE', ax, color='tab:orange', label='May 2021')
ax.set(xlabel='time of day', title='SCE')
ax.legend()

fig.savefig('plots/moer.png', dpi=300, bbox_inches='tight')
fig.savefig('plots/moer.pdf', dpi=300, bbox_inches='tight')

## Using the SGIPSIGNAL API

In [None]:
import pandas as pd

import requests

### Create an Account and Authenticate

From the [SGIPSIGNAL documentation](https://sgipsignal.com/api-documentation):
> Your access token will expire after 30 minutes and you'll need to sign in again to obtain a fresh access token.

In [None]:
import getpass
import os

# Credentials are read from the environment, falling back to an interactive prompt, so
# that they are never stored in the notebook or committed to version control.
# NOTE(review): the credentials that were previously hardcoded in this cell should be
# treated as exposed and rotated.
username = os.environ.get('SGIP_USERNAME') or input('SGIPSIGNAL username: ')
password = os.environ.get('SGIP_PASSWORD') or getpass.getpass('SGIPSIGNAL password: ')
# The e-mail address is needed only by the one-time registration request below.
email = os.environ.get('SGIP_EMAIL', '')

# Create an account (only do this once)
# register_data = dict(username=username, password=password, email=email)
# r = requests.post('https://sgipsignal.com/register', data=register_data)
# print(r.content)

In [None]:
# Exchange the username/password for an access token.
r = requests.get('https://sgipsignal.com/login/', auth=(username, password), timeout=30)
# Fail here with a clear HTTP error (e.g. bad credentials) instead of a KeyError below.
r.raise_for_status()
token = r.json()['token']

### Download historical MOER values and historical forecasts

In [None]:
# Request historical MOER values for one day; the access token goes in the
# Authorization header.
headers = {
  'Authorization': f'Bearer {token}'
}
params = dict(
    ba='SGIP_CAISO_PGE',
    starttime='2020-02-20T00:00:00-0000',
    endtime='2020-02-21T00:00:00-0000',
    version='2.0'
)
r = requests.get('https://sgipsignal.com/sgipmoer/', params=params, headers=headers,
                 timeout=30)
# Surface HTTP errors (e.g. an expired token) here rather than when parsing the body.
r.raise_for_status()

In [None]:
# Turn the JSON response into a table indexed by `point_time` and sorted by that index.
hist = (
    pd.DataFrame(r.json())
    .set_index('point_time')
    .sort_index()
)
hist.head()

In [None]:
# Request the historical forecasts for one day; the access token goes in the
# Authorization header.
headers = {
  'Authorization': f'Bearer {token}'
}
params = dict(
    ba='SGIP_CAISO_PGE',
    starttime='2019-05-20T00:00:00-0000',
    endtime='2019-05-21T00:00:00-0000',
    version='1.0-1.0.0'
)
r = requests.get('https://sgipsignal.com/sgipforecast/', params=params, headers=headers,
                 timeout=30)
# Surface HTTP errors (e.g. an expired token) here rather than when parsing the body.
r.raise_for_status()

In [None]:
# Load the raw forecast JSON response into a DataFrame and preview its first rows.
forc_raw = pd.DataFrame(r.json())
forc_raw.head()

In [None]:
def _values_only(points):
    """Keep just the 'value' entry of every item in one forecast list."""
    return [point['value'] for point in points]

# Work on a new frame indexed by `generated_at`, leaving `forc_raw` untouched, and
# reduce each forecast entry to its plain list of values.
forc = (
    forc_raw
    .set_index('generated_at')
    .assign(forecast=lambda frame: frame['forecast'].map(_values_only))
)
forc.head()