In [1]:
%reload_ext autoreload
%autoreload 2

import os

os.chdir(f"/home/{os.getlogin()}/watttime-python-client-aer-algo")

import math
import numpy as np
import pandas as pd
import datetime
from pytz import UTC, timezone
import seaborn as sns
from datetime import datetime, timedelta
import concurrent.futures
import contextlib
import io

from watttime import WattTimeForecast, WattTimeHistorical, RecalculatingWattTimeOptimizer

import data.s3 as s3u
import evaluation.eval_framework as efu
from plotnine import *

username = os.getenv("WATTTIME_USER")
password = os.getenv("WATTTIME_PASSWORD")

actual_data = WattTimeHistorical(username, password)
hist_data = WattTimeForecast(username, password)

s3 = s3u.s3_utils()
pd.options.mode.chained_assignment = None  # default='warn'
import random
from tqdm import tqdm
import warnings

In [2]:
def random_date_with_time(start, end):

    time_between_dates = end - start
    random_number_of_seconds = random.randint(0, int(time_between_dates.total_seconds()))
    random_date = start + timedelta(seconds=random_number_of_seconds)
    
    random_hour = random.randint(0, 23)
    random_minute = random.randint(0, 59)
    
    random_date_with_time = random_date.replace(hour=random_hour, minute=random_minute)
    return pd.to_datetime(random_date_with_time, utc = True)

In [3]:
def full_requery_sim(region, full_forecast, full_history, increments, start_time, end_time, usage_power_kw, time_needed, method = "simple"):

    results = {}
    all_relevant_forecasts = full_forecast.set_index("generated_at")[start_time - timedelta(minutes = 5):end_time].reset_index()
    all_relevant_forecasts = all_relevant_forecasts.set_index("generated_at")[start_time - timedelta(minutes = 5):end_time]
    baseline_forecast = all_relevant_forecasts.loc[all_relevant_forecasts.index.min()].reset_index()
    schedules = []

    ideal = efu.get_schedule_and_cost_api_requerying(region = region,
                                        usage_power_kw = usage_power_kw,
                                        time_needed = time_needed,
                                        start_time = start_time,
                                        end_time = end_time, 
                                        optimization_method="simple",
                                        moer_list = [full_history.set_index("point_time")[start_time - timedelta(minutes = 5):end_time].reset_index()]).reset_index().rename({"pred_moer" : "actual_moer"}, axis = 1)

    results["ideal_emissions"] = round(ideal["emissions_co2e_lb"].sum(), 2)
    ideal["increment"] = "Ideal"
    ideal["pred_moer"] = ideal["actual_moer"]
    ideal["actual_emissions"] = ideal["actual_moer"]*ideal["energy_usage_mwh"]
    schedules.append(ideal)


    baseline = efu.get_schedule_and_cost_api_requerying(region = region,
                                        usage_power_kw = usage_power_kw,
                                        time_needed = time_needed,
                                        start_time = start_time,
                                        end_time = end_time, 
                                        optimization_method="baseline",
                                        moer_list = [baseline_forecast]).reset_index()

    baseline = baseline.merge(ideal[["point_time", "actual_moer"]])

    baseline["increment"] = "Baseline"
    baseline["actual_emissions"] = baseline["actual_moer"]*baseline["energy_usage_mwh"]

    schedules.append(baseline)

    results["baseline_predicted_emissions"] = round(baseline["emissions_co2e_lb"].sum(), 2)
    results["baseline_actual_emissions"] = round((baseline["actual_moer"]*baseline["energy_usage_mwh"]).sum(), 2)

    no_requery = efu.get_schedule_and_cost_api_requerying(region = region,
                                        usage_power_kw = usage_power_kw,
                                        time_needed = time_needed,
                                        start_time = start_time,
                                        end_time = end_time, 
                                        optimization_method=method,
                                        moer_list = [baseline_forecast]).reset_index()

    no_requery = no_requery.merge(ideal[["point_time", "actual_moer"]])
    no_requery["increment"] = "No requery"
    no_requery["actual_emissions"] = no_requery["actual_moer"]*no_requery["energy_usage_mwh"]
    schedules.append(no_requery)

    results["no_requery_predicted_emissions"] = round(no_requery["emissions_co2e_lb"].sum(), 2)
    results["no_requery_actual_emissions"] = round((no_requery["actual_moer"]*no_requery["energy_usage_mwh"]).sum(), 2)



    for increment in increments:
        inc_times = pd.date_range(all_relevant_forecasts.index.min(), all_relevant_forecasts.index.max(), freq=timedelta(minutes=increment))
        moer_list = [all_relevant_forecasts.loc[timestamp].reset_index() for timestamp in inc_times]

        print(len(moer_list))

        schedule = efu.get_schedule_and_cost_api_requerying(region = region,
                                        usage_power_kw = usage_power_kw,
                                        time_needed = time_needed,
                                        start_time = start_time,
                                        end_time = end_time, 
                                        optimization_method=method,
                                        moer_list = moer_list).reset_index()
        
        
        schedule = schedule.merge(ideal[["point_time", "actual_moer"]])
        schedule["actual_emissions"] = schedule["actual_moer"]*schedule["energy_usage_mwh"]
        schedule["increment"] = f"Requery {increment} minutes"
        schedules.append(schedule)


        results[f"schedule_predicted_emissions_requery_{increment}"] = round(schedule["emissions_co2e_lb"].sum(), 2)
        results[f"schedule_actual_emissions_requery_{increment}"] = round((schedule["actual_moer"]*schedule["energy_usage_mwh"]).sum(), 2)

    increment_order = [f"Requery {increment} minutes" for increment in increments]
    order = ["Ideal", "Baseline", "No requery"] + increment_order[::-1]
    full_schedules = pd.concat(schedules)
    full_schedules["increment"] = pd.Categorical(full_schedules["increment"], order, ordered = True)

    return full_schedules

In [4]:
# Some basic paramaters to get simple data. Will eventually be expanded to the synthetic users

increments = [5, 15, 30, 60, 120, 180, 240, 360]
start_time = random_date_with_time(datetime(2023, 1, 1), datetime(2023, 12, 31))
end_time = start_time + timedelta(hours = 12)
usage_power_kw = 2
time_needed = 180


regions = [
 'CAISO_NORTH',
 'SPP_TX',
 'ERCOT_EASTTX',
 'FPL',
 'SOCO',
 'PJM_CHICAGO',
 'LDWP',
 'PJM_DC',
 'NYISO_NYC'
]

dates = [pd.to_datetime(random_date_with_time(datetime(2023, 1, 1), datetime(2023, 12, 31))) for i in range(0, 1000)]

In [None]:
out = []
for region in regions:
    print(region)
    full_forecast = s3.load_parquetdataframe(f"complete_2023_forecast_history/{region}.parquet").drop_duplicates()
    full_forecast['point_time'] = pd.to_datetime(full_forecast['point_time'], utc=True)
    full_history = s3.load_parquetdataframe(f"complete_2023_actual_history/{region}.parquet").drop_duplicates()

    for date in tqdm(dates):
        try:
            with contextlib.redirect_stdout(io.StringIO()), warnings.catch_warnings():
                warnings.simplefilter("ignore", category=RuntimeWarning)  
                schedules = full_requery_sim(region, full_forecast, full_history, increments, date, date + timedelta(hours = 12), usage_power_kw, time_needed, method = "simple")
            schedules["init_time"] = date
            schedules["region"] = region
            out.append(schedules)
        except Exception as e:
            print(e)

out_df = pd.concat(out)
s3.store_parquetdataframe(out_df, f'historical_requery_sim_1000_simple_fit.parquet')


CAISO_NORTH


  7%|▋         | 72/1000 [18:49<2:31:37,  9.80s/it]

Timestamp('2023-07-03 06:10:00+0000', tz='UTC')


 24%|██▍       | 242/1000 [1:01:39<2:53:21, 13.72s/it]




 37%|███▋      | 368/1000 [1:36:20<2:22:18, 13.51s/it]

Timestamp('2023-09-03 13:45:00+0000', tz='UTC')


 45%|████▍     | 447/1000 [1:56:58<2:13:49, 14.52s/it]




 59%|█████▉    | 593/1000 [2:34:52<1:11:59, 10.61s/it]

Timestamp('2023-05-24 04:05:00+0000', tz='UTC')


 68%|██████▊   | 676/1000 [2:58:44<57:22, 10.63s/it]  

Timestamp('2023-01-20 16:15:00+0000', tz='UTC')


 81%|████████  | 810/1000 [3:32:24<34:43, 10.97s/it]  




 88%|████████▊ | 879/1000 [3:49:58<19:41,  9.77s/it]

Timestamp('2023-09-03 13:45:00+0000', tz='UTC')


 88%|████████▊ | 883/1000 [3:50:51<20:04, 10.29s/it]

Timestamp('2023-06-02 03:50:00+0000', tz='UTC')


 96%|█████████▋| 963/1000 [4:11:14<06:49, 11.06s/it]

NaT


100%|██████████| 1000/1000 [4:20:59<00:00, 15.66s/it]


SPP_TX


  7%|▋         | 72/1000 [17:15<2:32:18,  9.85s/it]

Timestamp('2023-07-03 06:10:00+0000', tz='UTC')


 24%|██▍       | 242/1000 [36:18<1:15:30,  5.98s/it]




 45%|████▍     | 447/1000 [57:46<55:02,  5.97s/it]  




 59%|█████▉    | 593/1000 [1:12:48<31:25,  4.63s/it]

Timestamp('2023-05-24 04:05:00+0000', tz='UTC')


 81%|████████  | 810/1000 [1:35:05<18:25,  5.82s/it]




 88%|████████▊ | 883/1000 [1:42:33<08:59,  4.62s/it]

Timestamp('2023-06-02 03:50:00+0000', tz='UTC')


 96%|█████████▋| 963/1000 [1:50:46<02:53,  4.68s/it]

NaT


100%|██████████| 1000/1000 [1:54:40<00:00,  6.88s/it]


ERCOT_EASTTX


  7%|▋         | 72/1000 [07:24<1:12:10,  4.67s/it]

Timestamp('2023-07-03 06:10:00+0000', tz='UTC')


 24%|██▍       | 242/1000 [25:05<1:15:18,  5.96s/it]




 45%|████▍     | 447/1000 [46:08<54:11,  5.88s/it]  




 81%|████████  | 810/1000 [1:23:24<18:17,  5.78s/it]




 88%|████████▊ | 883/1000 [1:30:43<08:54,  4.57s/it]

Timestamp('2023-06-02 03:55:00+0000', tz='UTC')


 96%|█████████▋| 963/1000 [1:38:49<02:49,  4.57s/it]

NaT


100%|██████████| 1000/1000 [1:42:36<00:00,  6.16s/it]


FPL


  1%|          | 12/1000 [01:08<1:14:56,  4.55s/it]

Timestamp('2023-11-03 11:10:00+0000', tz='UTC')


  7%|▋         | 72/1000 [07:11<1:12:08,  4.66s/it]

Timestamp('2023-07-03 06:10:00+0000', tz='UTC')


 24%|██▍       | 242/1000 [24:35<1:12:32,  5.74s/it]




 45%|████▍     | 447/1000 [45:33<53:30,  5.80s/it]  




 50%|████▉     | 495/1000 [50:26<39:05,  4.65s/it]

Timestamp('2023-09-03 04:55:00+0000', tz='UTC')


 81%|████████  | 810/1000 [1:22:32<18:21,  5.80s/it]




 88%|████████▊ | 879/1000 [1:29:29<09:09,  4.54s/it]

Timestamp('2023-09-03 04:55:00+0000', tz='UTC')


 88%|████████▊ | 883/1000 [1:29:49<08:11,  4.20s/it]

Timestamp('2023-06-02 03:55:00+0000', tz='UTC')


100%|██████████| 1000/1000 [1:41:46<00:00,  6.11s/it]


SOCO


  7%|▋         | 72/1000 [07:15<1:10:35,  4.56s/it]

Timestamp('2023-07-03 06:10:00+0000', tz='UTC')


 24%|██▍       | 242/1000 [24:48<1:12:29,  5.74s/it]




 45%|████▍     | 447/1000 [45:47<54:36,  5.93s/it]  




 81%|████████  | 810/1000 [1:23:03<18:16,  5.77s/it]




 88%|████████▊ | 883/1000 [1:30:25<08:56,  4.58s/it]

Timestamp('2023-06-02 03:55:00+0000', tz='UTC')


100%|██████████| 1000/1000 [1:42:27<00:00,  6.15s/it]


PJM_CHICAGO


  7%|▋         | 72/1000 [07:19<1:09:21,  4.48s/it]

Timestamp('2023-07-03 06:10:00+0000', tz='UTC')


 24%|██▍       | 242/1000 [24:43<1:13:02,  5.78s/it]




 30%|██▉       | 295/1000 [30:03<53:33,  4.56s/it]  

NaT


 37%|███▋      | 368/1000 [37:26<48:16,  4.58s/it]  

Timestamp('2023-09-03 13:45:00+0000', tz='UTC')


 45%|████▍     | 447/1000 [45:29<53:26,  5.80s/it]  




 59%|█████▉    | 593/1000 [1:00:19<31:25,  4.63s/it]

Timestamp('2023-05-24 04:05:00+0000', tz='UTC')


 81%|████████  | 810/1000 [1:22:32<18:21,  5.80s/it]




 87%|████████▋ | 868/1000 [1:28:28<13:50,  6.29s/it]