# With Synthetic Data

In [1]:
import os
PATH = os.getenv("HOME")
os.chdir(f"{PATH}/watttime-python-client-aer-algo")

import pandas as pd
import evaluation.eval_framework as evu
from datetime import datetime

import seaborn as sns
import evaluation.metrics as m
from datetime import timedelta

import random
import math
from watttime import WattTimeForecast, WattTimeHistorical
import data.s3 as s3u
import importlib

username = os.getenv("WATTTIME_USER")
password = os.getenv("WATTTIME_PASSWORD")

In [2]:
s3 = s3u.s3_utils()

## Analysis Details
- 2024 dates only in non-tz aware local time.
- 1000 users
- sanity check on 9 current regions + 9 randomly selected other regions
- set of requery increments to test: none, 5,15,60,180
- charging windows of lengths 3,6,12 hours
- Charge needed at least 45 minutes (25% of smallest window)

## Prepared forecast data
- already converted to UTC for region specific files

In [3]:
df_req = s3.load_csvdataframe("requery_data/20241203_1k_synth_users_96_days.csv")

### Single test

In [4]:
import watttime.api as wt

In [179]:
#importlib.reload(wt)

<module 'watttime.api' from '/home/jennifer.badolato/watttime-python-client-aer-algo/watttime/api.py'>

In [None]:
class HistoricalDataProcessor:
    def __init__(self, username: str, password: str):
        """
        Initialize the HistoricalDataProcessor with credentials.
        
        Parameters:
        -----------
        username : str
            WattTime API username
        password : str
            WattTime API password
        """
        self.username = username
        self.password = password
        self.TZ_DICTIONARY = {
            "CAISO_NORTH": "America/Los_Angeles",
            "SPP_TX": "America/Chicago",
            "FPL": "America/New_York",
            "SOCO": "America/Chicago",
            "PJM_CHICAGO": "America/Chicago",
            "LDWP": "America/Los_Angeles",
            "PJM_DC": "America/New_York",
            "NYISO_NYC": "America/New_York",
            "PACE": "America/Denver",
            "PNM": "America/Denver",
            "MISO_INDIANAPOLIS": "America/Indiana/Indianapolis",
            "WALC": "America/Phoenix",
            "ERCOT_AUSTIN": "America/Chicago",
            "SPP_KANSAS": "America/Chicago",
            "ISONE_VT": "America/New_York",
            "SPP_SIOUX": "America/Chicago",
            "SC": "America/New_York",
        }
        
    def get_timezone_from_dict(self, key: str, dictionary: dict = None) -> str:
        """
        Retrieve the timezone value from the dictionary based on the given key.
        
        Parameters:
        -----------
        key : str
            The key whose corresponding timezone value is to be retrieved.
        dictionary : dict, optional
            The dictionary from which to retrieve the value.
            
        Returns:
        --------
        str or None
            The timezone value corresponding to the given key if it exists, otherwise None.
        """
        if dictionary is None:
            dictionary = self.TZ_DICTIONARY
        return dictionary.get(key)

    def get_raw_file_from
    
    def prepare_set_of_historic_actuals(
        self,
        full_history,
        start_time,
        end_time
        ):

        moer_list = full_history.loc[
                (full_history["point_time"] >= start_time - timedelta(minutes=5)) &
                (full_history["point_time"] <= end_time - timedelta(minutes=5))
                ]
        return moer_list
        
    def get_historical_fcst_data(self, session_start_time: datetime, horizon: int, region: str) -> pd.DataFrame:
        """
        Retrieve historical forecast data for a specific plug-in time, horizon, and region.
        
        Parameters:
        -----------
        session_start_time : datetime
            The time at which the EV was plugged in.
        horizon : int
            The number of hours to forecast ahead.
        region : str
            The region for which to retrieve the forecast data.
            
        Returns:
        --------
        pd.DataFrame
            A DataFrame containing historical forecast data.
        """
        time_zone = self.get_timezone_from_dict(region)
        session_start_time = pd.Timestamp(convert_to_utc(session_start_time, time_zone))
        horizon = math.ceil(horizon / 12)
        hist_data = WattTimeForecast(self.username, self.password)
        
        return hist_data.get_historical_forecast_pandas(
            start=session_start_time - pd.Timedelta(minutes=5),
            end=session_start_time,
            horizon_hours=horizon,
            region=region,
        )
        
    def get_historical_actual_data(self, session_start_time: datetime, horizon: int, region: str) -> pd.DataFrame:
        """
        Retrieve historical actual data for a specific plug-in time, horizon, and region.
        
        Parameters:
        -----------
        session_start_time : datetime
            The time at which the EV was plugged in.
        horizon : int
            The number of hours to retrieve data for.
        region : str
            The region for which to retrieve the actual data.
            
        Returns:
        --------
        pd.DataFrame
            A DataFrame containing historical actual data.
        """
        time_zone = self.get_timezone_from_dict(region)
        session_start_time = pd.Timestamp(convert_to_utc(session_start_time, time_zone))
        horizon = math.ceil(horizon / 12)
        hist_data = WattTimeHistorical(self.username, self.password)
        
        return hist_data.get_historical_pandas(
            start=session_start_time - pd.Timedelta(minutes=5),
            end=session_start_time + pd.Timedelta(hours=horizon),
            region=region,
        )
    

In [6]:
region = "PJM_CHICAGO"

full_forecast = s3.load_parquetdataframe(
    f"complete_2024_forecast_history/{region}.parquet"
).drop_duplicates()

full_forecast["point_time"] = pd.to_datetime(
    full_forecast["point_time"], utc=True
)

full_history = s3.load_parquetdataframe(
    f"complete_2024_actual_history/{region}.parquet"
).drop_duplicates()

In [9]:
full_forecast.head()

Unnamed: 0,point_time,value,generated_at,region
0,2023-12-31 06:00:00+00:00,1176.8,2023-12-31 06:00:00+00:00,PJM_CHICAGO
1,2023-12-31 06:05:00+00:00,1176.8,2023-12-31 06:00:00+00:00,PJM_CHICAGO
2,2023-12-31 06:10:00+00:00,1176.2,2023-12-31 06:00:00+00:00,PJM_CHICAGO
3,2023-12-31 06:15:00+00:00,1175.0,2023-12-31 06:00:00+00:00,PJM_CHICAGO
4,2023-12-31 06:20:00+00:00,1172.8,2023-12-31 06:00:00+00:00,PJM_CHICAGO


In [10]:
# Sanitize input data
df_req["sanitize_intervals_plugged_in"] = df_req.apply(lambda x: sanitize_total_intervals(x.total_intervals_plugged_in), axis=1)
df_req["sanitize_time_needed"] = df_req.apply(lambda x: sanitize_time_needed(x.total_seconds_to_95, x.length_of_session_in_seconds), axis=1)

### Inputs

In [36]:
synth_data = df_req.sample(100).copy()
synth_data.session_start_time = pd.to_datetime(synth_data.session_start_time)
synth_data.session_end_time = pd.to_datetime(synth_data.session_end_time)

## Looping

In [13]:
import warnings
warnings.filterwarnings("ignore")

In [14]:
def prepare_set_of_historic_actuals(
        full_history,
        start_time,
        end_time
        ):

        moer_list = full_history.loc[
                (full_history["point_time"] >= start_time - timedelta(minutes=5)) &
                (full_history["point_time"] <= end_time - timedelta(minutes=5))
                ]
        return moer_list

def prepare_set_of_forecasts(
                forecasts,
                start_time,
                end_time,
                increment
    ):
        inc_times = pd.date_range(
            start_time,
            end_time,
            freq=timedelta(minutes=increment),
        ).tolist()

        moer_list = [
            forecasts.loc[
            forecasts["generated_at"] == timestamp].sort_values(by=["point_time"], ascending=True)
            for timestamp in inc_times
        ]

        return moer_list

def get_recalculating_optimizer_results(
    region: str,
    moer_list: pd.DataFrame,
    start_time: datetime,
    end_time: datetime,
    usage_power_kw,
    time_needed,
    increment,
    charge_per_interval = None,
):
    
    if charge_per_interval is None:
        wt_opt_rc = wt.RecalculatingWattTimeOptimizer(
            region=region,
            watttime_username=username,
            watttime_password=password,
            usage_time_required_minutes=time_needed,
            usage_power_kw=usage_power_kw,
            optimization_method="auto"
            )
    else:
        wt_opt_rc = wt.RecalculatingWattTimeOptimizerWithContiguity(
            username,
            password,
            region,
            usage_time_required_minutes=time_needed,
            usage_power_kw=usage_power_kw,
            optimization_method='auto',
            charge_per_interval=charge_per_interval,
        )

    new_start_time = start_time
    while new_start_time < end_time:
        for fcst_data in moer_list:
            new_start_time = pd.Timestamp(fcst_data["point_time"].min())
            wt_opt_rc.get_new_schedule(
                    new_start_time=new_start_time,
                    new_end_time=end_time,
                    curr_fcst_data=fcst_data
                )
    print("combining schedules")
    usage_plan = wt_opt_rc.get_combined_schedule(end_time=end_time)
    usage_plan["requery_increment"] = increment

    return usage_plan
     

In [37]:
all_synth_users_list = []
increments = [5,30,60,120,240]
forecast_length=[]
for i in range(0,synth_data.shape[0]):
    try:
        loc_num = i
        time_zone = evu.get_timezone_from_dict(region)                    
        start_time_utc = pd.Timestamp(evu.convert_to_utc(synth_data.iloc[loc_num]['session_start_time'].round('5min') , time_zone))
        end_time_utc = pd.Timestamp(evu.convert_to_utc(synth_data.iloc[loc_num]['session_end_time'].round('5min'), time_zone))
        time_needed = synth_data.iloc[loc_num]["sanitize_time_needed"]
        total_intervals_plugged_in = synth_data.iloc[loc_num]["sanitize_intervals_plugged_in"]
        usage_power_kw = float(synth_data.iloc[loc_num]["power_output_rate"])
        user_type = synth_data.iloc[loc_num]["user_type"]
        optimization_method = "auto"
        print(user_type)
        
        results_dfs = []
        for increment in increments:
            try:
                print(increment)
                moer_list = prepare_set_of_forecasts(
                forecasts=full_forecast,
                increment=increment,
                start_time=start_time_utc,
                end_time=end_time_utc
                )

                results = get_recalculating_optimizer_results(
                region=region,
                moer_list = moer_list,
                start_time=start_time_utc,
                end_time=end_time_utc,
                time_needed=time_needed,
                usage_power_kw=usage_power_kw,
                increment=increment
                )

                results_dfs.append(results)
            except:
                continue

        analysis = pd.concat(results_dfs)

        # baseline + ideal
        moer_list_actuals = prepare_set_of_historic_actuals(
            full_history=full_history,
            start_time=start_time_utc,
            end_time=end_time_utc
            )
        
        ideal = evu.get_schedule_and_cost_api(
                total_time_horizon = total_intervals_plugged_in,
                usage_power_kw=usage_power_kw,
                time_needed=time_needed,
                optimization_method="auto",
                moer_data=moer_list_actuals,
                charge_per_interval=None
                )
        ideal["requery_increment"] = "ideal"

        baseline = evu.get_schedule_and_cost_api(
                total_time_horizon = total_intervals_plugged_in,
                usage_power_kw=usage_power_kw,
                time_needed=time_needed,
                optimization_method="baseline",
                moer_data=moer_list_actuals,
                charge_per_interval=None
                )
        baseline["requery_increment"] = "baseline"

        analysis_full = pd.concat([analysis,baseline,ideal]).merge(moer_list_actuals, on="point_time", how="left")
        analysis_full["emissions_co2e_lb_actual"] = analysis_full["value"]*analysis_full["energy_usage_mwh"]
        analysis_full["user_type"] = user_type
        all_synth_users_list.append(analysis_full)
    except:
        forecast_length.append((total_intervals_plugged_in,increment,user_type))
        continue

r28.560000000000002_tc91_avglc24582_sdlc7271_contFalse
5
remaining_time: 70
== Simple fit! ==
remaining_time: 70
== Simple fit! ==
ctx1: (Timestamp('2024-11-04 20:15:00+0000', tz='UTC'), Timestamp('2024-11-05 02:15:00+0000', tz='UTC'))
ctx2: 2024-11-04 20:15:00+00:00
new_schedule_start_time: 2024-11-04 20:20:00+00:00
remaining_time: 70
== Simple fit! ==
ctx1: (Timestamp('2024-11-04 20:20:00+0000', tz='UTC'), Timestamp('2024-11-05 02:15:00+0000', tz='UTC'))
ctx2: 2024-11-04 20:20:00+00:00
new_schedule_start_time: 2024-11-04 20:25:00+00:00
remaining_time: 65
== Simple fit! ==
ctx1: (Timestamp('2024-11-04 20:25:00+0000', tz='UTC'), Timestamp('2024-11-05 02:15:00+0000', tz='UTC'))
ctx2: 2024-11-04 20:25:00+00:00
new_schedule_start_time: 2024-11-04 20:30:00+00:00
remaining_time: 60
== Simple fit! ==
ctx1: (Timestamp('2024-11-04 20:30:00+0000', tz='UTC'), Timestamp('2024-11-05 02:15:00+0000', tz='UTC'))
ctx2: 2024-11-04 20:30:00+00:00
new_schedule_start_time: 2024-11-04 20:35:00+00:00
remain

In [38]:
# requery doesn't work when the total_intervals_plugged_in < (increment/5*2)
# total_intervals_plugged_in, increment, user_type)
forecast_length

[(144, 240, 'r21.675_tc72_avglc29853_sdlc7487_contFalse'),
 (144, 240, 'r23.8425_tc53_avglc28543_sdlc6935_contFalse'),
 (72, 240, 'r31.11_tc77_avglc27510_sdlc7415_contFalse'),
 (144, 240, 'r32.2575_tc87_avglc23569_sdlc7676_contFalse'),
 (72, 240, 'r38.1225_tc87_avglc27831_sdlc7882_contFalse'),
 (72, 240, 'r21.76_tc59_avglc26230_sdlc7739_contFalse'),
 (36, 240, 'r34.934999999999995_tc78_avglc23891_sdlc7200_contFalse'),
 (144, 240, 'r34.7225_tc88_avglc24107_sdlc6871_contFalse'),
 (144, 240, 'r31.322499999999998_tc123_avglc21525_sdlc6970_contFalse')]

In [43]:
analysis = pd.concat(all_synth_users_list)

In [44]:
analysis.head()

Unnamed: 0,point_time,pred_moer,usage,emissions_co2e_lb,energy_usage_mwh,requery_increment,value,region,emissions_co2e_lb_actual,user_type
0,2024-11-04 20:15:00+00:00,1212.3,0.0,0.0,0.0,5,1158.0,PJM_CHICAGO,0.0,r28.560000000000002_tc91_avglc24582_sdlc7271_c...
1,2024-11-04 20:20:00+00:00,1204.4,0.0,0.0,0.0,5,1165.0,PJM_CHICAGO,0.0,r28.560000000000002_tc91_avglc24582_sdlc7271_c...
2,2024-11-04 20:25:00+00:00,1047.0,5.0,2.49186,0.00238,5,1156.0,PJM_CHICAGO,2.75128,r28.560000000000002_tc91_avglc24582_sdlc7271_c...
3,2024-11-04 20:30:00+00:00,1046.1,5.0,2.489718,0.00238,5,1182.0,PJM_CHICAGO,2.81316,r28.560000000000002_tc91_avglc24582_sdlc7271_c...
4,2024-11-04 20:35:00+00:00,1047.4,5.0,2.492812,0.00238,5,1182.0,PJM_CHICAGO,2.81316,r28.560000000000002_tc91_avglc24582_sdlc7271_c...


In [45]:
analysis = analysis.drop_duplicates(subset=["user_type","requery_increment","point_time"])

In [47]:
analysis.groupby(["user_type","requery_increment"])["emissions_co2e_lb_actual"].sum()

user_type                                     requery_increment
r21.5475_tc109_avglc25119_sdlc7182_contFalse  5                    50.952655
                                              30                   50.911356
                                              60                   50.746158
                                              120                  51.135809
                                              baseline             49.943514
                                                                     ...    
r38.1225_tc82_avglc24756_sdlc7934_contFalse   60                   35.012339
                                              120                  31.088899
                                              240                  31.003123
                                              baseline             37.064601
                                              ideal                 2.477963
Name: emissions_co2e_lb_actual, Length: 481, dtype: float64

In [48]:
s3.store_csvdataframe(analysis,"results/analysis_requery_20241208.csv")
s3.store_csvdataframe(synth_data,"results/synth_data_requery_20241208.csv")

Successful S3 put_object response. Status - 200
Successful S3 put_object response. Status - 200
