In [None]:
# Notebook setup: enable automatic reloading of edited modules (mode 2 reloads
# all modules before each execution) and render matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline
# Move the working directory up one level.
# NOTE(review): presumably so the `sustaingym` package imports resolve from the
# repository root - confirm. The path is relative, so re-running this cell
# without restarting the kernel moves up one more directory each time.
%cd ..

# Scripts for generating events and running environment

In [None]:
from datetime import datetime, timedelta
import os
import pytz

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Apply seaborn's "darkgrid" style to the figures drawn in this notebook.
sns.set_style("darkgrid")

from sustaingym.data.load_moer import load_monthly_moer
from sustaingym.envs.evcharging import EVChargingEnv, RealTraceGenerator, GMMsTraceGenerator, DiscreteActionWrapper
from sustaingym.algorithms.evcharging.baselines import GreedyAlgorithm, MPC, RandomAlgorithm, RLAlgorithm, OfflineOptimal


# Date ranges handed to the trace generators throughout this notebook.
# Each entry is a (start, end) pair of 'YYYY-MM-DD' strings, and every range
# spans four calendar months (at most 123 days).
test_ranges = (
    ('2019-05-01', '2019-08-31'),
    ('2019-09-01', '2019-12-31'),
    ('2020-02-01', '2020-05-31'),
    ('2021-05-01', '2021-08-31'),
)


## Run environment: simple example

In [None]:
# Roll out a single episode on the Caltech real-trace environment, sending an
# all-ones action (54 entries) at every step, then report the episode length
# and the reward breakdown carried by the final step's info dict.
trace_generator = RealTraceGenerator('caltech', test_ranges[0])
env = EVChargingEnv(trace_generator)

obs, episode_info = env.reset(seed=100, return_info=True)
steps, done = 0, False
while True:
    action = np.ones((54,))
    obs, reward, done, info = env.step(action)
    steps += 1
    if done:
        break

print(steps)
print(info['reward_breakdown'])

## Real trace generator + GMMs trace generator

In [None]:
# Check that an event queue can be generated for every day of every test
# period, first with the real-trace generator and then with the GMM-based one.
# Both generators are exercised by the same loop so the two checks cannot drift
# apart.
MAX_DAYS_IN_PERIOD = 123  # 4 months -> 123 days maximum

for generator_cls in (RealTraceGenerator, GMMsTraceGenerator):
    for site in ['caltech', 'jpl']:
        for test_range in test_ranges:
            print('testing: ', site, test_range)
            trace_generator = generator_cls(site, test_range)
            for _ in range(MAX_DAYS_IN_PERIOD):
                # Only successful generation matters here; the returned
                # queue is discarded.
                trace_generator.get_event_queue()
            print(trace_generator)

# Checking that the environment makes sense
- sanity check on rewards for policies
- seed setting
- printing, `__repr__`, step, reset, render, close
- discrete action wrapper

# Basic
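Run the selective-full-charge policy (charge at full rate wherever the observed demand is positive, otherwise do not charge) on the environment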

In [None]:
# Exercise the basic environment API (repr, reset, step, render, close) on a
# GMM-generated Caltech trace while running the selective-full-charge policy.
env = EVChargingEnv(GMMsTraceGenerator('caltech', test_ranges[0]))
print('--- Print environment ---')
print(repr(env))

obs = env.reset(seed=100)
steps, done = 0, False
while not done:
    # Action is 1 wherever the observed demand is positive and 0 elsewhere.
    action = np.where(obs['demands'] > 0, 1, 0)
    obs, reward, done, info = env.step(action, return_info=True)
    steps += 1

# render() raising NotImplementedError is the expected outcome here.
try:
    env.render()
except NotImplementedError:
    print("Render is not implemented as expected")
env.close()

print("--- Number of steps taken ---")
print(steps)

print("--- Print keys for info ---")
print(info.keys())
# The 'evs' entry is left out of the per-key printout below.
info_keys_to_show = (
    'num_evs',
    'avg_plugin_time',
    'max_profit',
    'reward_breakdown',
    'moer',
    'active_evs',
    'pilot_signals',
)
for key in info_keys_to_show:
    print(f'{key}: ', info[key])


## Rewards from policies

In [None]:
# Compare the total episode reward of a few hand-written policies on two
# environments that differ only in `project_action_in_env`.
policies = ['random', 'full', 'selective_full']
env1 = EVChargingEnv(RealTraceGenerator('caltech', test_ranges[0]))
env2 = EVChargingEnv(RealTraceGenerator('caltech', test_ranges[0]), project_action_in_env=False)

# The "random" policy is one fixed action, drawn once with each entry in
# {0, 0.25, 0.5, 0.75, 1} and reused at every step, so runs are reproducible.
np.random.seed(42)
random_action = np.random.randint(0, 5, size=(54,)).astype(float) / 4

num_episodes = 1

for policy in policies:
    for env in [env1, env2]:
        for _ in range(num_episodes):
            obs, info = env.reset(seed=43, return_info=True)
            done = False
            tot_reward = 0
            while not done:
                if policy == 'random':
                    action = random_action
                elif policy == 'full':
                    action = np.full((54,), 1)
                elif policy == 'none':
                    action = np.zeros((54,))
                else:
                    # selective_full: 1 wherever the observed demand is positive.
                    action = np.where(obs['demands'] > 0, 1, 0)
                obs, reward, done, info = env.step(action)

                tot_reward += reward
        print(f'{policy} {env.project_action_in_env}')

        # "Outside" is the sum of per-step rewards accumulated in this cell;
        # "Inside" is rebuilt from the environment's own reward breakdown.
        # Both values come from the last episode that was run.
        rd = info['reward_breakdown']
        print("Outside: ", tot_reward, "Inside: ", rd['profit'] - rd['carbon_cost'] - rd['excess_charge'])
        print("Best possible: ", info['max_profit'])
        print('reward components:', info['reward_breakdown'])  # breakdown reported by the final step of the last episode


## Check seed setting is done correctly

In [None]:
# Build the generators under test: the real-trace generator in non-sequential
# and sequential mode, followed by the GMM-based generator.
generators = [
    RealTraceGenerator('caltech', test_ranges[1], sequential=sequential)
    for sequential in [False, True]
]
generators.append(GMMsTraceGenerator('caltech', test_ranges[1]))

# For each generator and seed, re-seed three times and print the seven days
# visited after each re-seed so the sequences can be compared by eye.
for gen in generators:
    print('--- ', gen)
    for seed in [None, 11]:
        print(f'seed is {seed}')
        for _trial in range(3):
            gen.set_seed(seed)
            days = []
            for _draw in range(7):
                gen.get_event_queue()
                days.append(gen.day.strftime("%m/%d"))
            print(days)

## Discrete Action Wrapper

In [None]:
# Wrap the Caltech real-trace environment so that it takes discrete actions.
base_env = EVChargingEnv(RealTraceGenerator('caltech', test_ranges[0]))
env = DiscreteActionWrapper(base_env)

# A single action with 54 integer entries drawn from 0..4, reused at every step.
action = np.random.randint(0, 5, size=(54,))

obs, episode_info = env.reset(seed=100, return_info=True)
done = False
steps = 0
while not done:
    obs, reward, done, info = env.step(action)
    steps = steps + 1

# Episode length, then the reward breakdown from the final step.
print(steps)
print(info['reward_breakdown'])

## Easy Algorithms

In [None]:
# One discrete-action and one continuous-action environment over the same
# Caltech date range.
env_discrete = DiscreteActionWrapper(EVChargingEnv(RealTraceGenerator('caltech', test_ranges[2])))
env_continuous = EVChargingEnv(RealTraceGenerator('caltech', test_ranges[2]))

algorithms = {
    'random': RandomAlgorithm,
    'mpc': MPC,
    'greedy': GreedyAlgorithm,
}
envs_by_action_type = {
    'discrete': env_discrete,
    'continuous': env_continuous,
}

# Run every algorithm on both environments. An AssertionError raised while
# building or running an algorithm is printed instead of aborting the sweep
# (presumably it signals an unsupported action type - confirm).
for algorithm_name, algorithm_cls in algorithms.items():
    for action_type, env in envs_by_action_type.items():
        try:
            algorithm = algorithm_cls(env)

            reward_breakdown = algorithm.run(seeds=5)
            print(f'{algorithm_name}, {action_type}')
            print(reward_breakdown)
        except AssertionError as e:
            print(e)

In [None]:
# Two independent copies of the most recent reward breakdown to experiment on.
rb1, rb2 = reward_breakdown.copy(), reward_breakdown.copy()

In [None]:
# Concatenate an empty frame with the first copy and display the result.
empty_frame = pd.DataFrame({})
pd.concat([empty_frame, rb1])

In [None]:
# Tag each copy with its own constant 'timestep' value.
for breakdown_copy, timestep_value in ((rb1, 25), (rb2, 50)):
    breakdown_copy['timestep'] = timestep_value

In [None]:
# Display the 'reward' entry of the first copy as a NumPy array.
reward_values = rb1['reward']
np.array(reward_values)

## Hard Algorithm: Offline Optimal

In [None]:
# Run the offline-optimal baseline on a fresh continuous-action Caltech
# environment and print the reward breakdown it returns.
trace_generator = RealTraceGenerator('caltech', test_ranges[2])
env_continuous = EVChargingEnv(trace_generator)
oo = OfflineOptimal(env_continuous)

seeds = [2]
reward_breakdown = oo.run(seeds=seeds)
print(reward_breakdown)


In [None]:
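# Reward breakdown recorded from an earlier run (presumably the OfflineOptimal cell above with seeds=[2] - confirm):
#       reward     profit  carbon_cost  excess_charge  max_profit
# 0  10.925368  13.099406     2.174038            0.0    14.45262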

In [None]:
# Plot rows 0..53 of the offline-optimal solution's trajectory on one axes,
# with a blue horizontal reference line at 6/32 spanning x = 0..288.
fig, ax = plt.subplots()
for row_idx in range(54):
    ax.plot(oo.traj.value[row_idx])
ax.hlines(y=6/32, xmin=0, xmax=288, color='b')

In [None]:
# One tuple per EV in the environment: remaining demand, requested energy,
# delivered energy, and the difference between departure and arrival.
ev_summaries = [
    (
        ev.remaining_demand,
        ev.requested_energy,
        ev.energy_delivered,
        ev.departure - ev.arrival,
    )
    for ev in env_continuous.evs
]
print(ev_summaries)

In [None]:
# Display the environment's A_PERS_TO_KWH constant multiplied by 32; the
# ACTION_SCALE_FACTOR term is left commented out.
# NOTE(review): presumably the energy in kWh delivered in one period at 32 A - confirm.
env_continuous.A_PERS_TO_KWH * 32# * env_continuous.ACTION_SCALE_FACTOR

In [None]:
# Round-trip the reward breakdown through a gzip-compressed CSV to check that
# it survives `to_csv` / `read_csv`.
# The scratch file carries a `.csv.gz` suffix because its contents are gzip
# data rather than plain text, and it is removed afterwards so the check leaves
# nothing behind in the working directory.
roundtrip_path = "test_for_to_csv.csv.gz"
try:
    reward_breakdown.to_csv(roundtrip_path, compression='gzip', index=False)
    rb = pd.read_csv(roundtrip_path, compression='gzip')
finally:
    if os.path.exists(roundtrip_path):
        os.remove(roundtrip_path)

# The index is not written, so rows and columns must match one-to-one.
assert rb.shape == reward_breakdown.shape, "CSV round trip changed the table shape"

rb.head()

In [None]:
import pdb

# Candidate controllers: one greedy baseline plus MPC with lookahead values
# of 1, 2, 6, 12 and 36.
ga = GreedyAlgorithm(project_action=True)
mpc1, mpc2, mpc6, mpc12, mpc36 = (
    MPC(lookahead=lookahead) for lookahead in (1, 2, 6, 12, 36)
)

# (label, controller) pairs that are actually evaluated below; swap in the
# commented-out list to evaluate every candidate.
evaluated = [('mpc1', mpc1)]
# evaluated = [('greedy', ga), ('mpc1', mpc1), ('mpc2', mpc2), ('mpc6', mpc6), ('mpc12', mpc12), ('mpc36', mpc36)]
lbls = [label for label, _ in evaluated]
algs = [controller for _, controller in evaluated]

# (start, end) date pairs, as 'YYYY-MM-DD' strings, iterated by the evaluation
# loop at the end of this cell.
# NOTE(review): these values duplicate `test_ranges` defined near the top of
# the notebook; keep the two in sync or reuse that tuple.
DEFAULT_DATE_RANGES = (
    ('2019-05-01', '2019-08-31'),
    ('2019-09-01', '2019-12-31'),
    ('2020-02-01', '2020-05-31'),
    ('2021-05-01', '2021-08-31'),
)

# bug fix: commit 18be9933cbf14e19e17332c9a870f480471eea86
# 2019-08-14: StationOccupiedError: Station CA-303 is occupied with ev 2_39_139_28_2019-08-15 07:07:28.618042.
# 2019-10-25: StationOccupiedError: Station CA-317 is occupied with ev 2_39_91_437_2019-10-26 07:36:37.638121. -> change mask: mask = (self.day.day == max_depart.dt.day) instead of mask = (df['arrival'].dt.day == max_depart.dt.day)
# 2020-03-15: StationOccupiedError: Station CA-303 is occupied with ev 2_39_139_28_2020-03-16 07:51:17.415039.

DATE_FORMAT = '%Y-%m-%d'


def num_days_in_period(xs) -> int:
    """Counts the days in a date period, with both endpoints included.

    Args:
        xs: sequence of date strings in DATE_FORMAT ('YYYY-MM-DD'). Every
            element is parsed; the first is taken as the start of the period
            and the second as its end.

    Returns:
        The number of days from start to end inclusive, e.g. 1 when both
        dates are equal. The value is zero or negative when the end date
        precedes the start date.
    """
    parsed = tuple(map(lambda text: datetime.strptime(text, DATE_FORMAT), xs))
    start, end = parsed[0], parsed[1]
    return (end - start).days + 1

# Evaluate each selected algorithm on every date range, using a sequential
# real-trace generator and one run per day in the range.
for lbl, alg in zip(lbls, algs):
    for season in DEFAULT_DATE_RANGES:
        gen = RealTraceGenerator('caltech', season, sequential=True)
        env = EVChargingEnv(gen)
        n_days = num_days_in_period(season)
        rewards, breakdown = alg.run(n_days, env)

        report = (
            ('average reward', np.mean(rewards)),
            ('rewards', rewards),
            ('reward breakdown', breakdown),
        )
        for title, value in report:
            print(f'{lbl} {title}: ', value)