# TODO: update this notebook

In [None]:
# Reload edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Move the working directory two levels up; later cells use paths such as
# 'examples/...' and 'plots/...' relative to it.
# NOTE(review): this cd is relative, so re-running this cell moves up two more
# levels each time - run it once per kernel session.
%cd ../..

In [None]:
import os

import numpy as np
import seaborn as sns
# import stable_baselines3 as sb3
from tqdm.auto import tqdm

from sustaingym.envs import CongestedElectricityMarketEnv
from sustaingym.envs.electricitymarket.plot_utils import *
from sustaingym.envs.electricitymarket.wrapped import DiscreteActions, CongestedDiscreteActions
from examples.electricitymarket.run_electricitymarket import *


# Seaborn "darkgrid" theme with dotted grid lines drawn in grey level ".6".
sns.set_style(
    "darkgrid",
    rc={"grid.color": ".6", "grid.linestyle": ":"},
)

In [None]:
# Seeds 0..29 for the runs below, plus a separate fixed reset seed.
seeds = np.arange(30)
reset_seed = 15

# Base environment with intermediate rewards enabled, and the
# CongestedDiscreteActions wrapper built around that same environment object.
env = CongestedElectricityMarketEnv(use_intermediate_rewards=True)
discrete_env = CongestedDiscreteActions(env)

In [None]:
# Seeds 0..29 handed to the runner below.
seeds = range(30)

# Environment for month "2020-07" with intermediate rewards.
# NOTE(review): this rebinds `env`; `discrete_env` still wraps the environment
# object created before this rebinding - confirm that is intended.
env = CongestedElectricityMarketEnv(
    month="2020-07",
    use_intermediate_rewards=True,
)

# Call run_random on the new environment; its third positional argument is False.
results = run_random(seeds, env, False)

In [None]:
# pandas is not imported by the notebook's top import cell, so import it here:
# without this the cell only works if `pd` leaked in from a star import or
# from running a later cell first.
import pandas as pd

# Sum results['rewards'] along axis 1 to get one total per row
# (presumably one row per seed/episode - confirm against run_random).
ep_rewards = np.sum(results['rewards'], axis=1)
lst_ep_rewards = list(ep_rewards)

# One table row per seed: the seed and its summed reward.
rand_data = {
    'seeds': list(seeds),
    'ep_rewards': lst_ep_rewards,
}

rand_df = pd.DataFrame(rand_data)
rand_df.to_csv('random_results.csv', index=False)

In [None]:
# Seeds 0..29 handed to the runner below.
seeds = range(30)

# Fresh environment for month "2020-07" with intermediate rewards.
env = CongestedElectricityMarketEnv(
    month="2020-07",
    use_intermediate_rewards=True,
)

# Call run_mpc on that environment; the result is kept in `results` only
# (this cell does not write it to disk).
results = run_mpc(seeds, env)

## Run offline-optimal and random baselines

In [None]:
# Run run_offline_optimal for every seed and store its results as an .npz file.
opt_results = run_offline_optimal(seeds, env)
save_results(
    opt_results,
    seeds=seeds,
    path='examples/congested_intermreward/offline_results.npz',
)

In [None]:
# Read back the two arrays needed below and close the archive again: np.load
# on an .npz file keeps the underlying file open until the NpzFile is closed.
with np.load('examples/congested_intermreward/offline_results.npz') as offline_npz:
    opt_results = {key: offline_npz[key] for key in ('dispatch', 'energy')}

# Feed the saved 'dispatch' and 'energy' arrays to the follow-offline-optimal
# runner and store what it returns.
follow_results = congested_run_follow_offline_optimal(
    seeds, env,
    opt_dispatches=opt_results['dispatch'],
    opt_energies=opt_results['energy'])
save_results(follow_results, seeds=seeds, path='examples/congested_intermreward/follow_offline_results.npz')

In [None]:
# run_random on the continuous-action environment (discrete=False); results go to disk.
results = run_random(seeds, env, discrete=False)
save_results(
    results,
    seeds=seeds,
    path='examples/congested_intermreward/random_results.npz',
)

In [None]:
# run_random on the wrapped environment `discrete_env` (discrete=True); results go to disk.
results = run_random(seeds, discrete_env, discrete=True)
save_results(
    results,
    seeds=seeds,
    path='examples/congested_intermreward/random_discrete_results.npz',
)

## Train RL Models

### PPO Models

In [None]:

# PPO run (-a ppo) with intermediate rewards (-i) and learning rate 3e-03 (-l),
# written to ppo_summer_interm_lr3e-03 (-o).
# Presumably -m 7 selects July training data, by analogy with the annotated
# runs in the following cells - TODO confirm against train_rllib.
%run examples/train_rllib -m 7 -i -a ppo -l 3e-03 -o ppo_summer_interm_lr3e-03

In [None]:
# PPO trained on 2020 February data and evaluated on 2020 May data during training
# (presumably the -m 2 / -v 5 flags), with intermediate rewards (-i) and a
# learning rate of 0.0003 (-l).
# NOTE(review): the other -m 2 runs in this section use the same output path
# (-o examples/interm_results) - confirm the runs do not overwrite each other.
%run examples/train_rllib -m 2 -v 5 -i -a ppo -l 0.0003 -o examples/interm_results

In [None]:
# PPO trained on 2020 February data and evaluated on 2020 May data during training
# (presumably the -m 2 / -v 5 flags), with intermediate rewards (-i) and a
# learning rate of 3e-05 (-l).
# NOTE(review): the other -m 2 runs in this section use the same output path
# (-o examples/interm_results) - confirm the runs do not overwrite each other.
%run examples/train_rllib -m 2 -v 5 -i -a ppo -l 3e-05 -o examples/interm_results

In [None]:
# PPO trained on 2020 February data and evaluated on 2020 May data during training
# (presumably the -m 2 / -v 5 flags), with terminal rewards (no -i flag) and a
# learning rate of 0.003 (-l).
# NOTE(review): the output path is 'examples/interm_results' even though this run
# uses terminal rewards, and it is shared with the other -m 2 runs - confirm.
%run examples/train_rllib -m 2 -v 5 -a ppo -l 0.003 -o examples/interm_results

In [None]:
# PPO trained on 2020 February data and evaluated on 2020 May data during training
# (presumably the -m 2 / -v 5 flags), with terminal rewards (no -i flag) and a
# learning rate of 0.0003 (-l).
# NOTE(review): the output path is 'examples/interm_results' even though this run
# uses terminal rewards, and it is shared with the other -m 2 runs - confirm.
%run examples/train_rllib -m 2 -v 5 -a ppo -l 0.0003 -o examples/interm_results

In [None]:
# PPO trained on 2020 February data and evaluated on 2020 May data during training
# (presumably the -m 2 / -v 5 flags), with terminal rewards (no -i flag) and a
# learning rate of 3e-05 (-l).
# NOTE(review): the output path is 'examples/interm_results' even though this run
# uses terminal rewards, and it is shared with the other -m 2 runs - confirm.
%run examples/train_rllib -m 2 -v 5 -a ppo -l 3e-05 -o examples/interm_results

## Read results and make plots

In [None]:
# Saved result archives, keyed by a short label for each method.
results_paths = dict([
    ('oracle', 'examples/congested_intermreward/offline_results.npz'),
    ('follow oracle', 'examples/congested_intermreward/follow_offline_results.npz'),
    ('rand', 'examples/congested_intermreward/random_results.npz'),
    ('rand discrete', 'examples/congested_intermreward/random_discrete_results.npz'),

    # Disabled entries for trained RL models (the *_model_dir names are not
    # defined in this notebook):
    # ('PPO (2019)', os.path.join(ppo2019_model_dir, 'eval2021/results.npz')),
    # ('PPO (2021)', os.path.join(ppo2021_model_dir, 'eval2021/results.npz')),
    # ('PPO discrete (2019)', os.path.join(ppodiscrete2019_model_dir, 'eval2021/results.npz')),
    # ('PPO discrete (2021)', os.path.join(ppodiscrete2021_model_dir, 'eval2021/results.npz')),
    # ('SAC (2019)', os.path.join(sac2019_model_dir, 'eval2021/results.npz')),
    # ('SAC (2021)', os.path.join(sac2021_model_dir, 'eval2021/results.npz')),
    # ('DQN (2019)', os.path.join(dqn2019_model_dir, 'eval2021/results.npz')),
    # ('DQN (2021)', os.path.join(dqn2021_model_dir, 'eval2021/results.npz')),
])

In [None]:
# Open every archive listed in results_paths, keeping the same labels as keys.
results = {
    name: np.load(npz_path)
    for name, npz_path in results_paths.items()
}

In [None]:
# Display the 'rewards' array stored under the 'oracle' label as this cell's output.
results['oracle']['rewards']

In [None]:
# Plot the returns of all loaded methods with a fixed y-range.
fig, ax = plot_returns(results, ylim=(-16000, 3000))

# savefig does not create missing directories, so make sure 'plots/' exists
# before writing the image.
os.makedirs('plots', exist_ok=True)
fig.savefig('plots/em_returns.png', dpi=300, pad_inches=0, bbox_inches='tight')

In [None]:
# `seed` is used as a position: entry 13 along the first axis of each saved array.
seed = 13

# For every loaded method, collect that entry's rewards, prices and energy,
# tagged with the method label.
ep_data = {}
for label, run in results.items():
    episode = {}
    for key in ('rewards', 'prices', 'energy'):
        episode[key] = run[key][seed]
    episode['model_name'] = label
    # Only labels containing 'SAC (2021)' also get their saved actions, stored as 'bids'.
    if 'SAC (2021)' in label:
        episode['bids'] = run['actions'][seed]
    ep_data[label] = episode

In [None]:
# Pass the seed by keyword: Gym-style reset() takes `seed` as a keyword-only
# argument, so the positional form can raise a TypeError.
env.reset(seed=seed)

# NOTE(review): the plot is set up for '2020-05' while `env` was created with
# month="2020-07" in an earlier cell - confirm which month is intended.
fig, axs, times = setup_episode_plot(env, '2020-05', include_returns=True, include_bids=True)

# Draw the selected episode of each method onto the shared axes.
for label in ['oracle', 'follow oracle', 'rand', 'rand discrete']:
    plot_episode(axs, times[:env.MAX_STEPS_PER_EPISODE], **ep_data[label])

# Put a legend next to each panel, anchored at its upper-right corner.
for plot in ['prices', 'energy', 'rewards', 'bids']:
    axs[plot].legend(bbox_to_anchor=(1,1))

# savefig does not create missing directories, so make sure 'plots/' exists.
os.makedirs('plots', exist_ok=True)
fig.savefig('plots/episode_plot.png', dpi=300, pad_inches=0, bbox_inches='tight')

In [None]:
# Largest value taken from the action space's upper bound: element [0, 0] of
# the first component of `high`.
max_price = env.action_space.high[0][0, 0]

env.settlement_interval = 12
n_intervals = env.settlement_interval + 1


def _first_interval_action(first_bids):
    """Return a zero array of shape (2, 1, n_intervals) with [:, 0, 0] set to `first_bids`."""
    action = np.zeros((2, 1, n_intervals))
    action[:, 0, 0] = np.array(first_bids)
    return action


# Three hand-built actions; only the first interval is non-zero. The names
# describe the intended effect - what the two rows mean is defined by the
# environment's action space.
charge_action = _first_interval_action([max_price*0.95, max_price/0.95])
discharge_action = _first_interval_action([0, -max_price/0.95])
no_action = _first_interval_action([-max_price*0.95, max_price/0.95])

# Reset the environment and apply `no_action` three times in a row.
env.reset()

for _step in range(3):
    obs, reward, _, _ = env.step(no_action)

In [None]:
env.reset()

# Print env._calculate_terminal_cost for three final charge values; according
# to the printed labels, 40 corresponds to the initial charge, 20 is below it
# and 60 is above it.
terminal_cost_cases = [
    ("opportunity cost for returning to init charge: ", 40.),
    ("opportunity cost for returning to below init charge: ", 20.),
    ("opportunity cost for returning to above init charge: ", 60.),
]
for message, final_charge in terminal_cost_cases:
    print(message, env._calculate_terminal_cost(final_charge))


In [None]:
import pandas as pd

df = pd.read_csv('../sustaingym/data/demand_data/DAY_AHEAD_regional_Load.csv')

months = set(df['Month'])
days = set(df['Day'])

# Every date is expected to contain each of the periods 1..24.
all_periods = set(range(1, 25))

# Print each (month, day) combination whose set of periods is not exactly 1..24.
# NOTE(review): this walks every month x day combination found in the file, so
# combinations with no rows at all (e.g. a day number that a given month does
# not have) are printed as well.
for month in months:
    for day in days:
        on_date = (df['Month'] == month) & (df['Day'] == day)
        hrs = df.loc[on_date, 'Period']

        if set(hrs) != all_periods:
            print("Month: ", month)
            print("Day: ", day)