In [None]:
# Reload imported modules automatically before each cell executes, so edits to
# the project source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Change to the parent directory; the relative 'examples/...' and 'plots/...'
# paths used below are resolved from there.
# NOTE: not idempotent -- re-running this cell moves up one more level, so
# restart the kernel before running the notebook again.
%cd ..

In [None]:
import os

import numpy as np
import seaborn as sns
import stable_baselines3 as sb3
from tqdm.auto import tqdm

from sustaingym.envs import ElectricityMarketEnv
from sustaingym.envs.battery.plot_utils import *
from sustaingym.envs.battery.wrapped import DiscreteActions
from sustaingym.evaluate.run_electricitymarket import *


# Global plot style for every figure below: seaborn "darkgrid" with a
# grey (.6), dotted grid.
sns.set_style("darkgrid", {"grid.color": ".6", "grid.linestyle": ":"})

In [None]:
# Two environments for May 2021 built with the same seed; they differ only in
# the `use_intermediate_rewards` flag.
# NOTE(review): `env_interm` is not referenced by any other cell visible here.
env_interm = ElectricityMarketEnv(month='2021-05', seed=215, use_intermediate_rewards=True)
env = ElectricityMarketEnv(month='2021-05', seed=215, use_intermediate_rewards=False)
# Wrapped copy of `env`, used below for the discrete random baseline and the DQN agents.
discrete_env = DiscreteActions(env)
# NOTE(review): `reset_seed` is not referenced by any other cell visible here.
reset_seed = 15
# Seeds 0..29, passed to every run_* / save_results call below.
seeds = np.arange(30)

## Run offline and random baselines

In [None]:
# Offline-optimal baseline: run it on `env` for every seed and persist the
# results so later cells can reload them from disk.
offline_results_path = 'examples/termreward/offline_results.npz'
opt_results = run_offline_optimal(seeds, env)
save_results(opt_results, path=offline_results_path, seeds=seeds)

In [None]:
# Reload the saved offline-optimal results and feed their 'dispatch' and
# 'energy' arrays to the follow-offline-optimal baseline, then save its results.
opt_results = np.load('examples/termreward/offline_results.npz')
offline_dispatches = opt_results['dispatch']
offline_energies = opt_results['energy']
follow_results = run_follow_offline_optimal(
    seeds,
    env,
    opt_dispatches=offline_dispatches,
    opt_energies=offline_energies,
)
save_results(
    follow_results,
    path='examples/termreward/follow_offline_results.npz',
    seeds=seeds,
)

In [None]:
# Random baseline on the unwrapped (continuous-action) environment.
random_results_path = 'examples/termreward/random_results.npz'
results = run_random(seeds, env, discrete=False)
save_results(results, path=random_results_path, seeds=seeds)

In [None]:
# Random baseline on the DiscreteActions-wrapped environment.
random_discrete_results_path = 'examples/termreward/random_discrete_results.npz'
results = run_random(seeds, discrete_env, discrete=True)
save_results(results, path=random_discrete_results_path, seeds=seeds)

## Run RL models

In [None]:
# Learning-rate sweep read-out for PPO trained on 2019 data.
# For each candidate, print the largest axis-1 mean of the 'results' array in
# its eval2019 log (presumably the best mean evaluation return -- confirm the
# layout of evaluations.npz).
for model_name in (
    'PPO_2019_g0.9999_lr0.003',
    'PPO_2019_g0.9999_lr0.0003',
    'PPO_2019_g0.9999_lr3e-05',
):
    evals_path = f'examples/termreward/{model_name}/eval2019/evaluations.npz'
    eval_log = np.load(evals_path)
    mean_per_eval = eval_log['results'].mean(axis=1)
    print(mean_per_eval.max())

In [None]:
# Learning-rate sweep read-out for PPO trained on 2021 data.
# For each candidate, print the largest axis-1 mean of the 'results' array in
# its eval2021 log (presumably the best mean evaluation return -- confirm the
# layout of evaluations.npz).
for model_name in (
    'PPO_2021_g0.9999_lr0.003',
    'PPO_2021_g0.9999_lr0.0003',
    'PPO_2021_g0.9999_lr3e-05',
):
    evals_path = f'examples/termreward/{model_name}/eval2021/evaluations.npz'
    eval_log = np.load(evals_path)
    mean_per_eval = eval_log['results'].mean(axis=1)
    print(mean_per_eval.max())

In [None]:
# Learning-rate sweep read-out for SAC trained on 2019 data.
# For each candidate, print the largest axis-1 mean of the 'results' array in
# its eval2019 log (presumably the best mean evaluation return -- confirm the
# layout of evaluations.npz).
for model_name in (
    'SAC_2019_g0.9999_lr0.003',
    'SAC_2019_g0.9999_lr0.0003',
    'SAC_2019_g0.9999_lr3e-05',
):
    evals_path = f'examples/termreward/{model_name}/eval2019/evaluations.npz'
    eval_log = np.load(evals_path)
    mean_per_eval = eval_log['results'].mean(axis=1)
    print(mean_per_eval.max())

In [None]:
# Learning-rate sweep read-out for SAC trained on 2021 data.
# For each candidate, print the largest axis-1 mean of the 'results' array in
# its eval2021 log (presumably the best mean evaluation return -- confirm the
# layout of evaluations.npz).
for model_name in (
    'SAC_2021_g0.9999_lr0.003',
    'SAC_2021_g0.9999_lr0.0003',
    'SAC_2021_g0.9999_lr3e-05',
):
    evals_path = f'examples/termreward/{model_name}/eval2021/evaluations.npz'
    eval_log = np.load(evals_path)
    mean_per_eval = eval_log['results'].mean(axis=1)
    print(mean_per_eval.max())

In [None]:
# Learning-rate sweep read-out for DQN (discrete actions) trained on 2019 data.
# For each candidate, print the largest axis-1 mean of the 'results' array in
# its eval2019 log (presumably the best mean evaluation return -- confirm the
# layout of evaluations.npz).
for model_name in (
    'DQN_discrete_2019_g0.9999_lr0.001',
    'DQN_discrete_2019_g0.9999_lr0.0001',
    'DQN_discrete_2019_g0.9999_lr1e-05',
):
    evals_path = f'examples/termreward/{model_name}/eval2019/evaluations.npz'
    eval_log = np.load(evals_path)
    mean_per_eval = eval_log['results'].mean(axis=1)
    print(mean_per_eval.max())

In [None]:
# Learning-rate sweep read-out for DQN (discrete actions) trained on 2021 data.
# For each candidate, print the largest axis-1 mean of the 'results' array in
# its eval2021 log (presumably the best mean evaluation return -- confirm the
# layout of evaluations.npz).
for model_name in (
    'DQN_discrete_2021_g0.9999_lr0.001',
    'DQN_discrete_2021_g0.9999_lr0.0001',
    'DQN_discrete_2021_g0.9999_lr1e-05',
):
    evals_path = f'examples/termreward/{model_name}/eval2021/evaluations.npz'
    eval_log = np.load(evals_path)
    mean_per_eval = eval_log['results'].mean(axis=1)
    print(mean_per_eval.max())

In [None]:
# Directory of the chosen model for each (algorithm, training year) pair;
# presumably picked from the sweep read-outs printed by the cells above.
#
# TODO: the two PPO directories are still empty placeholders. With '' the
# os.path.join(...) calls below collapse to bare relative paths, so both PPO
# runs read 'eval20XX/best_model.zip' from the working directory and write to
# the SAME 'eval2021/results.npz' (the second run overwrites the first).
# Fill these in before running the PPO cells.
ppo2019_model_dir = ''
ppo2021_model_dir = ''
sac2019_model_dir = 'examples/termreward/SAC_2019_g0.9999_lr0.0003/'
sac2021_model_dir = 'examples/termreward/SAC_2021_g0.9999_lr3e-05/'
dqn2019_model_dir = 'examples/termreward/DQN_discrete_2019_g0.9999_lr0.001/'
dqn2021_model_dir = 'examples/termreward/DQN_discrete_2021_g0.9999_lr0.001/'

In [None]:
# Evaluate the PPO agent trained on 2019 data: load the checkpoint stored under
# eval2019/, run it on `env` for every seed, and save the results under eval2021/.
# The checkpoint must be loaded with the PPO class (the original used sb3.SAC,
# which does not match a PPO checkpoint).
# NOTE: `ppo2019_model_dir` must point at a real model directory; if it is left
# empty the paths below become relative to the working directory.
ppo2019 = sb3.PPO.load(os.path.join(ppo2019_model_dir, 'eval2019/best_model.zip'))
results = run_model(ppo2019, seeds=seeds, env=env, discrete=False)
save_results(results, seeds=seeds, path=os.path.join(ppo2019_model_dir, 'eval2021/results.npz'))

In [None]:
# Evaluate the PPO agent trained on 2021 data: load the checkpoint stored under
# eval2021/, run it on `env` for every seed, and save the results under eval2021/.
# The checkpoint must be loaded with the PPO class (the original used sb3.SAC,
# which does not match a PPO checkpoint).
# NOTE: `ppo2021_model_dir` must point at a real model directory; if it is left
# empty the paths below become relative to the working directory.
ppo2021 = sb3.PPO.load(os.path.join(ppo2021_model_dir, 'eval2021/best_model.zip'))
results = run_model(ppo2021, seeds=seeds, env=env, discrete=False)
save_results(results, seeds=seeds, path=os.path.join(ppo2021_model_dir, 'eval2021/results.npz'))

In [None]:
# SAC agent trained on 2019 data: load its eval2019 checkpoint, run it on `env`
# for every seed, and store the results under eval2021/.
sac2019_checkpoint = os.path.join(sac2019_model_dir, 'eval2019/best_model.zip')
sac2019_results_path = os.path.join(sac2019_model_dir, 'eval2021/results.npz')
sac2019 = sb3.SAC.load(sac2019_checkpoint)
results = run_model(sac2019, seeds=seeds, env=env, discrete=False)
save_results(results, path=sac2019_results_path, seeds=seeds)

In [None]:

# SAC agent trained on 2021 data: load its eval2021 checkpoint, run it on `env`
# for every seed, and store the results under eval2021/.
sac2021_checkpoint = os.path.join(sac2021_model_dir, 'eval2021/best_model.zip')
sac2021_results_path = os.path.join(sac2021_model_dir, 'eval2021/results.npz')
sac2021 = sb3.SAC.load(sac2021_checkpoint)
results = run_model(sac2021, seeds=seeds, env=env, discrete=False)
save_results(results, path=sac2021_results_path, seeds=seeds)

In [None]:
# DQN agent trained on 2019 data: load its eval2019 checkpoint, run it on the
# discrete-action environment for every seed, and store the results under eval2021/.
dqn2019_checkpoint = os.path.join(dqn2019_model_dir, 'eval2019/best_model.zip')
dqn2019_results_path = os.path.join(dqn2019_model_dir, 'eval2021/results.npz')
dqn2019 = sb3.DQN.load(dqn2019_checkpoint)
results = run_model(dqn2019, seeds=seeds, env=discrete_env, discrete=True)
save_results(results, path=dqn2019_results_path, seeds=seeds)

In [None]:
# DQN agent trained on 2021 data: load its eval2021 checkpoint, run it on the
# discrete-action environment for every seed, and store the results under eval2021/.
dqn2021_checkpoint = os.path.join(dqn2021_model_dir, 'eval2021/best_model.zip')
dqn2021_results_path = os.path.join(dqn2021_model_dir, 'eval2021/results.npz')
dqn2021 = sb3.DQN.load(dqn2021_checkpoint)
results = run_model(dqn2021, seeds=seeds, env=discrete_env, discrete=True)
save_results(results, path=dqn2021_results_path, seeds=seeds)

## Read results and make plots

In [None]:
# Map each plot label to the .npz file holding that method's saved results.
# Baselines come first, then the RL agents; insertion order is preserved.
results_paths = {
    'offline': 'examples/termreward/offline_results.npz',
    'follow offline': 'examples/termreward/follow_offline_results.npz',
    'rand': 'examples/termreward/random_results.npz',
    'rand (discrete)': 'examples/termreward/random_discrete_results.npz',
}

# Every RL agent stores its results at the same relative location inside its
# own model directory.
for label, model_dir in [
    ('PPO (2019)', ppo2019_model_dir),
    ('PPO (2021)', ppo2021_model_dir),
    ('SAC (2019)', sac2019_model_dir),
    ('SAC (2021)', sac2021_model_dir),
    ('DQN (2019)', dqn2019_model_dir),
    ('DQN (2021)', dqn2021_model_dir),
]:
    results_paths[label] = os.path.join(model_dir, 'eval2021/results.npz')

In [None]:
# Open every saved results file, keyed by its plot label. The archives are
# kept open because later cells index into them.
results = {}
for label, path in results_paths.items():
    results[label] = np.load(path)

In [None]:
# Draw the returns comparison for every method loaded into `results`
# (presumably one entry per label -- see plot_utils.plot_returns).
fig, ax = plot_returns(results)
# Uncomment to export the figure (the 'plots/' directory must already exist):
# fig.savefig('plots/em_returns_termreward.png', dpi=300, pad_inches=0, bbox_inches='tight')

In [None]:
# Collect the per-episode arrays of one episode (index `seed`) for every
# method, in the keyword layout that `plot_episode` is later called with.
seed = 13
ep_data = {}
for label, archive in results.items():
    episode = {
        'rewards': archive['rewards'][seed],
        'prices': archive['prices'][seed],
        'energy': archive['energy'][seed],
        'model_name': label,
    }
    # Only the SAC (2021) entry also carries its actions, passed on as 'bids'.
    if 'SAC (2021)' in label:
        episode['bids'] = archive['actions'][seed]
    ep_data[label] = episode

In [None]:
# Reset the environment to the episode selected above before setting up the
# figure. The seed is passed by keyword, which also works when `reset` declares
# its arguments keyword-only (as in the Gym/Gymnasium API).
env.reset(seed=seed)
fig, axs, times = setup_episode_plot(env, '2021-05', include_returns=False, include_bids=True)

# Labels must match the keys of `results_paths` / `ep_data` exactly: the random
# baselines are stored as 'rand' and 'rand (discrete)', so the previous
# 'random' / 'random (discrete)' spellings raised KeyError.
for label in ['offline', 'follow offline', 'rand', 'rand (discrete)', 'SAC (2021)', 'DQN (2021)', 'PPO (2021)']:
    plot_episode(axs, times, **ep_data[label])

# Anchor each panel's legend at the axes' upper-right corner (1, 1).
for plot in ['prices', 'energy', 'bids']:
    axs[plot].legend(bbox_to_anchor=(1,1))

In [None]:
# Export the episode figure. Create the output directory first: `savefig`
# does not create missing parent directories and would raise otherwise.
os.makedirs('plots', exist_ok=True)
fig.savefig('plots/em_episode_termreward.png', dpi=300, pad_inches=0, bbox_inches='tight')