In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%cd ..

In [None]:
# Shell out to list the current working directory
# (presumably a sanity check that the working directory is the expected one — TODO confirm).
!ls

In [None]:
%run examples/train.py -y 2019 -m PPO -l 0.0001
%run examples/train -y 2021 -m PPO -l 0.0001
%run examples/train -y 2019 -d -m DQN -l 0.0001
%run examples/train -y 2021 -d -m DQN -l 0.0001
%run examples/train -y 2019 -d -m A2c -l 0.0001

In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Global plot style: seaborn "darkgrid" with grid colour ".6" and ":" (dotted) grid lines.
sns.set_style("darkgrid", {"grid.color": ".6", "grid.linestyle": ":"})

from stable_baselines3 import PPO, A2C, DQN
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, StopTrainingOnNoModelImprovement
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv



from sustaingym.envs import ElectricityMarketEnv
from sustaingym.envs.battery.plot_utils import get_follow_offline_optimal, get_offline_optimal, setup_episode_plot, plot_episode, plot_model_training_reward_curves, plot_reward_distribution, plot_state_of_charge_and_prices, plot_reward_over_episode, run_model_for_evaluation
from sustaingym.envs.battery.wrapped import DiscreteActions

In [None]:
# Plot the training reward curves stored for the 2019 discrete DQN run.
# 'in_dist' / 'out_dist' presumably name the two evaluation logs of that run — TODO confirm.
ax = plot_model_training_reward_curves(None, 'DQN_discrete_2019_g0.9999_lr0.0001', ['in_dist', 'out_dist'])
# NOTE(review): the figure is written to a file named "ppo_..." although the run
# plotted above is DQN — confirm which of the two is intended.
plt.savefig('examples/ppo_reward_curves.png', dpi=300)

In [None]:
# Reward distribution on the May 2021 market for PPO and DQN agents trained on
# 2019 and 2021 data. The 2021-trained agents are labelled "in dist" and the
# 2019-trained ones "out dist".
env_2021 = ElectricityMarketEnv(seed=215, month='2021-05')

# Trained checkpoints, one per (algorithm, training year).
ppo_2019 = PPO.load('examples/PPO_2019_g0.9999_lr0.0001/model.zip')
ppo_2021 = PPO.load('examples/PPO_2021_g0.9999_lr0.0001/model.zip')
dqn_2019 = DQN.load('examples/DQN_discrete_2019_g0.9999_lr0.0001/model.zip')
dqn_2021 = DQN.load('examples/DQN_discrete_2021_g0.9999_lr0.0001/model.zip')

# Models and their labels are kept in matching order.
models_2021 = [ppo_2021, ppo_2019, dqn_2021, dqn_2019]
labels_2021 = ['ppo in dist', 'ppo out dist', 'dqn in dist', 'dqn out dist']
ax = plot_reward_distribution(None, env_2021, models_2021, labels_2021, '2021')

# Tilt the tick labels before writing the figure to disk.
plt.xticks(rotation=30)
plt.savefig('algo_comp_2021.png', dpi=300)

In [None]:
# Roll out the 2019 discrete DQN agent for a single episode on the May 2019 market.
env_2019 = ElectricityMarketEnv(seed=195, month='2019-05')
dqn = DQN.load('examples/DQN_discrete_2019_g0.9999_lr0.0001/model.zip')

# The DQN checkpoint comes from a "discrete" run, so the env is wrapped accordingly.
discrete_env_2019 = DiscreteActions(env_2019)
results = run_model_for_evaluation(dqn, 1, discrete_env_2019)

In [None]:
import datetime
import gym
import matplotlib.dates as mdates
import matplotlib.ticker as plticker

# State of charge and prices over one May 2019 episode for the discrete A2C agent.
demand_csv = 'sustaingym/data/demand_data/CAISO-demand-2019-05.csv.gz'
df_load = pd.read_csv(demand_csv, index_col=0, compression='gzip')

# Base environment plus its discrete-action view; the plot helper receives the latter.
env = ElectricityMarketEnv(seed=195, month='2019-05')
discrete_env = DiscreteActions(env)

a2c = A2C.load('examples/A2C_discrete_2019_g0.9999_lr0.0001/model.zip')

model_label = 'a2c in-dist'
ax, ax2 = plot_state_of_charge_and_prices(None, df_load, a2c, model_label, discrete_env)

plt.savefig('prices_and_soc_over_episode.png', dpi=300)

In [None]:
# Reward over a single May 2019 episode for the PPO agent.
# The PPO checkpoint is trained without the -d flag (its directory carries no
# "discrete" tag), and elsewhere in this notebook it is evaluated on the
# unwrapped environment. It therefore gets the raw environment here too, rather
# than a DiscreteActions wrapper that belongs to the DQN/A2C runs.
env = ElectricityMarketEnv(month='2019-05', seed=195)
ppo = PPO.load('examples/PPO_2019_g0.9999_lr0.0001/model.zip')

ax, ax2 = plot_reward_over_episode(None, ppo, env)

# NOTE(review): output name is spelled "epsiode"; kept as-is in case other files
# reference it — rename together with those references.
plt.savefig('reward_over_epsiode.png', dpi=300)

In [None]:
# Compare four policies on the same May 2019 episode (seed 0): the offline
# optimum, a policy that follows the offline-optimal dispatch, PPO and DQN.
# Each policy's traces are collected into a dict of keyword arguments for
# plot_episode(); index [0] selects the only episode/seed that was run.
env = ElectricityMarketEnv(month='2019-05', seed=195)

# run offline optimal
env.reset(seed=0)
results = get_offline_optimal(seeds=[0], env=env)
dispatch = results['dispatch']
offline_data = {
    'rewards': results['rewards'][0],
    'prices': results['prices'][0],
    'model_name': 'offline',
    'energy_level': results['energy'][0]
}

# run follow optimal deterministic model
# (reuses the dispatches and energy levels produced by the offline run above)
env.reset(seed=0)
follow_rewards, follow_energy, follow_prices = get_follow_offline_optimal(seeds=[0],
                                                env=env, optimal_dispatches=dispatch, optimal_eng_lvl=results['energy'])
follow_offline_data = {
    'rewards': follow_rewards[0],
    'prices': follow_prices[0],
    'model_name': 'follow offline',
    'energy_level': follow_energy[0]
}

# run PPO model
# The 4th argument of run_model_for_evaluation is False for PPO and True for DQN;
# presumably it flags a discrete-action model — TODO confirm against plot_utils.
env.reset(seed=0)
ppo = PPO.load('examples/PPO_2019_g0.9999_lr0.0001/model.zip')
ppo_results = run_model_for_evaluation(ppo, 1, env, False)

# Only the PPO series carries 'bids' (its actions) into the plot.
ppo_data = {
    'rewards': ppo_results['rewards'][0],
    'prices': ppo_results['prices'][0],
    'model_name': 'ppo',
    'energy_level': ppo_results['energies'][0],
    'bids': ppo_results['actions'][0]
}

# run DQN model
env.reset(seed=0)
dqn = DQN.load('examples/DQN_discrete_2019_g0.9999_lr0.0001/model.zip')
dqn_results = run_model_for_evaluation(dqn, 1, env, True)

# Labelled 'dqn' so its legend entry is distinguishable from the PPO series.
dqn_data = {
    'rewards': dqn_results['rewards'][0],
    'prices': dqn_results['prices'][0],
    'model_name': 'dqn',
    'energy_level': dqn_results['energies'][0]
}

# Draw all four policies onto the shared set of axes.
fig, axs, times = setup_episode_plot(env, '2019-05', include_bids=True)
plot_episode(axs, times, **offline_data)
plot_episode(axs, times, **follow_offline_data)
plot_episode(axs, times, **ppo_data)
plot_episode(axs, times, **dqn_data)

for ax in axs:
    ax.legend()

# NOTE(review): output name is spelled "epsiode"; kept as-is in case other files
# reference it — rename together with those references.
plt.savefig('epsiode_plot.png', dpi=300)