In [None]:
# Reload edited project modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Move the working directory up one level; the relative paths used further
# down ('examples/...', 'sustaingym/data/...') are resolved against it.
# NOTE(review): this is not idempotent -- re-running this cell without a
# kernel restart moves up one more directory and breaks those paths.
%cd ..

In [None]:
# List the current working directory -- presumably a sanity check that the
# `%cd ..` in the first cell landed in the expected folder.
!ls

In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Global seaborn style for every figure below: "darkgrid" preset with the
# grid colour and grid line style overridden through the rc dictionary.
sns.set_style(
    "darkgrid",
    rc={"grid.color": ".6", "grid.linestyle": ":"},
)

from stable_baselines3 import PPO, A2C, DQN
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, StopTrainingOnNoModelImprovement
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv



from sustaingym.envs import ElectricityMarketEnv
from sustaingym.envs.battery.plot_utils import plot_model_training_reward_curves, plot_reward_distribution, plot_state_of_charge_and_prices, plot_reward_over_episode, run_model_for_evaluation
from sustaingym.envs.battery.wrapped import DiscreteActions

In [None]:
# Plot the PPO training reward curves for the two labelled series
# (presumably in-distribution and out-of-distribution evaluation -- confirm
# against plot_model_training_reward_curves) and save the current figure.
ppo_curve_labels = ['in_dist', 'out_dist']
ax = plot_model_training_reward_curves(
    None,
    'PPO',
    ppo_curve_labels,
)
plt.savefig('examples/logs_PPO/ppo_reward_curves.png', dpi=300)

In [None]:
# Plot the A2C training reward curves for the two labelled series
# (presumably in-distribution and out-of-distribution evaluation -- confirm
# against plot_model_training_reward_curves) and save the current figure.
a2c_curve_labels = ['in_dist', 'out_dist']
ax = plot_model_training_reward_curves(
    None,
    'A2C',
    a2c_curve_labels,
)
plt.savefig('examples/logs_A2C/a2c_reward_curves.png', dpi=300)

In [None]:
# Compare the saved PPO and A2C checkpoints on the May-2019 environment.
# Both checkpoint file names carry "2019_5", matching the month used here.
env_2019 = ElectricityMarketEnv(month='2019-05', seed=195)

ppo = PPO.load('examples/model_PPO_2019_5.zip')
a2c = A2C.load('examples/model_A2C_2019_5.zip')

models_2019 = [ppo, a2c]
labels_2019 = ['ppo in dist', 'a2c in dist']
# The literal 10 is passed straight through to plot_reward_distribution;
# presumably the number of evaluation episodes -- TODO confirm.
ax = plot_reward_distribution(
    None, env_2019, models_2019, labels_2019, 10, '2019'
)

# Tilt the x tick labels by 30 degrees before saving the current figure.
plt.xticks(rotation=30)
plt.savefig('algo_comp_2019.png', dpi=300)



In [None]:
# Evaluate the checkpoints saved as "*_2019_5" on a May-2021 environment.
# The environment is built with ElectricityMarketEnv, the class imported at
# the top of the notebook and used by every other cell. The previous name,
# BatteryStorageInGridEnv, is not imported anywhere in this notebook, so the
# cell raised NameError on a fresh kernel.
env_2021 = ElectricityMarketEnv(month='2021-05', seed=215)

ppo = PPO.load('examples/model_PPO_2019_5.zip')
a2c = A2C.load('examples/model_A2C_2019_5.zip')

# The literal 10 is passed straight through to plot_reward_distribution;
# presumably the number of evaluation episodes -- TODO confirm.
ax = plot_reward_distribution(None, env_2021, [ppo, a2c],
    ['ppo out dist', 'a2c out dist'], 10, '2021')

plt.savefig('algo_comp_2021.png', dpi=300)

In [None]:
import datetime
import gym
import matplotlib.dates as mdates
import matplotlib.ticker as plticker

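# NOTE(review): the commented-out class below appears to duplicate the
# `DiscreteActions` wrapper imported from sustaingym.envs.battery.wrapped at
# the top of the notebook; it is not executed.
# class DiscreteActions(gym.ActionWrapper):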
#     def __init__(self, env: ElectricityMarketEnv):
#         super().__init__(env)
#         self.env = env
#         self.charge_action = (env.action_space.high[0], env.action_space.high[0])
#         self.discharge_action = (0.01*env.action_space.high[0], 0.01*env.action_space.high[0])
#         self.action_space = gym.spaces.Discrete(2)
    
#     def action(self, action: int):
#         assert action in [0, 1]
#         if action == 0:
#             return self.charge_action
#         else:
#             return self.discharge_action
    
#     def _calculate_prices_without_agent(self) -> np.ndarray:
#         return self.env._calculate_prices_without_agent()
    
#     def _calculate_price_taking_optimal(
#             self, prices: np.ndarray, init_charge: float,
#             final_charge: float) -> tuple[float, np.ndarray]:
#         return self.env._calculate_price_taking_optimal(prices, init_charge, final_charge)
    
#     def _calculate_terminal_cost(self, agent_battery_charge: float) -> float:
#         return self.env._calculate_terminal_cost(agent_battery_charge)

# Demand table for the plot (CAISO, 2019-05 according to the file name); the
# first CSV column becomes the DataFrame index.
demand_csv = 'sustaingym/data/demand_data/CAISO-demand-2019-05.csv.gz'
df_load = pd.read_csv(demand_csv, index_col=0, compression='gzip')

# May-2019 environment plus its DiscreteActions wrapper; both names stay bound.
env = ElectricityMarketEnv(month='2019-05', seed=195)
discrete_env = DiscreteActions(env)

# NOTE: this rebinds `a2c` to the discrete-action checkpoint, replacing the
# model loaded under the same name in the earlier comparison cells.
a2c = A2C.load('examples/discrete_model_A2C_2019_5.zip')

ax, ax2 = plot_state_of_charge_and_prices(
    None,
    df_load,
    a2c,
    'a2c in-dist',
    discrete_env,
)

plt.savefig('prices_and_soc_over_episode.png', dpi=300)

In [None]:
# Build the May-2019 environment and wrap it with DiscreteActions in one
# expression; `env` ends up bound to the wrapped environment.
env = DiscreteActions(ElectricityMarketEnv(month='2019-05', seed=195))

# NOTE: rebinds `ppo` to the discrete-action PPO checkpoint.
ppo = PPO.load('examples/discrete_model_PPO_2019_5.zip')

ax, ax2 = plot_reward_over_episode(None, ppo, env)

# NOTE(review): 'epsiode' in the file name looks like a typo for 'episode';
# it is left unchanged so the output path stays the same.
plt.savefig('reward_over_epsiode.png', dpi=300)