In [11]:
import pandas as pd
import numpy as np

from dataset import get_dataset, add_derivatives
from env_continuous import Battery
from qlearning import QLearning

from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm

from plot import display_profit, display_schedule
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import datetime
import warnings

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN

In [12]:
# Load the full dataset through the project's `dataset.get_dataset` helper.
# Later cells rely on it exposing `timestamp` (datetime) and `price` columns.
df = get_dataset()

In [13]:
# Split by calendar year: 2020 rows become the training frame, 2021 rows the
# test frame. Each subset is re-indexed from 0 so positional and label
# indexing coincide.
df_train = df.loc[df["timestamp"].dt.year == 2020].reset_index(drop=True)
df_test = df.loc[df["timestamp"].dt.year == 2021].reset_index(drop=True)

In [14]:
def add_rolling_history_prices(df, n_days=14, k=3):
    """Add normalised-price lag features and 24 "history" features to ``df``.

    The columns are written onto the frame that is passed in (it is modified
    in place and also returned):

    * ``p_0``: ``price`` divided by its trailing ``n_days * 24``-row rolling mean.
    * ``p_1`` .. ``p_{k-1}``: ``p_0`` shifted down by 1 .. ``k - 1`` rows.
    * ``h_0``: for each trailing ``n_days * 24``-row window of ``p_0``, the mean
      of every 24th sample of that window (starting from its oldest row).
    * ``h_1`` .. ``h_23``: ``h_0`` shifted down by 1 .. 23 rows.

    Rows that do not yet have enough history contain NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``price`` column.
        NOTE(review): the ``* 24`` / ``[::24]`` arithmetic presumably assumes
        one row per hour -- confirm against the dataset.
    n_days : int, default 14
        Number of 24-row blocks used for both rolling windows.
    k : int, default 3
        Number of ``p_*`` columns to create (``p_0`` included).

    Returns
    -------
    tuple[pandas.DataFrame, list[str]]
        The same ``df`` object and the list of feature column names, in the
        order ``p_0 .. p_{k-1}, h_0 .. h_23``.
    """
    window = n_days * 24

    df["p_0"] = df.price / df.price.rolling(window).mean()
    cols = ["p_0"]

    for i in range(1, k):
        df[f"p_{i}"] = df[f"p_{i-1}"].shift(1)
        cols.append(f"p_{i}")

    # Build the history feature from the frame we were given. The previous
    # version read the global `df_train` here, which silently filled the test
    # set with training-set values and broke the function outside the notebook.
    # raw=True hands the lambda a NumPy array, avoiding a Series per window.
    df["h_0"] = df["p_0"].rolling(window).apply(lambda x: x[::24].mean(), raw=True)
    cols.append("h_0")

    for h in range(1, 24):
        df[f"h_{h}"] = df[f"h_{h-1}"].shift(1)
        cols.append(f"h_{h}")

    return df, cols


In [15]:
# Feature settings shared by both splits: rolling-window length in days and
# the number of lagged `p_*` columns.
n_days = 14
k = 5
# Both calls return the same column list for identical (n_days, k), so `cols`
# being overwritten by the second call is harmless.
df_train, cols = add_rolling_history_prices(df_train, n_days=n_days, k=k)
df_test, cols = add_rolling_history_prices(df_test, n_days=n_days, k=k)

In [16]:
# First row index passed to the environments. In add_rolling_history_prices,
# p_0 uses a rolling(n_days*24) mean, h_0 is a rolling(n_days*24) statistic of
# p_0, and h_23 is h_0 shifted 23 further rows -- so the last feature column
# becomes non-NaN at row 2*n_days*24 + 21, which is what this expression
# evaluates to whenever k <= 24.
# NOTE(review): presumably Battery begins its episodes at this row -- confirm.
start_hour = (n_days * 24 * 2) + max(24, k) - 3

def reward(env, action):
    """Price-based reward for a single environment step.

    Returns minus the price at ``env.hour`` when ``action`` is 2, plus that
    price when ``action`` is 0, and 0 for any other action. The price is read
    from ``env.df.price`` and is only looked up for actions 0 and 2.

    NOTE(review): presumably action 2 buys energy (charge) and action 0 sells
    it (discharge) -- confirm against the Battery environment.
    """
    if action == 2:
        sign = -1
    elif action == 0:
        sign = 1
    else:
        # Any other action earns nothing and never touches the price column.
        return 0

    return sign * env.df.price[env.hour]

# Three independent environments sharing the same feature columns, start row
# and reward function:
#   train_env      - the environment the agent is trained on
#   train_env_copy - second environment on the training data, passed to the
#                    logging callback for periodic evaluation
#   test_env       - environment on the held-out 2021 data
train_env = Battery(
    df_train,
    cols,
    start_hour=start_hour,
    reward_function=reward,
)
train_env_copy = Battery(
    df_train,
    cols,
    start_hour=start_hour,
    reward_function=reward,
)
test_env = Battery(
    df_test,
    cols,
    start_hour=start_hour,
    reward_function=reward,
)


In [17]:
# Sanity-check the custom environment with Stable-Baselines3's env checker
# before handing it to an algorithm.
check_env(train_env)

In [18]:
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, BaseCallback

class Cometlogger(BaseCallback):
    """Callback that periodically evaluates the model and logs results to Comet.

    Every ``eval_freq`` callback invocations the current model is run through
    ``train_env_copy.test`` and ``test_env.test``; the returned rewards and a
    profit figure derived from the returned schedules are sent to the Comet
    experiment and the profits are printed.

    Parameters
    ----------
    experiment
        Object exposing ``log_metric(name, value, step=...)`` (a Comet
        ``Experiment`` in this notebook).
    train_env_copy
        Environment on the training data used only for evaluation. It must
        expose ``test(model)`` returning ``(reward, dataframe)``, where the
        dataframe has ``price`` and ``schedule`` columns.
    test_env
        Same contract as ``train_env_copy``, built on the held-out data.
    eval_freq : int, default 10000
        Evaluate every ``eval_freq`` calls of the callback. A value <= 0
        disables evaluation.
    """

    def __init__(self, experiment, train_env_copy, test_env, eval_freq=10000):
        super().__init__()
        self.test_env = test_env
        self.train_env = train_env_copy
        self.eval_freq = eval_freq
        self.experiment = experiment

    def _on_step(self) -> bool:
        """Run the periodic evaluation; always returns True so training continues."""
        if self.eval_freq <= 0 or self.n_calls % self.eval_freq != 0:
            return True

        reward_train, df_optim_train = self.train_env.test(self.model)
        reward_test, df_optim_test = self.test_env.test(self.model)

        # Profit is the negated sum of price * schedule, scaled down by 1e6.
        # NOTE(review): sign convention and units of `schedule` are defined by
        # the Battery environment -- confirm there.
        profit_train = -(df_optim_train.price * df_optim_train.schedule).sum() / 10**6
        profit_test = -(df_optim_test.price * df_optim_test.schedule).sum() / 10**6

        # Attach the training timestep so Comet can plot each evaluation
        # against training progress rather than as unordered points.
        step = self.num_timesteps
        self.experiment.log_metric("profit_train", profit_train, step=step)
        self.experiment.log_metric("profit_test", profit_test, step=step)
        self.experiment.log_metric("reward_train", reward_train, step=step)
        self.experiment.log_metric("reward_test", reward_test, step=step)

        print("---------")
        print("profit_train ", profit_train)
        print("profit_test ", profit_test)
        print("---------")

        return True


In [19]:
# Import comet_ml at the top of your file
import os

from comet_ml import Experiment

# SECURITY: never commit the API key. It is read from the COMET_API_KEY
# environment variable; when that is unset, None is passed and Comet falls
# back to its own configuration lookup. The key that used to be hard-coded
# here has been exposed and should be revoked and regenerated.
experiment = Experiment(
    api_key=os.environ.get("COMET_API_KEY"),
    project_name="battery-rl",
    workspace="albanpuech",
)

# Report multiple hyperparameters using a dictionary:
hyper_params = {
    "k": k,
}
experiment.log_parameters(hyper_params)


COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/albanpuech/battery-rl/b1136009b30a42dd98b4129a0680948c
COMET INFO:   Uploads:
COMET INFO:     environment details      : 1
COMET INFO:     filename                 : 1
COMET INFO:     git metadata             : 1
COMET INFO:     git-patch (uncompressed) : 1 (34.54 KB)
COMET INFO:     installed packages       : 1
COMET INFO:     notebook                 : 1
COMET INFO:     source_code              : 1
COMET INFO: ---------------------------
COMET INFO: Experiment is live on comet.com https://www.comet.com/albanpuech/battery-rl/6559924d97f9492a8072a065ac686242



In [20]:
train_env.reset()

# DQN agent with the default MLP policy, trained on the 2020 environment.
model = DQN("MlpPolicy", train_env, verbose=1)

# Evaluate on the train copy and the test environment every 50,000 callback
# steps and push the results to Comet.
logger_callback = Cometlogger(experiment, train_env_copy, test_env, eval_freq=50000)
callback = CallbackList([logger_callback])

try:
    # Training budget: 500 timesteps per row of the training frame.
    model.learn(total_timesteps=len(df_train) * 500, callback=callback)
finally:
    # Close the Comet run even when training raises or the cell is
    # interrupted, so the experiment is not left open.
    experiment.end()


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 8.09e+03 |
|    ep_rew_mean      | 1.9e+04  |
|    exploration_rate | 0.93     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 4811     |
|    time_elapsed     | 6        |
|    total_timesteps  | 32360    |
----------------------------------
---------
profit_train  -34.069
profit_test  -62.632
---------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 8.09e+03 |
|    ep_rew_mean      | 2.08e+04 |
|    exploration_rate | 0.86     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1207     |
|    time_elapsed     | 53       |
|    total_timesteps  | 64720    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 6.04     |
| 

In [None]:
# Run the trained model through the held-out environment; `test` returns the
# reward it reports together with a dataframe describing the resulting schedule.
cum_reward, df_optim = test_env.test(model)

# Plot the schedule produced on the test data (project helper from plot.py).
display_schedule(df_optim)

# Plot the corresponding profit (project helper from plot.py).
display_profit(df_optim)