In [None]:
import pandas as pd
import numpy as np

from dataset import get_dataset, add_derivatives
from env_continuous import Battery
from qlearning import QLearning

from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm

from plot import display_profit, display_schedule
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import datetime
import warnings
import os

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN, PPO

In [None]:
# Load the raw price data through the project helper `get_dataset`.
# NOTE(review): only `year=2020` is requested here, yet the next cell selects
# rows whose timestamp year is 2020, 2021 and 2022 — confirm what `year` means
# to `get_dataset` and that all three years are actually returned.
df = get_dataset(year=2020)
# df_Austria = get_dataset(year=2020, country='Austria')

In [None]:
# Length, in rows, of the trailing window used for the rolling mean of `price`.
rolling_mean_price_hours = 24

# One frame per calendar year of `timestamp`, each re-indexed from 0:
# 2021 -> train, 2020 -> eval, 2022 -> test.
_timestamp_year = df.timestamp.dt.year
df_train, df_eval, df_test = (
    df[_timestamp_year == _year].reset_index(drop=True)
    for _year in (2021, 2020, 2022)
)

# Min-max scaling of the price is currently disabled:
# scaler = MinMaxScaler()
# df_train["scaled_price"] = scaler.fit_transform(df_train.price.to_numpy().reshape(-1, 1))
# df_test["scaled_price"]  = scaler.transform(df_test.price.to_numpy().reshape(-1, 1))

# Trailing mean of the price, computed independently inside each split; the
# first `rolling_mean_price_hours - 1` rows of every split are therefore NaN.
for _split in (df_train, df_eval, df_test):
    _split["rolling_mean_price"] = _split.price.rolling(rolling_mean_price_hours).mean()


In [None]:
def add_rolling_history_prices(df, n_days=14, k=3):
    """Add lagged-price and strided-history feature columns to ``df``.

    The frame is modified in place and is also returned.

    Columns written:

    * ``p_0`` is a copy of ``price``; ``p_i`` for ``1 <= i < k`` is ``p_0``
      lagged by ``i`` rows.
    * ``h_0`` is, for each row, the mean of every 24th value (starting at the
      oldest) of the trailing ``n_days * 24``-row window of ``p_0``, i.e. the
      values found ``24 * m - 1`` rows back for ``m = 1 .. n_days``.
    * ``h_j`` for ``1 <= j < 24`` is ``h_0`` lagged by ``j`` rows.

    Rows without enough history hold NaN. The 24-row stride only means
    "one value per day" if rows are consecutive hourly observations —
    TODO confirm against the dataset.

    Args:
        df: DataFrame with a ``price`` column.
        n_days: Number of 24-row blocks in the history window behind ``h_0``.
        k: Number of ``p_*`` columns to create (``p_0`` .. ``p_{k-1}``).

    Returns:
        tuple: ``(df, cols)`` where ``cols`` lists the created column names in
        the order ``p_0 .. p_{k-1}, h_0 .. h_23``.
    """
    window = n_days * 24

    df["p_0"] = df["price"]
    cols = ["p_0"]
    for lag in range(1, k):
        name = f"p_{lag}"
        df[name] = df["p_0"].shift(lag)
        cols.append(name)

    # raw=True hands every window to the lambda as a NumPy array instead of
    # building a Series per window, which is considerably faster. The values
    # are unchanged: with the default min_periods (= window size) the lambda
    # is only evaluated on complete windows without missing values.
    df["h_0"] = df["p_0"].rolling(window).apply(lambda w: w[::24].mean(), raw=True)
    cols.append("h_0")
    for lag in range(1, 24):
        name = f"h_{lag}"
        df[name] = df["h_0"].shift(lag)
        cols.append(name)

    return df, cols


In [None]:
# Feature settings: `n_days` sizes the history window behind the h_* columns,
# `k` is the number of p_* lag columns (p_0 .. p_{k-1}).
n_days = 14
k = 5

# Augment the splits in the order train, eval, test. Every call returns the
# same list of column names; `cols` keeps the one from the last (test) call.
_augmented = [
    add_rolling_history_prices(_frame, n_days=n_days, k=k)
    for _frame in (df_train, df_eval, df_test)
]
(df_train, df_eval, df_test), _cols_per_split = zip(*_augmented)
cols = _cols_per_split[-1]

In [None]:
# Row index handed to every Battery env below as `start_hour`.
# Presumably chosen to skip the leading NaN rows of the engineered features:
# h_23 is first defined at row n_days*24 + 22 and p_{k-1} at row k - 1.
# NOTE(review): the factor 2 and the "- 3" are not explained anywhere in this
# notebook — confirm the intended warm-up length.
start_hour = (n_days * 24 * 2) + max(24, k) - 3

def reward(env, action):
    """Score ``action`` by the gap between the price and its rolling mean.

    Both values are read from ``env.df`` at label ``env.hour``.

    Args:
        env: Object exposing ``df`` (with ``price`` and ``rolling_mean_price``
            columns) and ``hour``.
        action: Action code. ``2`` yields rolling mean minus price, ``0``
            yields price minus rolling mean, anything else yields ``0``.

    Returns:
        The signed gap described above, or ``0`` for any other action.
    """
    if action != 0 and action != 2:
        return 0

    frame, hour = env.df, env.hour
    price_now = frame.price[hour]
    mean_now = frame.rolling_mean_price[hour]
    return mean_now - price_now if action == 2 else price_now - mean_now

# NOTE(review): this rebinds `reward`, discarding the function defined just
# above, so every env below receives reward_function=None. What Battery does
# in that case is not visible from this notebook — confirm it is intended.
reward = None


def _build_env(frame):
    """Create a Battery env over `frame` with the shared feature columns and start hour."""
    return Battery(frame, cols, start_hour=start_hour, reward_function=reward)


train_env = _build_env(df_train)
# Second, independent env over the training data (handed to the Comet callback).
train_env_copy = _build_env(df_train)
test_env = _build_env(df_test)
eval_env = _build_env(df_eval)


In [None]:
# Run the Stable-Baselines3 environment checker on the training env before
# any training is attempted, so interface problems surface here.
check_env(train_env)

In [None]:
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, BaseCallback

class Cometlogger(BaseCallback):
    """Periodically score the current model and report the results.

    Every ``eval_freq`` callback calls, the model being trained is passed to
    ``test`` on a training env and on an evaluation env. The returned rewards,
    and a profit derived from each returned frame, are sent to
    ``experiment.log_metric``; the two profits are also printed.

    Args:
        experiment: Object exposing ``log_metric(name, value)``.
        train_env_copy: Env whose ``test(model)`` returns ``(reward, frame)``,
            where ``frame`` has ``price`` and ``schedule`` columns. Stored as
            ``self.train_env``.
        eval_env: Env with the same ``test`` contract, used for evaluation.
        eval_freq: Number of callback calls between two reports; a value that
            is not positive disables reporting.
    """

    def __init__(self, experiment, train_env_copy, eval_env, eval_freq=10000):
        super().__init__()
        self.experiment = experiment
        self.train_env = train_env_copy
        self.eval_env = eval_env
        self.eval_freq = eval_freq

    def _on_step(self) -> bool:
        """Report metrics when a report is due; always returns True."""
        report_due = self.eval_freq > 0 and self.n_calls % self.eval_freq == 0
        if not report_due:
            return True

        # Run both rollouts before deriving anything from their outputs.
        reward_train, schedule_train = self.train_env.test(self.model)
        reward_eval, schedule_eval = self.eval_env.test(self.model)

        # Profit is minus the sum of price * schedule, divided by 10**6.
        profit_train, profit_eval = (
            -(frame.price * frame.schedule).sum() / 10**6
            for frame in (schedule_train, schedule_eval)
        )

        metrics = (
            ("profit_train", profit_train),
            ("profit_eval", profit_eval),
            ("reward_train", reward_train),
            ("reward_eval", reward_eval),
        )
        for metric_name, metric_value in metrics:
            self.experiment.log_metric(metric_name, metric_value)

        separator = "---------"
        print(separator)
        print("profit_train ", profit_train)
        print("profit_eval ", profit_eval)
        print(separator)

        return True


In [None]:
# Import comet_ml at the top of your file
from comet_ml import Experiment

# Create the Comet experiment. The API key is read from the COMET_API_KEY
# environment variable instead of being stored in the notebook; when the
# variable is unset, None is passed and the comet_ml SDK falls back to its own
# configuration lookup.
# NOTE: the key that used to be hard-coded here has been exposed and should be
# revoked in the Comet account settings.
experiment = Experiment(
    api_key=os.environ.get("COMET_API_KEY"),
    project_name="battery-rl",
    workspace="albanpuech",
)

# Report multiple hyperparameters using a dictionary.
# NOTE(review): the "reward" description is free text — confirm it matches the
# reward function the environments are actually built with.
hyper_params = {
    "k": k,
    "n_days": n_days,
    "rolling_mean_price_hours": rolling_mean_price_hours,
    "reward": "difference of valuations with current price",
}
experiment.log_parameters(hyper_params)


In [None]:
# Attach a notebook file to the experiment via `log_code`.
# NOTE(review): the file name is hard-coded and resolved against the current
# working directory — confirm it is this notebook's actual name and location.
experiment.log_code(os.path.abspath("env_continuous_nb.ipynb"))

In [None]:
train_env.reset()

model = PPO("MlpPolicy", train_env, verbose=1)

# Both callbacks report on the same cadence (in callback calls).
eval_freq = 10000

# Logs train/eval profit and reward to Comet.
logger_callback = Cometlogger(experiment, train_env_copy, eval_env,
                              eval_freq=eval_freq)
# Evaluates on `eval_env` and keeps the best model under ./logs2/.
eval_callback = EvalCallback(eval_env, best_model_save_path="./logs2/",
                             log_path="./logs2/", eval_freq=eval_freq,
                             deterministic=True, render=False)
callback = CallbackList([logger_callback, eval_callback])

# Close the Comet experiment even if training fails or is interrupted, so it
# is not left open.
try:
    model.learn(total_timesteps=len(df_train) * 500, callback=callback)
finally:
    experiment.end()


In [None]:
# Reload the best checkpoint kept by EvalCallback during training. It is
# written as "best_model" inside best_model_save_path ("./logs2/"); the former
# 'logs/best_model' path pointed at a different directory. `load` is a
# classmethod, so it is called on PPO rather than on the trained instance.
model = PPO.load("./logs2/best_model")

In [None]:
# Roll the loaded model through the test environment.
cum_reward, df_optim_full = test_env.test(model)

# Keep only the rows from the env's start hour onwards for plotting.
first_row = test_env.start_hour
df_optim = df_optim_full[first_row:]
display_schedule(df_optim)

display_profit(df_optim)

In [None]:
# Close the Comet experiment. The training cell already calls
# `experiment.end()`, so this is a second call on the same experiment.
experiment.end()