In [1]:
import pandas as pd
import numpy as np

from dataset import get_dataset, add_derivatives
from env_discrete import BatteryDiscrete
from qlearning import QLearning

from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm

from plot import display_profit, display_schedule
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import datetime
import warnings


In [2]:
# Load the full dataset; later cells read its `timestamp` and `price` columns.
df = get_dataset()

In [3]:
# Split by calendar year: 2021 rows are used for training, all earlier rows for testing.
# NOTE(review): the test period precedes the training period -- confirm this is intended.
row_year = df.timestamp.dt.year
df_train = df.loc[row_year == 2021].reset_index(drop=True)
df_test = df.loc[row_year < 2021].reset_index(drop=True)

# Fit the min-max scaler on the training prices only, then apply the same
# fitted scaling to the test prices so no test statistics leak into the fit.
scaler = MinMaxScaler()
train_price_column = df_train.price.to_numpy().reshape(-1, 1)
test_price_column = df_test.price.to_numpy().reshape(-1, 1)
df_train["scaled_price"] = scaler.fit_transform(train_price_column)
df_test["scaled_price"] = scaler.transform(test_price_column)

In [22]:
derivatives_rolling_windows = [3, 2]

# Add the first and second price derivative columns (price_der1, price_der2) to both frames.
df_train = add_derivatives(df_train, colname="price", nders=2, rolling_windows=derivatives_rolling_windows)
df_test = add_derivatives(df_test, colname="price", nders=2, rolling_windows=derivatives_rolling_windows)

# Number of quantile bins for the price and for its first and second derivatives.
price_bins, price_der1_bins, price_der2_bins = 10, 10, 3


def discretize_by_train_quantiles(train_values, test_values, n_bins):
    """Bin two series into ``n_bins`` classes using the train quantiles.

    The bin edges are the quantiles of ``train_values``. Before they are
    applied to ``test_values`` the outermost edges are widened to
    -inf / +inf, so test values outside the training range still land in
    the first or last bin instead of becoming missing.

    Parameters
    ----------
    train_values : pandas.Series
        Values whose quantiles define the bins.
    test_values : pandas.Series
        Values to place into the bins derived from ``train_values``.
    n_bins : int
        Number of quantile bins; bins are labelled ``0 .. n_bins - 1``.

    Returns
    -------
    tuple
        ``(train_binned, test_binned, bin_edges)`` where ``bin_edges`` is the
        edge array with the widened (infinite) outer bounds.

    Raises
    ------
    ValueError
        Propagated from ``pd.qcut`` when the quantile edges are not unique.

    Notes
    -----
    Missing inputs stay missing in the output (pandas does not bin NaN).
    """
    bin_labels = list(range(n_bins))
    train_binned, bin_edges = pd.qcut(train_values, q=n_bins, retbins=True, labels=bin_labels)
    # Open both ends so out-of-range test values fall into the outer bins.
    bin_edges[0], bin_edges[-1] = -np.inf, np.inf
    test_binned = pd.cut(test_values, bins=bin_edges, labels=bin_labels)
    return train_binned, test_binned, bin_edges


# Discretize the price and its first and second derivatives (rolling differences).
df_train["dprice"], df_test["dprice"], price_quantiles = discretize_by_train_quantiles(
    df_train.price, df_test.price, price_bins
)
df_train["dprice_der1"], df_test["dprice_der1"], price_der1_quantiles = discretize_by_train_quantiles(
    df_train.price_der1, df_test.price_der1, price_der1_bins
)
df_train["dprice_der2"], df_test["dprice_der2"], price_der2_quantiles = discretize_by_train_quantiles(
    df_train.price_der2, df_test.price_der2, price_der2_bins
)

# Get environments

In [23]:
# Only the two discretized derivative columns are passed as discrete columns;
# nbins lists the bin count of each column in the same order.
discrete_cols = ["dprice_der1", "dprice_der2"]
nbins = [price_der1_bins, price_der2_bins]

# Both environments share the same configuration and differ only in their data.
# start_hour is the sum of the rolling windows -- presumably to skip the leading
# rows where the rolling derivatives are undefined (TODO confirm).
shared_env_kwargs = dict(
    k=5,
    nbins=nbins,
    discrete_cols=discrete_cols,
    start_hour=sum(derivatives_rolling_windows),
)
train_env = BatteryDiscrete(df_train, **shared_env_kwargs)
test_env = BatteryDiscrete(df_test, **shared_env_kwargs)


invalid value encountered in cast



# Q-Learning policy

In [27]:
train_env.reset()

# The Q-table is indexed as (der1 bin, der2 bin, SOC index, action).
soc_levels = 3
state_shape = (*nbins, soc_levels)
model = QLearning(train_env, state_shape=state_shape, nactions=3, epsilon=0.1)

# Rule out impossible state/action pairs by pinning their Q-value to -inf.
soc_empty, soc_full = 0, 2            # SOC == 0 is index 0, SOC == 1 is index 2
act_discharge, act_charge = 0, 2      # action 0 discharges, action 2 charges
model.Q[:, :, soc_empty, act_discharge] = -np.inf  # cannot discharge an empty battery
model.Q[:, :, soc_full, act_charge] = -np.inf      # cannot charge a full battery

# Train for 100 times the number of rows in the training frame.
model.learn(total_timesteps=100 * len(df_train))

In [28]:
# Fraction of Q-table entries that are non-zero. The entries pinned to -inf
# for impossible actions also count as non-zero here.
(model.Q!=0).mean()

0.8740740740740741

In [35]:
# Inspect the Q-values for one derivative state (der1 bin 9, der2 bin 2):
# rows are SOC indices, columns are actions.
model.Q[9,2,:]

array([[       -inf,  0.14217031,  0.22502838],
       [-0.00106217,  0.        ,  0.        ],
       [ 0.02965729,  0.10662444,        -inf]])

# Test on train env:

In [30]:
# Evaluate the trained model on the training environment; test() returns the
# cumulative reward and a result frame that the display_* helpers consume.
cum_reward, df_optim = train_env.test(model)

100%|█████████▉| 8754/8760 [00:00<00:00, 144642.57it/s]


In [31]:
# Show the profit view of the training-environment run held in df_optim.
display_profit(df_optim)

# Test on test env

In [32]:
# Evaluate the same model on the test environment.
# NOTE: this overwrites cum_reward and df_optim from the training-environment run.
cum_reward, df_optim = test_env.test(model)

100%|█████████▉| 8777/8783 [00:00<00:00, 117848.91it/s]


In [33]:
# Show the schedule view of the test-environment run held in df_optim.
display_schedule(df_optim)

In [34]:
# Show the profit view of the test-environment run held in df_optim.
display_profit(df_optim)