In [1]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None 

import matplotlib.pyplot as plt
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from env import get_dataset, Battery

from plot import display_profit, display_schedule

from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm



import plotly.express as px
import datetime


import plotly.graph_objects as go
from plotly.subplots import make_subplots

import datetime
import warnings

from scipy.optimize import linprog


# Load the datasets and build the environments

In [2]:
# `k` is forwarded unchanged to both Battery environments below
# (its meaning is defined by env.Battery).
k = 5
df = get_dataset()

# Split by calendar year: 2021 rows form the training set, 2022 rows the test set.
# reset_index(drop=True) gives each split a fresh 0..n-1 index.
df_train = df.loc[df["timestamp"].dt.year == 2021].reset_index(drop=True)
df_test = df.loc[df["timestamp"].dt.year == 2022].reset_index(drop=True)

# Fit the min-max scaling on the training prices only, then apply the same
# fitted transform to the test prices. The scaler expects a 2-D
# (n_samples, 1) array, hence the single-column frame selection.
scaler = MinMaxScaler()
df_train["scaled_price"] = scaler.fit_transform(df_train[["price"]].to_numpy())
df_test["scaled_price"] = scaler.transform(df_test[["price"]].to_numpy())

# One environment per split, sharing the same k.
train_env = Battery(df_train, k=k)
test_env = Battery(df_test, k=k)

# Get mean hourly prices from the training set

In [3]:
# Mean of every numeric column for each hour of day over the training year.
# numeric_only=True is passed explicitly: relying on the implicit default emits
# a FutureWarning on pandas 1.5 and, from pandas 2.0 on, would also try to
# average the non-numeric columns (including the datetime `timestamp` column,
# which would then clash with the group key of the same name in reset_index()).
# The group key keeps the name "timestamp", so after reset_index() the
# "timestamp" column of df_hourly holds the hour of day.
df_hourly = (
    df_train
    .groupby(df_train.timestamp.dt.hour)
    .mean(numeric_only=True)
    .reset_index()
)


  df_hourly = df_train.groupby(df_train.timestamp.dt.hour).mean().reset_index()


# Get optimized schedule for mean hourly prices

In [4]:

# Objective coefficients: the mean training price for each hour of the day.
P_day = df_hourly.price
n_hours = len(P_day)

# Environment built on the hourly means; only its NEC and E1H attributes are
# read in this cell.
# NOTE(review): presumably NEC is the usable storage capacity and E1H the
# maximum energy moved in one hour - confirm against env.Battery.
hourly_policy_env = Battery(df_hourly, k=1)

# Decision variable x[h]: quantity scheduled in hour h. linprog minimises
# P_day @ x subject to the constraints below.

## (I) lhs <= rhs : running total of x never exceeds NEC
# Row h of the lower-triangular matrix sums x[0..h].
lhs_ineq1 = np.tril(np.ones((n_hours, n_hours)))
rhs_ineq1 = np.ones(n_hours) * hourly_policy_env.NEC

## (II) lhs <= rhs : running total of x never drops below 0,
## written as -(running total) <= 0
lhs_ineq2 = -lhs_ineq1
rhs_ineq2 = np.zeros(n_hours)

## (III) per-hour bounds: -E1H <= x[h] <= E1H
bnd = np.array([(-hourly_policy_env.E1H, hourly_policy_env.E1H)] * n_hours)

# Stack (I) and (II) into the single A_ub @ x <= b_ub system linprog expects.
lhs_ineq = np.vstack((lhs_ineq1, lhs_ineq2))
rhs_ineq = np.hstack((rhs_ineq1, rhs_ineq2))

opt = linprog(c=P_day, A_ub=lhs_ineq, b_ub=rhs_ineq, bounds=bnd)
# Fail loudly with the solver's own message instead of crashing later on an
# unusable `opt.x`.
if not opt.success:
    raise RuntimeError(f"linprog did not find an optimal schedule: {opt.message}")

# Optimal per-hour schedule as a plain list; read by policy().
t = list(opt.x)

def policy(hour, E1H) :
    """Return the action for `hour` derived from the optimised schedule `t`.

    Reads the module-level list `t` (the linprog solution), picks the entry at
    index ``hour - 1``, divides it by `E1H` and adds 1. Because the schedule
    entries are bounded to [-E1H, E1H] when the same E1H is used, the result
    then lies in [0, 2].

    Parameters
    ----------
    hour : int
        Hour of day. ``hour == 0`` maps to index -1, i.e. the last entry of
        `t`, through Python's negative indexing.
    E1H : number
        Scale factor the schedule entry is divided by.

    NOTE(review): the ``hour - 1`` shift means hour h uses the schedule entry
    of hour h-1 - confirm this offset is intended.
    """
    scheduled = np.asarray(t)[hour - 1]
    return scheduled / E1H + 1

# Test the policy on the test environment

In [5]:
# Roll the fixed hourly policy through the test environment, one step per row,
# collecting the reward of every step.
obs = test_env.reset()
reward_list = []

# Hour-of-day for every row, extracted once. Rebuilding `.dt.hour` inside the
# loop recomputes the whole column on every step.
# NOTE(review): assumes step() does not alter the timestamp column of
# test_env.df - confirm against env.Battery.
hour_of_day = test_env.df.timestamp.dt.hour

for _step in tqdm(range(len(test_env.df))):
    # test_env.hour is used as the row label of the current step.
    hour = hour_of_day[test_env.hour]
    action = policy(hour, test_env.E1H)
    obs, reward, done, _ = test_env.step(action)
    reward_list.append(reward)
    # The environment may end the episode before every row is consumed.
    if done:
        break

# Episode record as exposed by the environment; consumed by the display cells.
df_optim = test_env._get_info()


100%|█████████▉| 8754/8760 [00:11<00:00, 773.48it/s]


In [6]:
# Render the profit view for df_optim (the value returned by
# test_env._get_info() after the rollout) using display_profit from the local
# `plot` module. NOTE(review): the figure contents are defined in plot.py, not
# visible here.
display_profit(df_optim)

In [7]:
# Render the schedule view for the same df_optim using display_schedule from
# the local `plot` module. NOTE(review): the figure contents are defined in
# plot.py, not visible here.
display_schedule(df_optim)