### Import Library Dependencies

In [1]:
import os
import sys
import traceback
import logging as log
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from microgrid_env import MicrogridEnv
from q_learning_agent import QLearningAgent
from torch.utils.tensorboard.writer import SummaryWriter

log.basicConfig(format="Line:%(lineno)d-%(funcName)s-%(levelname)s:  %(message)s")
log.getLogger().setLevel(log.INFO)

### Environment Data Pre-processing

In [2]:
df = pd.read_csv("load_profile_data_nov2024.csv", header=0)

In [3]:
df.head()

Unnamed: 0,entry_time,weekday,tou_time_slot,grid_import_energy,solar_prod_energy,solar_ctlr_setpoint,site_load_energy
0,11/1/24 0:00,Friday,o,1339.4,0.0,100.0,1339.4
1,11/1/24 1:00,Friday,o,1388.9,0.0,100.0,1388.9
2,11/1/24 2:00,Friday,o,1444.8,0.0,100.0,1444.8
3,11/1/24 3:00,Friday,o,1463.7,0.0,100.0,1463.7
4,11/1/24 4:00,Friday,o,1467.0,0.0,100.0,1467.0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 720 entries, 0 to 719
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   entry_time           720 non-null    object 
 1   weekday              720 non-null    object 
 2   tou_time_slot        720 non-null    object 
 3   grid_import_energy   720 non-null    float64
 4   solar_prod_energy    720 non-null    float64
 5   solar_ctlr_setpoint  720 non-null    float64
 6   site_load_energy     720 non-null    float64
dtypes: float64(4), object(3)
memory usage: 39.5+ KB


In [5]:
df['timestamp'] = pd.to_datetime(df['entry_time'], format="mixed")

In [6]:
df['ts_hour'] = df['timestamp'].dt.hour

In [7]:
def convert_weekday(wkd: str) -> str:
    if wkd in ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']:
        return 'week'
    else:
        return wkd.lower()

In [8]:
# One-hot encode the Weekday field
df['weekday_fmt'] = df['weekday'].map(lambda wkd: convert_weekday(wkd))
df['day_week'] = df['weekday_fmt'].map(lambda wkd: 1 if wkd == 'week' else 0)
df['day_saturday'] = df['weekday_fmt'].map(lambda wkd: 1 if wkd == 'saturday' else 0)
df['day_sunday'] = df['weekday_fmt'].map(lambda wkd: 1 if wkd == 'sunday' else 0)

In [9]:
# One-hot encode the TOU Timeslot field
df['tou_offpeak'] = df['tou_time_slot'].map(lambda tou: 1 if tou == 'o' else 0)
df['tou_standard'] = df['tou_time_slot'].map(lambda tou: 1 if tou == 's' else 0)
df['tou_peak'] = df['tou_time_slot'].map(lambda tou: 1 if tou == 'p' else 0)

In [10]:
# Convert hour field to unit circle coordinates
df['ts_hour_sin'] = np.sin( df['ts_hour'] )
df['ts_hour_cos'] = np.cos( df['ts_hour'] )

In [11]:
df.set_index('timestamp', inplace=True)

In [12]:
df.drop(['entry_time', 'ts_hour', 'weekday', 'weekday_fmt', 'tou_time_slot'], axis=1, inplace=True)

In [13]:
load_profile_df = df[['ts_hour_sin', 'ts_hour_cos', 'tou_offpeak', 'tou_standard', 'tou_peak', 'day_week', 'day_saturday', 'day_sunday', 'site_load_energy', 'solar_prod_energy', 'solar_ctlr_setpoint', 'grid_import_energy']]

In [14]:
load_profile_df.head(48)

Unnamed: 0_level_0,ts_hour_sin,ts_hour_cos,tou_offpeak,tou_standard,tou_peak,day_week,day_saturday,day_sunday,site_load_energy,solar_prod_energy,solar_ctlr_setpoint,grid_import_energy
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-11-01 00:00:00,0.0,1.0,1,0,0,1,0,0,1339.4,0.0,100.0,1339.4
2024-11-01 01:00:00,0.841471,0.540302,1,0,0,1,0,0,1388.9,0.0,100.0,1388.9
2024-11-01 02:00:00,0.909297,-0.416147,1,0,0,1,0,0,1444.8,0.0,100.0,1444.8
2024-11-01 03:00:00,0.14112,-0.989992,1,0,0,1,0,0,1463.7,0.0,100.0,1463.7
2024-11-01 04:00:00,-0.756802,-0.653644,1,0,0,1,0,0,1467.0,0.0,100.0,1467.0
2024-11-01 05:00:00,-0.958924,0.283662,1,0,0,1,0,0,1402.91,24.21,100.0,1378.7
2024-11-01 06:00:00,-0.279415,0.96017,0,0,1,1,0,0,1458.7,173.8,100.0,1284.9
2024-11-01 07:00:00,0.656987,0.753902,0,0,1,1,0,0,1488.2,519.5,100.0,968.7
2024-11-01 08:00:00,0.989358,-0.1455,0,0,1,1,0,0,1442.5,851.6,100.0,590.9
2024-11-01 09:00:00,0.412118,-0.91113,0,1,0,1,0,0,1381.8,1106.1,100.0,275.7


In [15]:
load_profile_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ts_hour_sin,720.0,0.040825,0.697899,-0.99999,-0.595763,0.07056,0.701904,0.990607
ts_hour_cos,720.0,-0.022538,0.715646,-0.999961,-0.680155,-0.070537,0.683713,1.0
tou_offpeak,720.0,0.443056,0.497092,0.0,0.0,0.0,1.0,1.0
tou_standard,720.0,0.411111,0.492377,0.0,0.0,0.0,1.0,1.0
tou_peak,720.0,0.145833,0.353184,0.0,0.0,0.0,0.0,1.0
day_week,720.0,0.7,0.458576,0.0,0.0,1.0,1.0,1.0
day_saturday,720.0,0.166667,0.372937,0.0,0.0,0.0,0.0,1.0
day_sunday,720.0,0.133333,0.340171,0.0,0.0,0.0,0.0,1.0
site_load_energy,720.0,1243.478583,327.529219,0.0,1063.915,1322.4,1488.25,1934.7
solar_prod_energy,720.0,306.043861,414.50255,0.0,0.0,36.0,561.375,1443.1


In [16]:
# Filter the load profile data for a specific date period
load_profile_filtered_df = load_profile_df.loc['2024-11-04':'2024-11-10']

In [17]:
load_profile_filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 168 entries, 2024-11-04 00:00:00 to 2024-11-10 23:00:00
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ts_hour_sin          168 non-null    float64
 1   ts_hour_cos          168 non-null    float64
 2   tou_offpeak          168 non-null    int64  
 3   tou_standard         168 non-null    int64  
 4   tou_peak             168 non-null    int64  
 5   day_week             168 non-null    int64  
 6   day_saturday         168 non-null    int64  
 7   day_sunday           168 non-null    int64  
 8   site_load_energy     168 non-null    float64
 9   solar_prod_energy    168 non-null    float64
 10  solar_ctlr_setpoint  168 non-null    float64
 11  grid_import_energy   168 non-null    float64
dtypes: float64(6), int64(6)
memory usage: 17.1 KB


### Run Environment

#### Fix Policy Agent

In [18]:
# env = MicrogridEnv(data=load_profile_filtered_df, debug_flag=True)

# # Action Space:
# # {0: 'charge-1500', 1: 'charge-1000', 2: 'charge-500', 3: 'do-nothing', 4: 'discharge-500', 5: 'discharge-1000', 6: 'discharge-1500'}

# episode_reward = 0.0

# state, reward, done = env.reset()

# episode_reward += reward

# for idx in range(168):

#     action = env.rule_based_policy()

#     state, reward, done = env.step(action=action)

#     episode_reward += reward

# print(f"Episode reward: {episode_reward: .2f}")

In [19]:
# Episode reward: -348359.38

#### Tabular Q-Learning Agent

In [20]:
# iter_no = 1
# total_num_steps = 6*168_000

# best_reward = -999_999

# daily_episode_reward = 0.0
# daily_episode_memory = []

# weekly_episode_reward = 0.0
# weekly_episode_memory = []

# agent = QLearningAgent(env=MicrogridEnv(data=load_profile_filtered_df, debug_flag=False), 
#                        epsilon_start=1.0, 
#                        epsilon_end=0.1, 
#                        decay_steps=total_num_steps)

# while True:
    
#     iter_no += 1

#     # Take one step in the environment
#     state, action, reward, next_state = agent.sample_env(use_fixed_rule_policy=False)

#     # Immediate q-value update
#     agent.value_update(state, action, reward, next_state)

#     daily_episode_reward += reward
#     daily_episode_memory.append( (state, action, reward, next_state) )
    
#     weekly_episode_reward += reward
#     weekly_episode_memory.append( (state, action, reward, next_state) )

#     if (iter_no % 24) == 0:

#         # print(f"[{iter_no}] daily episode reward: {daily_episode_reward: .2f}")

#         # Daily q-value update
#         for state_d, action_d, reward_d, next_state_d in daily_episode_memory:
#             agent.value_update(state_d, action_d, daily_episode_reward, next_state_d)

#         daily_episode_reward = 0.0
#         daily_episode_memory = []
        
#         if (iter_no % 168) == 0:
    
#             # print(f"[{iter_no}] weekly episode reward: {weekly_episode_reward: .2f}")

#             # Weekly q-value update
#             # for state_w, action_w, reward_w, next_state_w in weekly_episode_memory:
#             #     agent.value_update(state_w, action_w, weekly_episode_reward, next_state_w)
    
#             if weekly_episode_reward > best_reward:
#                 print(f"[{iter_no}] Best weekly episode reward updated {best_reward: .3f} -> {weekly_episode_reward: .3f}")
#                 best_reward = weekly_episode_reward
    
#             if weekly_episode_reward > -10.0:
                
#                 print(f"\nSolved in {iter_no} iterations !\n")

#                 agent.display_action_value_table()
    
#                 # print("\n\nRun final episode with trained agent: ")
#                 # final_reward = agent.run_test_episode(env=MicrogridEnv(data=load_profile_filtered_df, debug_flag=True))
#                 # print(f"Final reward: {final_reward: .2f}")
                
#                 break
    
#             if iter_no >= total_num_steps:
    
#                 print(f"\nBest weekly episode reward: {best_reward: .2f}\n")
                
#                 print("\nDone !!!\n")
    
#                 agent.display_action_value_table()
    
#                 # print("\n\nRun final episode with trained agent: ")
#                 # final_reward = agent.run_test_episode(env=MicrogridEnv(data=load_profile_filtered_df, debug_flag=True))
#                 # print(f"Final reward: {final_reward: .2f}")
                
#                 break
    
#             weekly_episode_reward = 0.0
#             weekly_episode_memory = []

In [21]:
# Rule based policy best episode reward -> -6380.69
# RL Agent policy best episode reward -> -5856.00

In [22]:
env = MicrogridEnv(data = load_profile_filtered_df, 
                   grid_notified_maximum_demand = 2000.0, 
                   bess_capacity = 3000.0, 
                   bess_cycle_efficiency = 0.9, 
                   bess_step_sizes = [1500.0, 1000.0, 500.0, 0.0, 500.0, 1000.0, 1500.0], 
                   tou_peak_tariff = 5.0, 
                   tou_standard_tariff = 2.0, 
                   tou_offpeak_tariff = 1.0, 
                   solar_ppa_tariff = 1.4,
                   debug_flag = True)

Environment Defaults: 

          Grid Notified Maximum Demand: 2000.0 kVA
          BESS Capacity: 3000.0 kWh
          BESS Actions: charge-1500, charge-1000, charge-500, do-nothing, discharge-500, discharge-1000, discharge-1500
          

Data Summary: 
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 168 entries, 2024-11-04 00:00:00 to 2024-11-10 23:00:00
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ts_hour_sin          168 non-null    float64
 1   ts_hour_cos          168 non-null    float64
 2   tou_offpeak          168 non-null    int64  
 3   tou_standard         168 non-null    int64  
 4   tou_peak             168 non-null    int64  
 5   day_week             168 non-null    int64  
 6   day_saturday         168 non-null    int64  
 7   day_sunday           168 non-null    int64  
 8   site_load_energy     168 non-null    float64
 9   solar_prod_energy    168 non-null    float64


In [23]:
# {0: 'charge-1500', 1: 'charge-1000', 2: 'charge-500', 3: 'do-nothing', 4: 'discharge-500', 5: 'discharge-1000', 6: 'discharge-1500'}

In [32]:
# Fixed Policy

episode_reward = 0

state, reward, done = env.reset(index=3)


# TOU OFF-PEAK
# ==============================================================
# hour:3 -> do-nothing -> rw: 3:1000, 0:1000, 6:-3308
state, reward, done = env.step(action=3)
episode_reward += reward

# hour:4 -> charge -> rw: 
state, reward, done = env.step(action=3)
episode_reward += reward

# hour:5 -> charge -> rw: 
state, reward, done = env.step(action=3)
episode_reward += reward


# TOU PEAK
# ==============================================================
# hour:6 -> discharge -> rw: 3:1000, 0:1000, 6:3846
state, reward, done = env.step(action=6)
episode_reward += reward

# hour:7 -> discharge -> rw: 3:0, 0:-3212, 6:2241
state, reward, done = env.step(action=6)
episode_reward += reward

# hour:8 -> discharge -> rw: 3:0, 0:-4583, 6:1436
state, reward, done = env.step(action=0)
episode_reward += reward


# TOU STANDARD
# ==============================================================
# hour:9 -> do-nothing -> rw: 3:0, 0:1050, 6:-1145
state, reward, done = env.step(action=0)
episode_reward += reward

# hour:10 -> do-nothing -> rw: 0
state, reward, done = env.step(action=3)
episode_reward += reward

# hour:11 -> charge -> rw: 3:0, 0:806, 6:35
state, reward, done = env.step(action=0)
episode_reward += reward

# hour:12 -> charge -> rw: 3:0, 0:1365, 6:37
state, reward, done = env.step(action=6)
episode_reward += reward

# hour:13 -> charge -> rw: 3:0, 0:1000, 6:963
state, reward, done = env.step(action=0)
episode_reward += reward


print(f"""
Episode Reward: {episode_reward: .2f}
""")

# Episode reward (always do-nothing): -14790.76

# Episode reward (discharge on peak only): -8147.14

# Episode reward (discharge on peak, and charge when maximum solar): -1415.38

# Episode reward (optimal policy): 14981.45 (9789.63)


            [3] Solar Surplus Energy Calculation ->
                    Control Setpoint Ratio:  1.00
                    Solar Production Energy:  0.00
                    Solar Full Production Energy:  0.00
                    Solar Surplus Production Energy:  0.00
             

          [3] Environment State ->
                  hour_sin:  0.1411
                  hour_cos: -0.9900
                  tou_offpeak:  1
                  tou_standard:  0
                  tou_peak:  0
                  day_week:  1
                  day_saturday:  0
                  day_sunday:  0
                  site_load_energy:  1145.30 (kWh)
                  solar_prod_energy:  0.00 (kWh)
                  solar_ctlr_setpoint:  100.00 (%)
                  solar_vs_load_ratio:  0.00 (%)
                  grid_import_energy:  1145.30 (kWh)
                  bess_capacity:  3000.00 (kWh)
                  bess_cycle_efficiency:  0.90 (kWh)
                  bess_avail_discharge_energy:  3000.00 