### Import All Library Dependencies

In [1]:
import os
import sys
import traceback
import logging as log
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from microgrid_env import MicrogridEnv
from q_learning_agent import QLearningAgent
from torch.utils.tensorboard.writer import SummaryWriter

log.basicConfig(format="Line:%(lineno)d-%(funcName)s-%(levelname)s:  %(message)s")
log.getLogger().setLevel(log.INFO)

### Data Pre-processing

In [2]:
df = pd.read_csv("load_profile_data_nov2024.csv", header=0)

In [3]:
df.head()

Unnamed: 0,entry_time,weekday,tou_time_slot,grid_import_energy,solar_prod_energy,solar_ctlr_setpoint,site_load_energy
0,11/1/24 0:00,Friday,o,1339.4,0.0,100.0,1339.4
1,11/1/24 1:00,Friday,o,1388.9,0.0,100.0,1388.9
2,11/1/24 2:00,Friday,o,1444.8,0.0,100.0,1444.8
3,11/1/24 3:00,Friday,o,1463.7,0.0,100.0,1463.7
4,11/1/24 4:00,Friday,o,1467.0,0.0,100.0,1467.0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 720 entries, 0 to 719
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   entry_time           720 non-null    object 
 1   weekday              720 non-null    object 
 2   tou_time_slot        720 non-null    object 
 3   grid_import_energy   720 non-null    float64
 4   solar_prod_energy    720 non-null    float64
 5   solar_ctlr_setpoint  720 non-null    float64
 6   site_load_energy     720 non-null    float64
dtypes: float64(4), object(3)
memory usage: 39.5+ KB


In [5]:
df['timestamp'] = pd.to_datetime(df['entry_time'], format="mixed")

In [6]:
df['ts_hour'] = df['timestamp'].dt.hour

In [7]:
def convert_weekday(wkd: str) -> str:
    if wkd in ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']:
        return 'week'
    else:
        return wkd.lower()

In [8]:
# One-hot encode the Weekday field
df['weekday_fmt'] = df['weekday'].map(lambda wkd: convert_weekday(wkd))
df['day_week'] = df['weekday_fmt'].map(lambda wkd: 1 if wkd == 'week' else 0)
df['day_saturday'] = df['weekday_fmt'].map(lambda wkd: 1 if wkd == 'saturday' else 0)
df['day_sunday'] = df['weekday_fmt'].map(lambda wkd: 1 if wkd == 'sunday' else 0)

In [9]:
# One-hot encode the TOU Timeslot field
df['tou_offpeak'] = df['tou_time_slot'].map(lambda tou: 1 if tou == 'o' else 0)
df['tou_standard'] = df['tou_time_slot'].map(lambda tou: 1 if tou == 's' else 0)
df['tou_peak'] = df['tou_time_slot'].map(lambda tou: 1 if tou == 'p' else 0)

In [10]:
# Convert hour field to unit circle coordinates
df['ts_hour_sin'] = np.sin( df['ts_hour'] )
df['ts_hour_cos'] = np.cos( df['ts_hour'] )

In [11]:
df.set_index('timestamp', inplace=True)

In [12]:
df.drop(['entry_time', 'ts_hour', 'weekday', 'weekday_fmt', 'tou_time_slot'], axis=1, inplace=True)

In [13]:
load_profile_df = df[['ts_hour_sin', 'ts_hour_cos', 'tou_offpeak', 'tou_standard', 'tou_peak', 'day_week', 'day_saturday', 'day_sunday', 'site_load_energy', 'solar_prod_energy', 'solar_ctlr_setpoint', 'grid_import_energy']]

In [14]:
load_profile_df.head(48)

Unnamed: 0_level_0,ts_hour_sin,ts_hour_cos,tou_offpeak,tou_standard,tou_peak,day_week,day_saturday,day_sunday,site_load_energy,solar_prod_energy,solar_ctlr_setpoint,grid_import_energy
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-11-01 00:00:00,0.0,1.0,1,0,0,1,0,0,1339.4,0.0,100.0,1339.4
2024-11-01 01:00:00,0.841471,0.540302,1,0,0,1,0,0,1388.9,0.0,100.0,1388.9
2024-11-01 02:00:00,0.909297,-0.416147,1,0,0,1,0,0,1444.8,0.0,100.0,1444.8
2024-11-01 03:00:00,0.14112,-0.989992,1,0,0,1,0,0,1463.7,0.0,100.0,1463.7
2024-11-01 04:00:00,-0.756802,-0.653644,1,0,0,1,0,0,1467.0,0.0,100.0,1467.0
2024-11-01 05:00:00,-0.958924,0.283662,1,0,0,1,0,0,1402.91,24.21,100.0,1378.7
2024-11-01 06:00:00,-0.279415,0.96017,0,0,1,1,0,0,1458.7,173.8,100.0,1284.9
2024-11-01 07:00:00,0.656987,0.753902,0,0,1,1,0,0,1488.2,519.5,100.0,968.7
2024-11-01 08:00:00,0.989358,-0.1455,0,0,1,1,0,0,1442.5,851.6,100.0,590.9
2024-11-01 09:00:00,0.412118,-0.91113,0,1,0,1,0,0,1381.8,1106.1,100.0,275.7


In [15]:
load_profile_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ts_hour_sin,720.0,0.040825,0.697899,-0.99999,-0.595763,0.07056,0.701904,0.990607
ts_hour_cos,720.0,-0.022538,0.715646,-0.999961,-0.680155,-0.070537,0.683713,1.0
tou_offpeak,720.0,0.443056,0.497092,0.0,0.0,0.0,1.0,1.0
tou_standard,720.0,0.411111,0.492377,0.0,0.0,0.0,1.0,1.0
tou_peak,720.0,0.145833,0.353184,0.0,0.0,0.0,0.0,1.0
day_week,720.0,0.7,0.458576,0.0,0.0,1.0,1.0,1.0
day_saturday,720.0,0.166667,0.372937,0.0,0.0,0.0,0.0,1.0
day_sunday,720.0,0.133333,0.340171,0.0,0.0,0.0,0.0,1.0
site_load_energy,720.0,1243.478583,327.529219,0.0,1063.915,1322.4,1488.25,1934.7
solar_prod_energy,720.0,306.043861,414.50255,0.0,0.0,36.0,561.375,1443.1


In [16]:
load_profile_filtered_df = load_profile_df.loc['2024-11-04':'2024-11-10']

In [17]:
load_profile_filtered_df.head()

Unnamed: 0_level_0,ts_hour_sin,ts_hour_cos,tou_offpeak,tou_standard,tou_peak,day_week,day_saturday,day_sunday,site_load_energy,solar_prod_energy,solar_ctlr_setpoint,grid_import_energy
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-11-04 00:00:00,0.0,1.0,1,0,0,1,0,0,1031.9,0.0,100.0,1031.9
2024-11-04 01:00:00,0.841471,0.540302,1,0,0,1,0,0,1191.1,0.0,100.0,1191.1
2024-11-04 02:00:00,0.909297,-0.416147,1,0,0,1,0,0,1233.8,0.0,100.0,1233.8
2024-11-04 03:00:00,0.14112,-0.989992,1,0,0,1,0,0,1145.3,0.0,100.0,1145.3
2024-11-04 04:00:00,-0.756802,-0.653644,1,0,0,1,0,0,1247.1,0.0,100.0,1247.1


In [18]:
load_profile_filtered_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ts_hour_sin,168.0,0.040825,0.699499,-0.99999,-0.595763,0.07056,0.701904,0.990607
ts_hour_cos,168.0,-0.022538,0.717287,-0.999961,-0.680155,-0.070537,0.683713,1.0
tou_offpeak,168.0,0.446429,0.498608,0.0,0.0,0.0,1.0,1.0
tou_standard,168.0,0.404762,0.492313,0.0,0.0,0.0,1.0,1.0
tou_peak,168.0,0.14881,0.356965,0.0,0.0,0.0,0.0,1.0
day_week,168.0,0.714286,0.453104,0.0,0.0,1.0,1.0,1.0
day_saturday,168.0,0.142857,0.350973,0.0,0.0,0.0,0.0,1.0
day_sunday,168.0,0.142857,0.350973,0.0,0.0,0.0,0.0,1.0
site_load_energy,168.0,1154.93994,304.294259,534.68,1036.775,1217.15,1377.6,1652.7
solar_prod_energy,168.0,224.364345,337.372179,0.0,0.0,18.125,324.8875,1266.3


### Run Environment

#### Fix Policy Agent

In [19]:
# epsilon = 0.2
# reward_buffer = []
# total_episode_reward = 0.0

# env = MicrogridEnv(data=load_profile_filtered_df)

# state, reward, done = env.reset()

# while(not done):

#     reward_buffer.append(total_episode_reward)

#     if np.random.random() <= epsilon:

#         action = np.random.choice(7)
        
#     else:
        
#         action = env.rule_based_policy(state=state)

#     state, reward, done = env.step(action=action)

#     total_episode_reward = (0.99 * total_episode_reward) + reward

#### Tabular Q-Learning Agent

In [22]:
agent = QLearningAgent(env=MicrogridEnv(data=load_profile_filtered_df, debug_flag=False))
# writer = SummaryWriter(comment="-q-learning")

TEST_EPISODES = 1

iter_no = 0
best_reward = 0.0

while True:
    
    iter_no += 1

    # Take one step in the environment
    state, action, reward, next_state = agent.sample_env(use_fixed_rule_policy=False)

    # Update q-value function
    agent.value_update(state, action, reward, next_state)

    if (iter_no % 167) > 0:

        continue

    elif iter_no > 500_000:

        print("\n\nCould not solve the environment !!!")
        print("   Best test reward achieved %.3f" % (best_reward))
        break

    else:
        
        test_reward = 0.0 
    
        # Run a test episode to track the performance of the agent
        print(f"[{iter_no}]: Test Agent's Performance: ")
        test_reward += agent.run_test_episode(env=MicrogridEnv(data=load_profile_filtered_df, debug_flag=False))
        print(f"   Average test episode reward: {test_reward: .2f}")
        
        # writer.add_scalar("reward", test_reward, iter_no)
    
        # Keep track of the best average reward achieved in the test episodes
        if test_reward > best_reward:
            print("   Best test reward updated %.3f -> %.3f" % (best_reward, test_reward))
            best_reward = test_reward
    
        # If the average reward achieved in the test episodes are above this threshold, then the agent has converged to a good solution
        # Rule based policy's best reward: 111.27
        # RL agent's best reward: 116.319
        if test_reward > 116.0:
            
            print("Solved in %d iterations!" % iter_no)

            print("\n\nRun final episode with trained agent: ")
            final_reward = agent.run_test_episode(env=MicrogridEnv(data=load_profile_filtered_df, debug_flag=True))
            print(f"Final reward: {final_reward: .2f}")
            
            break

# writer.close()

[167]: Test Agent's Performance: 
   Average test episode reward:  12.54
   Best test reward updated 0.000 -> 12.536
[334]: Test Agent's Performance: 
   Average test episode reward:  40.12
   Best test reward updated 12.536 -> 40.121
[501]: Test Agent's Performance: 
   Average test episode reward:  49.61
   Best test reward updated 40.121 -> 49.609
[668]: Test Agent's Performance: 
   Average test episode reward:  63.77
   Best test reward updated 49.609 -> 63.772
[835]: Test Agent's Performance: 
   Average test episode reward:  67.32
   Best test reward updated 63.772 -> 67.319
[1002]: Test Agent's Performance: 
   Average test episode reward:  72.64
   Best test reward updated 67.319 -> 72.638
[1169]: Test Agent's Performance: 
   Average test episode reward:  83.91
   Best test reward updated 72.638 -> 83.913
[1336]: Test Agent's Performance: 
   Average test episode reward:  86.71
   Best test reward updated 83.913 -> 86.708
[1503]: Test Agent's Performance: 
   Average test epi