### Import Library Dependencies

In [1]:
from microgrid_gym_env import MicrogridEnv
from double_dqn_agent import DoubleDQNAgent
from load_profile_data_loader import LoadProfileDataLoader

  from torch.distributed.optim import ZeroRedundancyOptimizer


### Import Microgrid Load Profile Data

In [2]:
# Open the November 2024 load-profile CSV once, then cut it into two
# non-overlapping date windows: the earlier one for training, the later one
# held out for testing. The loader's printed summary for the training window
# runs through 23:00 on the end date, i.e. the end date is included.
data_loader = LoadProfileDataLoader(csv_file_path="load_profile_data_nov2024.csv")

train_load_profile_data, test_load_profile_data = (
    data_loader.load_data(from_date=window_start, to_date=window_end)
    for window_start, window_end in (
        ('2024-11-01', '2024-11-22'),  # training window
        ('2024-11-23', '2024-11-30'),  # test window
    )
)


Data successfully loaded from path => load_profile_data_nov2024.csv

Info: 

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 528 entries, 2024-11-01 00:00:00 to 2024-11-22 23:00:00
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ts_hour_sin          528 non-null    float64
 1   ts_hour_cos          528 non-null    float64
 2   tou_offpeak          528 non-null    int64  
 3   tou_standard         528 non-null    int64  
 4   tou_peak             528 non-null    int64  
 5   day_week             528 non-null    int64  
 6   day_saturday         528 non-null    int64  
 7   day_sunday           528 non-null    int64  
 8   site_load_energy     528 non-null    float64
 9   solar_prod_energy    528 non-null    float64
 10  solar_ctlr_setpoint  528 non-null    float64
 11  grid_import_energy   528 non-null    float64
dtypes: float64(6), int64(6)
memory usage: 53.6 KB
None

Summary Statistics: 

 

In [3]:
# Time-of-use energy rates for the two candidate tariffs, keyed by pricing band.
# The environment cells divide these rates by 100 before passing them on.
# NOTE(review): units are not stated in this notebook (presumably cents/kWh) - confirm.
mini_flex_high_demand_tariff = dict(peak_rate=624.05, standard_rate=189.08, offpeak_rate=102.70)
mini_flex_low_demand_tariff = dict(peak_rate=203.57, standard_rate=140.10, offpeak_rate=88.91)

# Tariff used by both the training and the test environment below.
selected_tariff = mini_flex_high_demand_tariff

### Initialize Microgrid Training Environment

In [4]:
# Training environment, driven by the training slice of the load profile.
# The selected tariff's rates are divided by 100 before being handed over.
train_env = MicrogridEnv(
    data=train_load_profile_data,
    grid_notified_maximum_demand=2000.0,  # the environment log reports this in kVA
    bess_capacity=3000.0,                 # the environment log reports this in kWh
    bess_cycle_efficiency=0.9,
    # One magnitude per discrete action; the environment log lists the actions as
    # charge-1000, charge-250, do-nothing, discharge-250, discharge-1000.
    bess_step_sizes=[1000.0, 250.0, 0.0, 250.0, 1000.0],
    tou_peak_tariff=selected_tariff['peak_rate'] / 100.0,
    tou_standard_tariff=selected_tariff['standard_rate'] / 100.0,
    tou_offpeak_tariff=selected_tariff['offpeak_rate'] / 100.0,
    solar_ppa_tariff=1.4,
    debug_flag=False,
)

# Final expression of the cell, so the notebook displays what reset() returns:
# the initial 12-element observation and an empty info dict.
train_env.reset()

Line:445-display_info-INFO:  Environment Setup: 
Line:446-display_info-INFO:  
    Grid Notified Maximum Demand: 2000.0 kVA
    BESS Capacity: 3000.0 kWh
    BESS Actions: charge-1000, charge-250, do-nothing, discharge-250, discharge-1000
    
Line:452-display_info-INFO:  
Data Summary: 
Line:453-display_info-INFO:  None
Line:454-display_info-INFO:  



<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 528 entries, 2024-11-01 00:00:00 to 2024-11-22 23:00:00
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ts_hour_sin          528 non-null    float64
 1   ts_hour_cos          528 non-null    float64
 2   tou_offpeak          528 non-null    int64  
 3   tou_standard         528 non-null    int64  
 4   tou_peak             528 non-null    int64  
 5   day_week             528 non-null    int64  
 6   day_saturday         528 non-null    int64  
 7   day_sunday           528 non-null    int64  
 8   site_load_energy     528 non-null    float64
 9   solar_prod_energy    528 non-null    float64
 10  solar_ctlr_setpoint  528 non-null    float64
 11  grid_import_energy   528 non-null    float64
dtypes: float64(6), int64(6)
memory usage: 53.6 KB


(array([0.0000e+00, 1.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00, 0.0000e+00, 0.0000e+00, 1.3394e+03, 0.0000e+00,
        1.0000e+02, 1.0000e+02]),
 {})

### Initialize Double DQN Agent For Training

In [None]:
# Sizing constants shared by several hyper-parameters below.
EPISODE_STEPS = 24 * 7      # 1 episode = 1 week
TRAINING_STEPS = 200_000    # total training budget, in steps

# Double DQN agent trained against the training environment.
# NOTE(review): per_beta (0.4) plus TRAINING_STEPS increments of 0.6/TRAINING_STEPS
# sums to 1.0, so beta presumably anneals to 1.0 over the run - confirm in the agent.
agent = DoubleDQNAgent(
    env=train_env,
    discount_factor=0.98,
    td_n_steps_unroll=3,
    initial_lr=0.0005,
    final_lr_factor=0.01,
    total_training_steps=TRAINING_STEPS,
    per_alpha=0.6,
    per_beta=0.4,
    per_beta_increment=0.6 / TRAINING_STEPS,
    exp_batch_size=EPISODE_STEPS * 8,             # 8 episodes
    exp_buffer_size=EPISODE_STEPS * 1024,         # 1024 episodes
    exp_min_buffer_samples=EPISODE_STEPS * 32,    # 32 episodes
    target_model_sync_steps=EPISODE_STEPS * 32,   # 32 episodes
    episode_len=EPISODE_STEPS,
    stop_reward=1000.0,
)

Line:96-display_dqn_net-INFO:  
Torch Compute Device -> mps

Line:98-display_dqn_net-INFO:  Double DQN Model Architecture -> 


DoubleDQNModel(
  (val_net): Sequential(
    (0): Linear(in_features=12, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=1024, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): ReLU()
    (8): Linear(in_features=512, out_features=1, bias=True)
  )
  (adv_net): Sequential(
    (0): NoisyLinear(in_features=12, out_features=1024, bias=True)
    (1): ReLU()
    (2): NoisyLinear(in_features=1024, out_features=1024, bias=True)
    (3): ReLU()
    (4): NoisyLinear(in_features=1024, out_features=512, bias=True)
    (5): ReLU()
    (6): NoisyLinear(in_features=512, out_features=512, bias=True)
    (7): ReLU()
    (8): NoisyLinear(in_features=512, out_features=5, bias=True)
  )
)


### Train Double DQN Agent

In [None]:
# Run the agent's training loop. The captured output shows it reporting each
# target-model weight sync and every new best episode reward as it goes.
agent.learn()

    [5376] -> Sync target model weights !

                        New Best Episode Reward Metrics:
                            Episode Number: 0
                            Step Number: 5376
                            Best Episode Reward: Prev:-999999.00 -> New:-48.29
                        
    [10752] -> Sync target model weights !

                        New Best Episode Reward Metrics:
                            Episode Number: 40
                            Step Number: 12096
                            Best Episode Reward: Prev:-48.29 -> New:-29.38
                        

                        New Best Episode Reward Metrics:
                            Episode Number: 50
                            Step Number: 13776
                            Best Episode Reward: Prev:-29.38 -> New:-15.54
                        
    [16128] -> Sync target model weights !
    [21504] -> Sync target model weights !
    [26880] -> Sync target model weights !
    [32256] -> Sync target m

### Test Double DQN Model

In [None]:
import numpy as np
import torch
from double_dqn_model import DoubleDQNModel
from torch.utils.tensorboard.writer import SummaryWriter

# Rebuild the network with the 12-feature input and 5-action output shown in
# the training-time architecture printout, then restore the trained weights.
dqn_agent_model = DoubleDQNModel(input_shape=12, n_actions=5)

# map_location="cpu": training ran on the "mps" device, so the checkpoint may
# hold MPS tensors. Mapping them to CPU lets the file load on machines without
# that backend; load_state_dict then copies the values into the model's own
# parameters regardless of where those live.
dqn_agent_model.load_state_dict(
    torch.load("double_dqn_model_weights.pth", map_location="cpu", weights_only=True)
)

# Switch to evaluation mode; as the cell's last expression this also displays
# the model summary.
dqn_agent_model.eval()

In [None]:
# Evaluation environment: same settings as the training environment, but fed
# the held-out test slice of the load profile and with debug output enabled.
test_env = MicrogridEnv(
    data=test_load_profile_data,
    grid_notified_maximum_demand=2000.0,
    bess_capacity=3000.0,
    bess_cycle_efficiency=0.9,
    # One magnitude per discrete action (five actions in total).
    bess_step_sizes=[1000.0, 250.0, 0.0, 250.0, 1000.0],
    tou_peak_tariff=selected_tariff['peak_rate'] / 100.0,
    tou_standard_tariff=selected_tariff['standard_rate'] / 100.0,
    tou_offpeak_tariff=selected_tariff['offpeak_rate'] / 100.0,
    solar_ppa_tariff=1.4,
    debug_flag=True,
)

# Final expression of the cell, so the notebook displays what reset() returns.
test_env.reset()

In [None]:
# Roll the trained network greedily through one week (24*7 steps) of the test
# environment, logging the observed state, the chosen action and the running
# reward to TensorBoard. `dqn_agent_reward` holds the episode total afterwards.
dqn_agent_reward = 0.0

# Energy value logged for each action index.
# NOTE(review): indices 0-1 are negative here while the environment log names
# actions 0-1 "charge" - presumably negative means charging; confirm.
action_energy_vals = [-1000.0, -250.0, 0.0, 250.0, 1000.0]

state, _ = test_env.reset()

# The context manager closes the writer when the rollout ends (or fails), so
# buffered events are flushed to disk instead of being left pending.
with SummaryWriter(comment="DoubleDQNAgent-TestEnv") as writer:

    for state_idx in range(24*7):

        # Convert the observation to a float tensor with a batch dimension
        # (shape: [1, state_dim]).
        state_t = torch.as_tensor(np.array(state)).float().unsqueeze(0)

        # Pure inference: skip building an autograd graph for the forward pass.
        with torch.no_grad():
            q_vals = dqn_agent_model(state_t)

        action = torch.argmax(q_vals, dim=1).item()  # Select action with max Q-value

        action_energy = action_energy_vals[action]  # Convert action index to energy value

        state, reward, done, truncated, _ = test_env.step(action=action)

        # Add this step's reward before logging, so the curve at `state_idx`
        # includes the step just taken and its last point is the episode total.
        dqn_agent_reward += reward

        # NOTE(review): the loaded DataFrame has site_load_energy at column 8 and
        # grid_import_energy at column 11, which does not match the labels used
        # here - confirm the observation layout produced by the environment.
        writer.add_scalars("Microgrid Environment State", {"Grid Import Energy": state[8],
                                                           "Solar PV Production Energy": state[9],
                                                           "BESS SoC": state[11],
                                                           "Agent Selected Action Energy": action_energy},
                                                           state_idx)

        writer.add_scalar("DoubleDQN Agent Selected Action Energy", action_energy, state_idx)

        writer.add_scalar("DoubleDQN Agent Cumulative Reward", dqn_agent_reward, state_idx)

        # Stepping an environment after it reports the end of the episode is
        # undefined under the Gym API, so stop as soon as it says so.
        if done or truncated:
            break

In [None]:
# Report the reward accumulated by the DQN rollout above. The " .2f" format
# keeps two decimals and puts a space where a minus sign would go for
# non-negative values.
print(f"Total DQN Agent Test Reward: {dqn_agent_reward: .2f}")

### Run Fixed Rule Policy For Baseline

In [None]:
# Baseline: roll the environment's built-in rule-based policy through the same
# one-week (24*7 steps) test episode, logging the same quantities as the DQN
# rollout. `fixed_rule_reward` holds the episode total afterwards.
fixed_rule_reward = 0.0

# Energy value logged for each action index.
# NOTE(review): indices 0-1 are negative here while the environment log names
# actions 0-1 "charge" - presumably negative means charging; confirm.
action_energy_vals = [-1000.0, -250.0, 0.0, 250.0, 1000.0]

state, _ = test_env.reset()

# The context manager closes the writer when the rollout ends (or fails), so
# buffered events are flushed to disk instead of being left pending.
with SummaryWriter(comment="FixedRuleAgent-TestEnv") as writer:

    for state_idx in range(24*7):

        # Ask the environment for the action its rule-based policy would take.
        action = test_env.rule_based_policy()

        action_energy = action_energy_vals[action]

        state, reward, done, truncated, _ = test_env.step(action=action)

        # Add this step's reward before logging, so the curve at `state_idx`
        # includes the step just taken and its last point is the episode total.
        fixed_rule_reward += reward

        # NOTE(review): the loaded DataFrame has site_load_energy at column 8 and
        # grid_import_energy at column 11, which does not match the labels used
        # here - confirm the observation layout produced by the environment.
        writer.add_scalars("Microgrid Environment State", {"Grid Import Energy": state[8],
                                                           "Solar PV Production Energy": state[9],
                                                           "BESS SoC": state[11],
                                                           "Agent Selected Action Energy": action_energy},
                                                           state_idx)

        writer.add_scalar("FixedRule Agent Selected Action Energy", action_energy, state_idx)

        writer.add_scalar("FixedRule Agent Cumulative Reward", fixed_rule_reward, state_idx)

        # Stepping an environment after it reports the end of the episode is
        # undefined under the Gym API, so stop as soon as it says so.
        if done or truncated:
            break

In [None]:
# Report the reward accumulated by the rule-based baseline above. The " .2f"
# format keeps two decimals and puts a space where a minus sign would go for
# non-negative values.
print(f"Total Fixed Rule Test Reward: {fixed_rule_reward: .2f}")