### Import Library Dependencies

In [None]:
from microgrid_gym_env import MicrogridEnv
from double_dqn_agent import DoubleDQNAgent
from load_profile_data_loader import LoadProfileDataLoader

### Import Microgrid Load Profile Data

In [None]:
# Training split: 2024-11-01 through 2024-11-22 from the November 2024 CSV.
# fit_scalers=True presumably fits the loader's feature scalers on this
# window -- confirm against LoadProfileDataLoader.load_data.
train_data_loader = LoadProfileDataLoader(
    csv_file_path="load_profile_data_nov2024.csv"
)
train_load_profile_data = train_data_loader.load_data(
    from_date="2024-11-01",
    to_date="2024-11-22",
    fit_scalers=True,
)

In [None]:
# Evaluation split: 2024-11-23 through 2024-11-30 from the same CSV, loaded
# through its own loader instance.
#
# NOTE(review): fit_scalers=True here looks like it re-fits the scalers on the
# evaluation window instead of reusing the ones fitted on the training window.
# If so, the policy is evaluated on inputs scaled differently from what it was
# trained on. Confirm against LoadProfileDataLoader and, if it supports it,
# load this split with the training loader's scalers instead.
test_data_loader = LoadProfileDataLoader(
    csv_file_path="load_profile_data_nov2024.csv"
)
test_load_profile_data = test_data_loader.load_data(
    from_date="2024-11-23",
    to_date="2024-11-30",
    fit_scalers=True,
)

In [None]:
# Time-of-use tariff tables, one rate per TOU band (peak / standard / off-peak)
# for the high-demand and low-demand seasons. The environment cells divide
# these rates by 100.0 before use, so they are presumably quoted in cents per
# kWh -- confirm the unit against the tariff schedule.
mini_flex_high_demand_tariff = dict(
    peak_rate=624.05,
    standard_rate=189.08,
    offpeak_rate=102.70,
)
mini_flex_low_demand_tariff = dict(
    peak_rate=203.57,
    standard_rate=140.10,
    offpeak_rate=88.91,
)

# Tariff table used by both the training and the test environment.
selected_tariff = mini_flex_high_demand_tariff

### Initialize Microgrid Training Environment

In [None]:
# Configuration for the training environment. It is built on the training
# split and its loader, with debug logging switched off.
# The five bess_step_sizes entries map to the five discrete BESS actions;
# the environment's setup log lists them as charge-1000, charge-250,
# do-nothing, discharge-250, discharge-1000.
train_env_config = dict(
    data=train_load_profile_data,
    loader=train_data_loader,
    grid_notified_maximum_demand=2000.0,
    bess_capacity=3000.0,
    bess_cycle_efficiency=0.9,
    bess_step_sizes=[1000.0, 250.0, 0.0, 250.0, 1000.0],
    # Tariff table values are divided by 100 before being handed to the env.
    tou_peak_tariff=selected_tariff['peak_rate'] / 100.0,
    tou_standard_tariff=selected_tariff['standard_rate'] / 100.0,
    tou_offpeak_tariff=selected_tariff['offpeak_rate'] / 100.0,
    solar_ppa_tariff=1.4,
    debug_flag=False,
)
train_env = MicrogridEnv(**train_env_config)

# Reset once so the cell displays the initial (observation, info) pair.
train_env.reset()

### Initialize Double DQN Agent For Training

In [None]:
# One episode is one day of hourly steps; the replay/batch/sync sizes below
# are expressed as multiples of that episode length.
steps_per_episode = 24
training_steps = 50_000

agent = DoubleDQNAgent(
    env=train_env,
    discount_factor=0.98,
    td_n_steps_unroll=3,
    initial_lr=0.001,
    final_lr_factor=0.01,
    total_training_steps=training_steps,
    per_alpha=0.6,
    per_beta=0.4,
    # 0.4 + training_steps * increment == 1.0, so beta presumably anneals to
    # 1.0 by the end of training -- confirm how the agent applies it.
    per_beta_increment=0.6 / training_steps,
    exp_batch_size=steps_per_episode * 7,             # 7 episodes
    exp_buffer_size=steps_per_episode * 1000,         # 1000 episodes
    exp_min_buffer_samples=steps_per_episode * 7 * 3,  # 21 episodes
    target_model_sync_steps=steps_per_episode * 7 * 12,  # 84 episodes
    episode_len=steps_per_episode,                    # 1 episode = 1 day
    stop_reward=200.0,
)

### Train Double DQN Agent

In [None]:
# Run the agent's training loop; the agent was constructed with env=train_env.
# NOTE(review): the test section loads weights from
# ./model_checkpoints/double_dqn_model_weights.pth -- presumably written by
# this call; confirm in DoubleDQNAgent.
agent.learn()

### Test Double DQN Policy

In [16]:
import numpy as np
import torch
from double_dqn_model_big import DoubleDQNModel
from torch.utils.tensorboard.writer import SummaryWriter

In [17]:
# Rebuild the network with the dimensions used by the environment: a
# 12-element observation vector and 5 discrete BESS actions.
dqn_agent_model = DoubleDQNModel(input_shape=12, n_actions=5)

# map_location="cpu" lets a checkpoint saved from a GPU training run be
# deserialized on a CPU-only machine; the evaluation loop feeds CPU tensors,
# so the weights are wanted on the CPU anyway. weights_only=True restricts
# unpickling to tensors/primitive containers.
checkpoint_state = torch.load(
    "./model_checkpoints/double_dqn_model_weights.pth",
    map_location="cpu",
    weights_only=True,
)
dqn_agent_model.load_state_dict(checkpoint_state)

# Switch to inference mode; as the last expression this also displays the
# module structure.
dqn_agent_model.eval()

DoubleDQNModel(
  (val_net): Sequential(
    (0): Linear(in_features=12, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=1, bias=True)
  )
  (adv_net): Sequential(
    (0): NoisyLinear(in_features=12, out_features=1024, bias=True)
    (1): ReLU()
    (2): NoisyLinear(in_features=1024, out_features=512, bias=True)
    (3): ReLU()
    (4): NoisyLinear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): NoisyLinear(in_features=512, out_features=5, bias=True)
  )
)

In [18]:
# Configuration for the evaluation environment. It mirrors the training
# environment's physical and tariff settings but runs on the test split and
# its loader, with debug_flag=True (the recorded output shows the verbose
# INFO logging this enables).
# The five bess_step_sizes entries map to the five discrete BESS actions;
# the setup log lists them as charge-1000, charge-250, do-nothing,
# discharge-250, discharge-1000.
test_env_config = dict(
    data=test_load_profile_data,
    loader=test_data_loader,
    grid_notified_maximum_demand=2000.0,
    bess_capacity=3000.0,
    bess_cycle_efficiency=0.9,
    bess_step_sizes=[1000.0, 250.0, 0.0, 250.0, 1000.0],
    # Tariff table values are divided by 100 before being handed to the env.
    tou_peak_tariff=selected_tariff['peak_rate'] / 100.0,
    tou_standard_tariff=selected_tariff['standard_rate'] / 100.0,
    tou_offpeak_tariff=selected_tariff['offpeak_rate'] / 100.0,
    solar_ppa_tariff=1.4,
    debug_flag=True,
)
test_env = MicrogridEnv(**test_env_config)

# Reset once so the cell displays the initial (observation, info) pair.
test_env.reset()

Line:722-display_info-INFO:  Environment Setup: 
Line:723-display_info-INFO:  
    Grid Notified Maximum Demand: 2000.0 kVA
    BESS Capacity: 3000.0 kWh
    BESS Actions: charge-1000, charge-250, do-nothing, discharge-250, discharge-1000
    
Line:729-display_info-INFO:  
Data Summary: 
Line:730-display_info-INFO:  None
Line:731-display_info-INFO:  

Line:131-get_solar_surplus_energy-INFO:  
            [0] Solar Surplus Energy Calculation ->
                    Control Setpoint:  100.00%
                    Control Setpoint Ratio:  1.00
                    Solar Production Energy:  0.00 kWh
                    Solar Full Production Energy:  0.00 kWh
                    Solar Surplus Production Energy:  0.00 kWh
             


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 192 entries, 2024-11-23 00:00:00 to 2024-11-30 23:00:00
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ts_hour_sin          192 non-null    float64
 1   ts_hour_cos          192 non-null    float64
 2   tou_offpeak          192 non-null    int64  
 3   tou_standard         192 non-null    int64  
 4   tou_peak             192 non-null    int64  
 5   day_week             192 non-null    int64  
 6   day_saturday         192 non-null    int64  
 7   day_sunday           192 non-null    int64  
 8   site_load_energy     192 non-null    float64
 9   solar_prod_energy    192 non-null    float64
 10  solar_ctlr_setpoint  192 non-null    float64
 11  grid_import_energy   192 non-null    float64
dtypes: float64(6), int64(6)
memory usage: 19.5 KB


(array([ 0.        ,  1.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  1.        ,  0.        , -0.09867163, -1.        ,
         1.        ,  1.        ], dtype=float32),
 {'grid_import_energy': 871.9,
  'grid_import_energy_with_bess': 0.0,
  'solar_prod_energy': 0.0,
  'solar_controller_setpoint': 100.00000000000001,
  'bess_soc': 100.0,
  'bess_avail_discharge': 3000.0,
  'bess_discharge_energy': 0.0,
  'bess_charge_from_grid_energy': 0.0,
  'bess_charge_from_solar_energy': 0.0,
  'raw_reward_earned': 0.0,
  'scaled_reward_earned': 0.0,
  'action_energy': 0.0})

In [19]:
# Greedy roll-out of the trained Double DQN policy on the test environment.
# Every step is logged to TensorBoard and the total reward is printed.
dqn_agent_reward = 0.0

writer = SummaryWriter(comment="DoubleDQNAgent-TestEnv")

state, monitoring_metrics = test_env.reset()

total_steps = 24 * 8  # 8 test days x 24 hourly steps

try:
    for state_idx in range(total_steps):

        # Observation -> float tensor with a leading batch dimension: [1, state_dim].
        state_t = torch.as_tensor(np.array(state)).float().unsqueeze(0)

        # Inference only: no autograd graph is needed for action selection.
        with torch.no_grad():
            q_vals = dqn_agent_model(state_t)

        action = torch.argmax(q_vals, dim=1).item()  # Select action with max Q-value

        # NOTE(review): done/truncated are not acted on; the roll-out always
        # runs for total_steps. Confirm the env tolerates stepping past them.
        state, reward, done, truncated, monitoring_metrics = test_env.step(action=action)

        # Accumulate before logging so the cumulative series includes the
        # reward of the step it is logged at (and the final total is logged).
        dqn_agent_reward += reward

        writer.add_scalars(
            "Microgrid Environment State",
            {
                "grid_import_energy_without_bess": monitoring_metrics["grid_import_energy"],
                "grid_import_energy_with_bess": monitoring_metrics["grid_import_energy_with_bess"],
                "solar_prod_energy": monitoring_metrics["solar_prod_energy"],
                "solar_controller_setpoint": monitoring_metrics["solar_controller_setpoint"],
                "bess_avail_discharge": monitoring_metrics["bess_avail_discharge"],
                "bess_soc": monitoring_metrics["bess_soc"],
                "raw_reward_earned": monitoring_metrics["raw_reward_earned"],
                "scaled_reward_earned": monitoring_metrics["scaled_reward_earned"],
                "total_cumulative_reward": dqn_agent_reward,
                "action_energy": monitoring_metrics["action_energy"],
            },
            state_idx,
        )
finally:
    # Flush pending events and release the event file even if a step fails.
    writer.close()

print(f"Total Reward: {dqn_agent_reward: .2f}")

Line:131-get_solar_surplus_energy-INFO:  
            [0] Solar Surplus Energy Calculation ->
                    Control Setpoint:  100.00%
                    Control Setpoint Ratio:  1.00
                    Solar Production Energy:  0.00 kWh
                    Solar Full Production Energy:  0.00 kWh
                    Solar Surplus Production Energy:  0.00 kWh
             
Line:1058-apply_bess_action-INFO:  
          [0] Selected BESS Action Name -> discharge-1000
          
Line:218-calculate_bess_discharge_saving-INFO:  
            [0] Current BESS Discharge Savings:  895.44)
            
Line:295-calculate_potential_future_charge_cost_when_discharging-INFO:  
            [0] Future BESS Charge Cost (Discharging) ->  3938.71
                    BESS Grid Charge Energy:  968.78 kWh
                    BESS Solar Charge Energy:  0.00 kWh
                    Solar Surplus Energy Available:  0.00 kWh
                    BESS SoC:  70.94%
                    TOU Timeslot: Peak=0.

Total Reward:  557.79


### Check Rule-based Policy For Baseline

In [20]:
# Roll out the environment's built-in rule-based policy as a baseline for the
# learned agent. Every step is logged to TensorBoard and the total reward is
# printed.
fixed_rule_reward = 0.0

is_test_env = True

# Choose the environment, horizon and run label once instead of re-branching
# on every loop iteration.
if is_test_env:
    eval_env = test_env
    total_steps = 24 * 8   # 8 test days x 24 hourly steps
    run_label = "FixedRuleAgent-TestEnv"
else:
    eval_env = train_env
    total_steps = 24 * 22  # 22 training days x 24 hourly steps
    run_label = "FixedRuleAgent-TrainEnv"

# Label the TensorBoard run after the environment actually being evaluated.
writer = SummaryWriter(comment=run_label)

state, monitoring_metrics = eval_env.reset()

try:
    for state_idx in range(total_steps):

        action = eval_env.rule_based_policy()

        # NOTE(review): done/truncated are not acted on; the roll-out always
        # runs for total_steps. Confirm the env tolerates stepping past them.
        state, reward, done, truncated, monitoring_metrics = eval_env.step(action=action)

        # Accumulate before logging so the cumulative series includes the
        # reward of the step it is logged at (and the final total is logged).
        fixed_rule_reward += reward

        writer.add_scalars(
            "Microgrid Environment State",
            {
                "grid_import_energy_without_bess": monitoring_metrics["grid_import_energy"],
                "grid_import_energy_with_bess": monitoring_metrics["grid_import_energy_with_bess"],
                "solar_prod_energy": monitoring_metrics["solar_prod_energy"],
                "solar_controller_setpoint": monitoring_metrics["solar_controller_setpoint"],
                "bess_avail_discharge": monitoring_metrics["bess_avail_discharge"],
                "bess_soc": monitoring_metrics["bess_soc"],
                "raw_reward_earned": monitoring_metrics["raw_reward_earned"],
                "scaled_reward_earned": monitoring_metrics["scaled_reward_earned"],
                "total_cumulative_reward": fixed_rule_reward,
                "action_energy": monitoring_metrics["action_energy"],
            },
            state_idx,
        )
finally:
    # Flush pending events and release the event file even if a step fails.
    writer.close()

print(f"Total Reward: {fixed_rule_reward: .2f}")

Line:131-get_solar_surplus_energy-INFO:  
            [0] Solar Surplus Energy Calculation ->
                    Control Setpoint:  100.00%
                    Control Setpoint Ratio:  1.00
                    Solar Production Energy:  0.00 kWh
                    Solar Full Production Energy:  0.00 kWh
                    Solar Surplus Production Energy:  0.00 kWh
             
Line:905-rule_based_policy-INFO:  
            [0] Rule-Based Policy Selected Action -> do-nothing:
                Solar Surplus Energy:  0.00 kWh
                BESS Available Discharge Energy:  3000.00 kWh
                TOU Timeslot: Peak=0.0, Standard=0.0, Off-peak=1.0
            
Line:1058-apply_bess_action-INFO:  
          [0] Selected BESS Action Name -> do-nothing
          
Line:1079-apply_bess_action-INFO:  
              [0] Do-Nothing Action Applied:
                   BESS SOC: 100.00%
                   Grid Import Energy: 871.90 kWh
                   Solar Surplus Energy: 0.00 kWh
        

Total Reward:  124.19
