### Import Library Dependencies

In [None]:
from microgrid_gym_env import MicrogridEnv
from double_dqn_agent import DoubleDQNAgent
from load_profile_data_loader import LoadProfileDataLoader

### Import Microgrid Load Profile Data

In [None]:
# Training split: 2024-11-01 through 2024-11-22 of the November 2024 CSV.
# fit_scalers=True is requested here, so the loader's scalers are fitted on
# this split.
train_data_loader = LoadProfileDataLoader(
    csv_file_path="load_profile_data_nov2024.csv",
)
train_load_profile_data = train_data_loader.load_data(
    from_date="2024-11-01",
    to_date="2024-11-22",
    fit_scalers=True,
)

In [None]:
# Test split: 2024-11-23 through 2024-11-30 of the same CSV, read through a
# separate loader instance.
# NOTE(review): fit_scalers=True makes this loader fit its own scalers on the
# held-out data. If the scalers normalise the observations fed to the agent,
# the test inputs would be scaled differently from the training inputs —
# confirm against LoadProfileDataLoader whether the training scalers should
# be reused here instead.
test_data_loader = LoadProfileDataLoader(
    csv_file_path="load_profile_data_nov2024.csv",
)
test_load_profile_data = test_data_loader.load_data(
    from_date="2024-11-23",
    to_date="2024-11-30",
    fit_scalers=True,
)

In [None]:
# Time-of-use tariff tables, one rate per band (peak / standard / off-peak).
# The environment cells divide these rates by 100.0 before using them
# (presumably a cents -> currency-unit conversion; confirm the units).
mini_flex_high_demand_tariff = dict(
    peak_rate=624.05,
    standard_rate=189.08,
    offpeak_rate=102.70,
)
mini_flex_low_demand_tariff = dict(
    peak_rate=203.57,
    standard_rate=140.10,
    offpeak_rate=88.91,
)

# Tariff table used when building both the training and test environments.
selected_tariff = mini_flex_high_demand_tariff

### Initialize Microgrid Training Environment

In [None]:
# Settings for the training microgrid environment, collected in one mapping
# and unpacked into the constructor. Tariffs come from `selected_tariff` and
# are divided by 100.0 before being handed to the environment.
train_env_settings = {
    "data": train_load_profile_data,
    "loader": train_data_loader,
    "grid_notified_maximum_demand": 2000.0,
    "bess_capacity": 3000.0,
    "bess_cycle_efficiency": 0.9,
    # Five entries; presumably one per discrete action, with the index (not
    # the sign) distinguishing charge from discharge — confirm in MicrogridEnv.
    "bess_step_sizes": [1000.0, 250.0, 0.0, 250.0, 1000.0],
    "tou_peak_tariff": selected_tariff['peak_rate'] / 100.0,
    "tou_standard_tariff": selected_tariff['standard_rate'] / 100.0,
    "tou_offpeak_tariff": selected_tariff['offpeak_rate'] / 100.0,
    "solar_ppa_tariff": 1.4,
    "debug_flag": False,
}
train_env = MicrogridEnv(**train_env_settings)

# Kept as the last expression of the cell so the notebook shows reset()'s result.
train_env.reset()

### Initialize Double DQN Agent For Training

In [None]:
# Number of environment steps in one episode; the sizes below are expressed
# as multiples of it (the original notes equate one episode with one day).
steps_per_episode = 24

# Double DQN agent bound to the training environment.
agent = DoubleDQNAgent(
    env=train_env,
    discount_factor=0.98,
    td_n_steps_unroll=3,
    # Learning-rate schedule inputs: start value, final factor, step budget.
    initial_lr=0.001,
    final_lr_factor=0.01,
    total_training_steps=50_000,
    # Prioritised-replay parameters. 0.4 + 30_000 * (0.6 / 30_000) == 1.0, so
    # beta presumably reaches 1.0 after 30,000 increments — confirm in the agent.
    per_alpha=0.6,
    per_beta=0.4,
    per_beta_increment=0.6 / 30_000,
    exp_batch_size=7 * steps_per_episode,  # 7 episodes
    exp_buffer_size=1000 * steps_per_episode,  # 1000 episodes
    exp_min_buffer_samples=21 * steps_per_episode,  # 21 episodes
    target_model_sync_steps=63 * steps_per_episode,  # 63 episodes
    episode_len=steps_per_episode,  # 1 episode = 1 day
    stop_reward=450,
)

### Train Double DQN Agent

In [None]:
# Start training the agent that was constructed with env=train_env.
# NOTE(review): the evaluation cell later loads
# "./model_checkpoints/double_dqn_model_weights.pth"; presumably learn()
# is what writes that checkpoint — confirm in DoubleDQNAgent.
agent.learn()

### Test Double DQN Policy

In [None]:
import numpy as np
import torch
from double_dqn_model_big import DoubleDQNModel
from torch.utils.tensorboard.writer import SummaryWriter

In [None]:
# Rebuild the network and restore the trained weights for evaluation.
# NOTE(review): input_shape=12 and n_actions=5 are hard-coded; n_actions
# matches the five entries of bess_step_sizes used for the environments,
# input_shape presumably matches the observation length — confirm.
dqn_agent_model = DoubleDQNModel(input_shape=12, n_actions=5)

# map_location="cpu" remaps any tensors that were saved from a GPU so the
# checkpoint also loads on a CPU-only machine; load_state_dict then copies
# the values into the model's own parameters on whatever device they live.
# weights_only=True restricts unpickling to tensors/plain containers.
dqn_agent_model.load_state_dict(
    torch.load(
        "./model_checkpoints/double_dqn_model_weights.pth",
        map_location="cpu",
        weights_only=True,
    )
)

# Switch to inference mode. Left as the last expression so the notebook
# still displays the returned module.
dqn_agent_model.eval()

In [None]:
# Settings for the evaluation microgrid environment. Same physical and tariff
# values as the training environment, but fed with the held-out load profile,
# its own loader, and debug output enabled.
test_env_settings = {
    "data": test_load_profile_data,
    "loader": test_data_loader,
    "grid_notified_maximum_demand": 2000.0,
    "bess_capacity": 3000.0,
    "bess_cycle_efficiency": 0.9,
    # Five entries, matching n_actions=5 used when the model is rebuilt.
    "bess_step_sizes": [1000.0, 250.0, 0.0, 250.0, 1000.0],
    "tou_peak_tariff": selected_tariff['peak_rate'] / 100.0,
    "tou_standard_tariff": selected_tariff['standard_rate'] / 100.0,
    "tou_offpeak_tariff": selected_tariff['offpeak_rate'] / 100.0,
    "solar_ppa_tariff": 1.4,
    "debug_flag": True,
}
test_env = MicrogridEnv(**test_env_settings)

# Kept as the last expression of the cell so the notebook shows reset()'s result.
test_env.reset()

In [None]:
# Greedy roll-out of the trained Double DQN policy on the test environment
# for 24*7 steps, logging per-step metrics to TensorBoard and accumulating
# the reward returned by each step.
dqn_agent_reward = 0.0

writer = SummaryWriter(comment="DoubleDQNAgent-TestEnv")

try:
    state, monitoring_metrics = test_env.reset()

    for state_idx in range(24*7):

        # Convert the state to a float tensor and add a batch dimension.
        state_t = torch.as_tensor(np.array(state)).float().unsqueeze(0)

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            q_vals = dqn_agent_model(state_t)

        # Greedy action: index of the largest Q-value.
        action = torch.argmax(q_vals, dim=1).item()

        # NOTE(review): `done` / `truncated` are not acted on; the loop keeps
        # stepping for the full 24*7 steps — confirm the env supports that.
        state, reward, done, truncated, monitoring_metrics = test_env.step(action=action)

        # "total_cumulative_reward" is logged before this step's reward is
        # added, so it holds the sum over the previous steps only.
        writer.add_scalars(
            "Microgrid Environment State",
            {
                "grid_import_energy_without_bess": monitoring_metrics["grid_import_energy"],
                "grid_import_energy_with_bess": monitoring_metrics["grid_import_energy_with_bess"],
                "solar_prod_energy": monitoring_metrics["solar_prod_energy"],
                "solar_controller_setpoint": monitoring_metrics["solar_controller_setpoint"],
                "bess_avail_discharge": monitoring_metrics["bess_avail_discharge"],
                "bess_soc": monitoring_metrics["bess_soc"],
                "raw_reward_earned": monitoring_metrics["raw_reward_earned"],
                "scaled_reward_earned": monitoring_metrics["scaled_reward_earned"],
                "total_cumulative_reward": dqn_agent_reward,
                "action_energy": monitoring_metrics["action_energy"],
            },
            state_idx,
        )

        dqn_agent_reward += reward
finally:
    # Flush pending events and release the event files, even if a step fails.
    writer.close()

print(f"Total Reward: {dqn_agent_reward: .2f}")

### Check Rule-based Policy For Baseline

In [None]:
# Baseline roll-out: drive the chosen environment with its own
# rule_based_policy(), logging the same per-step metrics to TensorBoard
# as the Double DQN evaluation and accumulating the reward of each step.
fixed_rule_reward = 0.0

# Toggle between the held-out test environment and the training environment.
is_test_env = True

# Resolve the environment and roll-out length once instead of re-testing the
# flag before every environment call.
if is_test_env:
    baseline_env = test_env
    total_steps = 24 * 7
else:
    baseline_env = train_env
    total_steps = 3 * 24 * 7

# Name the TensorBoard run after the environment that is actually used
# (identical to the previous fixed name when is_test_env is True).
writer = SummaryWriter(
    comment="FixedRuleAgent-TestEnv" if is_test_env else "FixedRuleAgent-TrainEnv"
)

try:
    state, monitoring_metrics = baseline_env.reset()

    for state_idx in range(total_steps):

        action = baseline_env.rule_based_policy()

        # NOTE(review): `done` / `truncated` are not acted on; the loop keeps
        # stepping for all of total_steps — confirm the env supports that.
        state, reward, done, truncated, monitoring_metrics = baseline_env.step(action=action)

        # "total_cumulative_reward" is logged before this step's reward is
        # added, so it holds the sum over the previous steps only.
        writer.add_scalars(
            "Microgrid Environment State",
            {
                "grid_import_energy_without_bess": monitoring_metrics["grid_import_energy"],
                "grid_import_energy_with_bess": monitoring_metrics["grid_import_energy_with_bess"],
                "solar_prod_energy": monitoring_metrics["solar_prod_energy"],
                "solar_controller_setpoint": monitoring_metrics["solar_controller_setpoint"],
                "bess_avail_discharge": monitoring_metrics["bess_avail_discharge"],
                "bess_soc": monitoring_metrics["bess_soc"],
                "raw_reward_earned": monitoring_metrics["raw_reward_earned"],
                "scaled_reward_earned": monitoring_metrics["scaled_reward_earned"],
                "total_cumulative_reward": fixed_rule_reward,
                "action_energy": monitoring_metrics["action_energy"],
            },
            state_idx,
        )

        fixed_rule_reward += reward
finally:
    # Flush pending events and release the event files, even if a step fails.
    writer.close()

print(f"Total Reward: {fixed_rule_reward: .2f}")