### Import Library Dependencies

In [None]:
from microgrid_gym_env import MicrogridEnv
from double_dqn_agent import DoubleDQNAgent
from load_profile_data_loader import LoadProfileDataLoader

### Import Microgrid Load Profile Data

In [None]:
# Point the loader at the November 2024 load-profile CSV and carve out two
# non-overlapping date windows: 1-22 Nov for training, 23-30 Nov for testing.
# NOTE(review): whether `to_date` is inclusive depends on LoadProfileDataLoader;
# confirm against its implementation.
data_loader = LoadProfileDataLoader(
    csv_file_path="load_profile_data_nov2024.csv",
)
train_load_profile_data = data_loader.load_data(
    from_date="2024-11-01",
    to_date="2024-11-22",
)
test_load_profile_data = data_loader.load_data(
    from_date="2024-11-23",
    to_date="2024-11-30",
)

In [None]:
# Time-of-use tariff tables keyed by rate band. The environment cells below
# divide each rate by 100.0 before passing it on, so these are presumably
# quoted in cents per unit of energy -- TODO confirm the unit.
mini_flex_high_demand_tariff = dict(
    peak_rate=624.05,
    standard_rate=189.08,
    offpeak_rate=102.70,
)
mini_flex_low_demand_tariff = dict(
    peak_rate=203.57,
    standard_rate=140.10,
    offpeak_rate=88.91,
)

# Tariff used by both the training and the test environment.
selected_tariff = mini_flex_high_demand_tariff

### Initialize Microgrid Training Environment

In [None]:
# Training environment built on the 1-22 Nov load profile.
# Tariff rates are scaled by 1/100 before being handed to the environment
# (presumably cents -> currency units; TODO confirm).
# NOTE(review): `bess_step_sizes` lists unsigned magnitudes, while the test
# cells map the same five action indices to signed energies
# [-1000, -250, 0, 250, 1000]; confirm the environment derives the
# charge/discharge direction from the action index.
train_env = MicrogridEnv(
    data=train_load_profile_data,
    grid_notified_maximum_demand=2000.0,
    bess_capacity=3000.0,
    bess_cycle_efficiency=0.9,
    bess_step_sizes=[1000.0, 250.0, 0.0, 250.0, 1000.0],
    tou_peak_tariff=selected_tariff['peak_rate'] / 100.0,
    tou_standard_tariff=selected_tariff['standard_rate'] / 100.0,
    tou_offpeak_tariff=selected_tariff['offpeak_rate'] / 100.0,
    solar_ppa_tariff=1.4,
    debug_flag=False,
)

# Left as the cell's last expression so the notebook displays the reset result.
train_env.reset()

### Initialize Double DQN Agent For Training

In [None]:
# Double DQN agent bound to the training environment.
# Buffer, batch and sync sizes are expressed in environment steps, where one
# episode is 24*7 = 168 steps (one week per the `episode_len` comment).
agent = DoubleDQNAgent(
    env=train_env,
    # --- return estimation ---
    discount_factor=0.98,
    td_n_steps_unroll=3,
    # --- learning-rate schedule ---
    initial_lr=0.0005,
    final_lr_factor=0.01,
    total_training_steps=200_000,
    # --- prioritised replay ---
    # beta starts at 0.4 and is incremented by 0.6/200_000 per update,
    # presumably reaching 1.0 after `total_training_steps` -- TODO confirm.
    per_alpha=0.6,
    per_beta=0.4,
    per_beta_increment=0.6/200_000,
    # --- experience buffer ---
    exp_batch_size=24*7*8,  # 8 episodes
    exp_buffer_size=24*7*1024,  # 1024 episodes
    exp_min_buffer_samples=24*7*32,  # 32 episodes
    target_model_sync_steps=24*7*32,  # 32 episodes
    # --- episode / stopping ---
    episode_len=24*7,  # 1 episode = 1 week
    stop_reward=250.0,
)

### Train Double DQN Agent

In [None]:
# Run the agent's training procedure on `train_env`.
# NOTE(review): the test section loads "double_dqn_model_weights.pth";
# presumably learn() writes that file -- confirm in DoubleDQNAgent.
agent.learn()

### Test Double DQN Model

In [None]:
import numpy as np
import torch
from double_dqn_model import DoubleDQNModel
from torch.utils.tensorboard.writer import SummaryWriter

# Rebuild the network and restore the saved weights for evaluation.
# n_actions=5 matches the five entries of the action/energy tables used in this
# notebook; input_shape=12 matches the state vector indexed up to state[11] in
# the test loop.
dqn_agent_model = DoubleDQNModel(input_shape=12, n_actions=5)

# map_location="cpu": the evaluation loop feeds CPU tensors, and without this a
# checkpoint saved from a CUDA device fails to deserialize on a CPU-only host.
# weights_only=True keeps unpickling restricted to tensor data.
dqn_agent_model.load_state_dict(
    torch.load("double_dqn_model_weights.pth", map_location="cpu", weights_only=True)
)

# Switch to inference mode; kept as the cell's last expression so the notebook
# still displays the model summary.
dqn_agent_model.eval()

In [None]:
# Evaluation environment built on the held-out 23-30 Nov load profile.
# Every physical and tariff parameter mirrors the training environment; only
# the data window and `debug_flag` (enabled here) differ.
# NOTE(review): `bess_step_sizes` lists unsigned magnitudes, while the test
# loops map the same five action indices to signed energies
# [-1000, -250, 0, 250, 1000]; confirm the environment derives the
# charge/discharge direction from the action index.
test_env = MicrogridEnv(
    data=test_load_profile_data,
    grid_notified_maximum_demand=2000.0,
    bess_capacity=3000.0,
    bess_cycle_efficiency=0.9,
    bess_step_sizes=[1000.0, 250.0, 0.0, 250.0, 1000.0],
    tou_peak_tariff=selected_tariff['peak_rate'] / 100.0,
    tou_standard_tariff=selected_tariff['standard_rate'] / 100.0,
    tou_offpeak_tariff=selected_tariff['offpeak_rate'] / 100.0,
    solar_ppa_tariff=1.4,
    debug_flag=True,
)

# Left as the cell's last expression so the notebook displays the reset result.
test_env.reset()

In [None]:
# Greedy roll-out of the trained Double DQN policy on the test environment for
# up to one week (24*7 hourly steps), logging the trajectory to TensorBoard.
dqn_agent_reward = 0.0

# Signed energy associated with each action index; used for logging only.
action_energy_vals = [-1000.0, -250.0, 0.0, 250.0, 1000.0]

# The context manager flushes and closes the event file even if a step raises,
# so the run is fully written before the next cell opens another writer.
with SummaryWriter(comment="DoubleDQNAgent-TestEnv") as writer:

    state, _ = test_env.reset()

    for state_idx in range(24*7):

        # State -> float tensor with a leading batch dimension: [1, state_dim].
        state_t = torch.as_tensor(np.array(state)).float().unsqueeze(0)

        # Inference only: skip building an autograd graph.
        with torch.no_grad():
            q_vals = dqn_agent_model(state_t)

        # Greedy action = index of the largest Q-value.
        action = torch.argmax(q_vals, dim=1).item()
        action_energy = action_energy_vals[action]

        state, reward, done, truncated, _ = test_env.step(action=action)

        # Accumulate before logging so the curve at step k includes step k's
        # reward and its final point equals the total printed afterwards.
        dqn_agent_reward += reward

        # NOTE(review): indices 8 / 9 / 11 are taken to be grid import, solar
        # production and BESS SoC per the labels; confirm against MicrogridEnv.
        writer.add_scalars("Microgrid Environment State", {"Grid Import Energy": state[8],
                                                           "Solar PV Production Energy": state[9],
                                                           "BESS SoC": state[11],
                                                           "Agent Selected Action Energy": action_energy},
                           state_idx)

        writer.add_scalar("DoubleDQN Agent Selected Action Energy", action_energy, state_idx)

        writer.add_scalar("DoubleDQN Agent Cumulative Reward", dqn_agent_reward, state_idx)

        # Stop stepping once the environment reports the episode is over.
        if done or truncated:
            break

In [None]:
# Report the total reward collected by the DQN policy over the test roll-out.
# The format spec " .2f" prints two decimals and reserves a sign column
# (a space for non-negative values, "-" for negative ones).
print(f"Total DQN Agent Test Reward: {dqn_agent_reward: .2f}")

### Run Fixed Rule Policy For Baseline

In [None]:
# Baseline roll-out: drive the same test environment with its built-in
# rule-based policy for up to one week (24*7 hourly steps) and log the
# trajectory to TensorBoard for comparison with the DQN run.
fixed_rule_reward = 0.0

# Signed energy associated with each action index; used for logging only.
action_energy_vals = [-1000.0, -250.0, 0.0, 250.0, 1000.0]

# The context manager flushes and closes the event file even if a step raises.
with SummaryWriter(comment="FixedRuleAgent-TestEnv") as writer:

    state, _ = test_env.reset()

    for state_idx in range(24*7):

        # The environment itself proposes the action for its current state.
        action = test_env.rule_based_policy()
        action_energy = action_energy_vals[action]

        state, reward, done, truncated, _ = test_env.step(action=action)

        # Accumulate before logging so the curve at step k includes step k's
        # reward and its final point equals the total printed afterwards.
        fixed_rule_reward += reward

        # NOTE(review): indices 8 / 9 / 11 are taken to be grid import, solar
        # production and BESS SoC per the labels; confirm against MicrogridEnv.
        writer.add_scalars("Microgrid Environment State", {"Grid Import Energy": state[8],
                                                           "Solar PV Production Energy": state[9],
                                                           "BESS SoC": state[11],
                                                           "Agent Selected Action Energy": action_energy},
                           state_idx)

        writer.add_scalar("FixedRule Agent Selected Action Energy", action_energy, state_idx)

        writer.add_scalar("FixedRule Agent Cumulative Reward", fixed_rule_reward, state_idx)

        # Stop stepping once the environment reports the episode is over.
        if done or truncated:
            break

In [None]:
# Report the total reward collected by the rule-based baseline over the test
# roll-out. The format spec " .2f" prints two decimals and reserves a sign
# column (a space for non-negative values, "-" for negative ones).
print(f"Total Fixed Rule Test Reward: {fixed_rule_reward: .2f}")