In [1]:
import random
from pathlib import Path

import numpy as np
import torch
from citylearn.citylearn import CityLearnEnv
from matplotlib import pyplot as plt


from agents.base_agent import Agent
from agents.random_agent import RandomAgent
from agents.sac import SACAgent

In [2]:
def train_sac_agent(
    env: CityLearnEnv, 
    agent: SACAgent, 
    episodes: int = 100, 
) -> None:
    """Train SAC agent in the environment"""
    total_reward = 0
    
    reward_list = []
    
    for episode in range(episodes):
        # Reset environment and get initial observation
        observation = env.reset()
        episode_reward = 0
        
        while not env.done:
            flat_observation = np.concatenate(observation) if isinstance(observation, list) else observation
            
            action = [agent.select_action(flat_observation).tolist()]
            
            # print(action)
            agent.total_steps += 1
                    
            next_observation, reward, info, done = env.step(action)
            
            reward_list.append(reward)
                        
            flat_next_observation = np.concatenate(next_observation) if isinstance(next_observation, list) else next_observation
            
            episode_reward += np.sum(reward)
                        
            agent.replay_buffer.push(
                flat_observation, 
                action, 
                np.sum(reward),
                flat_next_observation, 
                len(done)
            )
            
            if agent.total_steps >= agent.exploration_timesteps:
                agent.train()
            
            
            observation = next_observation
        
        total_reward += episode_reward
        print(total_reward)
        
        print(f"Episode {episode+1}/{episodes}, Total Reward: {episode_reward}")
    
    return reward_list

In [3]:
SEED = 0
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)
root_directory = Path("../data/citylearn_challenge_2023_phase_1")
schema_path = root_directory / "schema.json"
env = CityLearnEnv(
    schema=schema_path,
    root_directory=root_directory,
    random_seed=SEED,
    central_agent=True,
)

  self.load_state_dict(torch.load(self.filepath)['model_state_dict'])


In [4]:
observation_space_dim = 49
action_space_dim = 18 # set to 18 and turn on other actions
    
    
    # Initialize SAC Agent
sac_agent = SACAgent(
    observation_space_dim=observation_space_dim, 
    action_space_dim=action_space_dim,
    hidden_dim=256,
    buffer_size=100000,
    batch_size=256,
    learning_rate=3e-4,
    gamma=0.99,
    tau=0.01,
    alpha=0.05,
    action_space=env.action_space,
    exploration_timesteps = 0
)


    
    # Train the agent
rewards = train_sac_agent(env, sac_agent, episodes=3)

  self.load_state_dict(torch.load(self.filepath)['model_state_dict'])


-145065.773868267
Episode 1/3, Total Reward: -145065.773868267
-246446.72925210156
Episode 2/3, Total Reward: -101380.95538383457
-347152.3605938504
Episode 3/3, Total Reward: -100705.6313417488


In [5]:
def centralized_interact_with_env(
    env: CityLearnEnv, agent: Agent = RandomAgent, episodes: int = 100
) -> None:
    """Interact with environment using agent"""
    reward_list = []
    for episode in range(episodes):
        observation = env.reset()
        while not env.done:
            action = agent.select_action(observation)
            observation, reward, info, done = env.step(action)
            reward_list.append(reward)
    
    return reward_list

In [6]:
# random_agent = RandomAgent(env.observation_space, env.action_space)
# random_rewards = centralized_interact_with_env(env, random_agent, episodes=2)

In [11]:
flat = np.concatenate(rewards)

In [12]:
import matplotlib
matplotlib.use('inline')
plt.figure(figsize=(10, 6))
plt.plot(flat, 'k')
plt.savefig("test.png")

In [13]:
import pandas as pd
def format_evaluation(evaluation_data: dict) -> pd.DataFrame:
    kpis = pd.DataFrame.from_dict(
        evaluation_data, orient="index", columns=["value", "display_name", "weight"]
    )
    kpis_reset = kpis.reset_index().rename(columns={"index": "metric"})
    return kpis_reset

In [14]:
format_evaluation(env.evaluate_citylearn_challenge())

Unnamed: 0,metric,value,display_name,weight
0,carbon_emissions_total,0.885068,Carbon emissions,0.1
1,discomfort_proportion,0.886691,Unmet hours,0.3
2,ramping_average,1.438163,Ramping,0.075
3,daily_one_minus_load_factor_average,1.112994,Load factor,0.075
4,daily_peak_average,0.930902,Daily peak,0.075
5,annual_peak_average,0.867638,All-time peak,0.075
6,one_minus_thermal_resilience_proportion,,Thermal resilience,0.15
7,power_outage_normalized_unserved_energy_total,,Unserved energy,0.15
8,average_score,0.680741,Score,
