# Test of the custom multi-agent SAC agent on the CityLearn environment

In [1]:
import gymnasium as gym
import seaborn as sns
import matplotlib.pyplot as plt

import torch

import numpy as np
import pandas as pd

from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedSpaceWrapper, StableBaselines3Wrapper

from custom_agent.CTDE.ma_sac_agents import Agents

from custom_reward.custom_reward import CustomReward

In [2]:
class WrapperEnv:
    """
    Restricted stand-in for a CityLearn environment.

    Holds only the values copied out of ``env_data``, so code that receives
    this object cannot reach anything else on the full environment.
    """

    def __init__(self, env_data):
        """
        Copy the expected entries of ``env_data`` onto the instance.

        Each key listed below is looked up with ``env_data[key]`` and stored
        as an attribute of the same name; a missing key propagates the
        lookup error raised by ``env_data``.
        """
        # Looked up in this exact order, one attribute per key.
        expected_keys = (
            'observation_names',
            'action_names',
            'observation_space',
            'action_space',
            'time_steps',
            'seconds_per_time_step',
            'random_seed',
            'buildings_metadata',
            'episode_tracker',
        )
        for key in expected_keys:
            setattr(self, key, env_data[key])

    def get_metadata(self):
        """Return the stored buildings metadata under the ``'buildings'`` key."""
        buildings = self.buildings_metadata
        return {'buildings': buildings}
    
def makeEnv(schema_path, reward_function):
    """
    Build the CityLearn environment together with its restricted view.

    ``schema_path`` and ``reward_function`` are forwarded to ``CityLearnEnv``
    as ``schema`` and ``reward_function``; ``central_agent`` is fixed to False.

    Returns a tuple ``(env, wrapper_env)``: the full environment and a
    ``WrapperEnv`` built from a snapshot of selected environment fields.
    """
    # create environment
    env = CityLearnEnv(
        schema=schema_path,
        reward_function=reward_function,
        central_agent=False,
    )

    # Snapshot of the fields exposed through WrapperEnv.
    snapshot = {
        'observation_names': env.observation_names,
        'action_names': env.action_names,
        'observation_space': env.observation_space,
        'action_space': env.action_space,
        'time_steps': env.time_steps,
        # These three are passed as None here instead of being read from env.
        'random_seed': None,
        'episode_tracker': None,
        'seconds_per_time_step': None,
        'buildings_metadata': env.get_metadata()['buildings'],
    }

    return env, WrapperEnv(snapshot)


In [3]:
# Path of the CityLearn schema file handed to makeEnv.
schema_path = "data/schema.json"

# Build the full environment and its restricted WrapperEnv view, with
# CustomReward passed as the reward function.
# NOTE(review): wrapper_env is not used in the cells visible here.
env, wrapper_env = makeEnv(schema_path, CustomReward)

# Show the first building's observation metadata; the cell output is a
# mapping from observation name to a boolean flag.
display(env.buildings[0].observation_metadata)

# wrap environment for a more workable env
# (env is rebound to the wrapper; the observation spaces printed in a later
# cell all have lower bounds of 0 and upper bounds of 1)
env = NormalizedSpaceWrapper(env)

{'month': False,
 'day_type': True,
 'hour': True,
 'daylight_savings_status': False,
 'outdoor_dry_bulb_temperature': True,
 'outdoor_dry_bulb_temperature_predicted_6h': True,
 'outdoor_dry_bulb_temperature_predicted_12h': True,
 'outdoor_dry_bulb_temperature_predicted_24h': True,
 'outdoor_relative_humidity': False,
 'outdoor_relative_humidity_predicted_6h': False,
 'outdoor_relative_humidity_predicted_12h': False,
 'outdoor_relative_humidity_predicted_24h': False,
 'diffuse_solar_irradiance': True,
 'diffuse_solar_irradiance_predicted_6h': True,
 'diffuse_solar_irradiance_predicted_12h': True,
 'diffuse_solar_irradiance_predicted_24h': True,
 'direct_solar_irradiance': True,
 'direct_solar_irradiance_predicted_6h': True,
 'direct_solar_irradiance_predicted_12h': True,
 'direct_solar_irradiance_predicted_24h': True,
 'carbon_intensity': True,
 'indoor_dry_bulb_temperature': True,
 'average_unmet_cooling_setpoint_difference': False,
 'indoor_relative_humidity': False,
 'non_shiftable_

In [4]:
# Construct the multi-agent SAC learner on the wrapped environment.
# The training loop in a later cell rebinds sac_agent to a new Agents
# instance for each run.
sac_agent = Agents(
    env,
    batch_size=100,
    buffer_max_size=100_000,
)

  logger.warn(
  return torch._C._cuda_getDeviceCount() > 0
  logger.warn(


In [5]:
# Display the observation spaces of the wrapped env; the cell output is a
# list of Box spaces, each with shape (32,) and bounds 0 to 1.
env.observation_space

[Box([0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.], [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1.], (32,), float32),
 Box([0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.], [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1.], (32,), float32),
 Box([0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.], [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1.], (32,), float32)]

In [6]:
# Settings for the repeated training runs.
n_runs = 3
warmup_steps = 5000
# NOTE(review): presumably 50 episodes of 720 time steps each — confirm
# against the schema's episode length.
total_steps = 50 * 720

# Collected training logs, one entry per run, in run order.
logs_list = []

for i in range(n_runs):
    # make agent (a new Agents instance is built for every run)
    sac_agent = Agents(env, batch_size=100, buffer_max_size=100000)

    # training run
    logs = sac_agent.train(
        nr_steps=total_steps,
        warmup_steps=warmup_steps,
        learn_delay=100,
        learn_freq=1,
        learn_weight=1,
    )

    # save training logs for this run
    logs_list.append(logs)

# NOTE: logs_list is a plain Python list and has no `.shape`; use
# len(logs_list) to check how many runs were recorded.

  0%|          | 0/36000 [00:00<?, ?it/s]

[Episode 1 mean reward: [-1.08041309 -0.96390594 -0.98829467]] ~ :   2%|▏         | 786/36000 [01:17<57:34, 10.19it/s]  


KeyboardInterrupt: 