In [7]:
import numpy as np 
import pandas as pd

import math
import sys
import os

import gymnasium as gym
gym.__version__

from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper
from citylearn.reward_function import RewardFunction

sys.path.append("../custom_reward")
from custom_reward.custom_reward import CustomReward

from stable_baselines3 import SAC

import seaborn as sns
import matplotlib.pyplot as plt

In [8]:
class WrapperEnv:
    """
    Env to wrap provide Citylearn Env data without providing full env
    Preventing attribute access outside of the available functions
    """
    def __init__(self, env_data):
        self.observation_names = env_data['observation_names']
        self.action_names = env_data['action_names']
        self.observation_space = env_data['observation_space']
        self.action_space = env_data['action_space']
        self.time_steps = env_data['time_steps']
        self.seconds_per_time_step = env_data['seconds_per_time_step']
        self.random_seed = env_data['random_seed']
        self.buildings_metadata = env_data['buildings_metadata']
        self.episode_tracker = env_data['episode_tracker']
    
    def get_metadata(self):
        return {'buildings': self.buildings_metadata}

In [9]:
def makeEnv(schema_path, reward_function):
    # create environment
    env = CityLearnEnv(schema = schema_path, reward_function = reward_function, central_agent=False)

    env_data = dict(
        observation_names = env.observation_names,
        action_names = env.action_names,
        observation_space = env.observation_space,
        action_space = env.action_space,
        time_steps = env.time_steps,
        random_seed = None,
        episode_tracker = None,
        seconds_per_time_step = None,
        buildings_metadata = env.get_metadata()['buildings']
    )

    wrapper_env = WrapperEnv(env_data)
    return env, wrapper_env


In [10]:
# schema_path = "data/schema_edited.json"

# env, wrapper_env = makeEnv(schema_path, CustomReward)

# print(env.buildings[0].observation_space)

env, wrapper_env = makeEnv("data/schema.json", CustomReward)

print(env.buildings[0].observation_space)


# wrap environment for use in stablebaselines3
# env = NormalizedObservationWrapper(env)
# env = StableBaselines3Wrapper(env)

Box([ 1.          1.         21.38       21.385895   20.812239   20.947014
  0.          0.          0.          0.          0.          0.
  0.          0.          0.33751526  9.999914    0.31963342  0.
  0.          0.         -4.613046    0.02893     0.02893     0.02893
  0.02893     0.          0.          0.         20.          0.        ], [7.0000000e+00 2.4000000e+01 4.0320000e+01 4.0448032e+01 4.1761425e+01
 4.0484505e+01 4.6660001e+02 4.6125140e+02 5.5482874e+02 5.5179816e+02
 9.0848999e+02 1.0563348e+03 1.1297407e+03 1.2375875e+03 5.5606288e-01
 3.7222233e+01 7.4589686e+00 1.6887081e+00 1.0000000e+00 1.0000000e+00
 1.9749760e+01 5.8669999e-02 5.8669999e-02 5.8669999e-02 5.8669999e-02
 1.2199831e+01 5.2501535e+00 3.0000000e+00 2.7222221e+01 1.0000000e+00], (30,), float32)


In [11]:
obs = env.reset()
# display(env.observation_space[0].sample)
sum([obs.shape[0] for idx, obs in enumerate(env.observation_space)])
[obs.shape for obs in env.observation_space]
[act.shape for act in env.action_space]
# display(env.action_space)
# print(np.array(env.reset()).shape)

[(3,), (3,), (3,)]

In [12]:
# wrapper_env.action_names
np.array(env.get_metadata())

array({'uid': 'c688ad87300d4f7787772ffa247c722f', 'random_seed': 32006953, 'simulation_time_steps': 720, 'seconds_per_time_step': 3600.0, 'reward_function': 'CustomReward', 'central_agent': False, 'shared_observations': ['month', 'day_type', 'hour', 'daylight_savings_status', 'outdoor_dry_bulb_temperature', 'outdoor_dry_bulb_temperature_predicted_6h', 'outdoor_dry_bulb_temperature_predicted_12h', 'outdoor_dry_bulb_temperature_predicted_24h', 'outdoor_relative_humidity', 'outdoor_relative_humidity_predicted_6h', 'outdoor_relative_humidity_predicted_12h', 'outdoor_relative_humidity_predicted_24h', 'diffuse_solar_irradiance', 'diffuse_solar_irradiance_predicted_6h', 'diffuse_solar_irradiance_predicted_12h', 'diffuse_solar_irradiance_predicted_24h', 'direct_solar_irradiance', 'direct_solar_irradiance_predicted_6h', 'direct_solar_irradiance_predicted_12h', 'direct_solar_irradiance_predicted_24h', 'carbon_intensity', 'electricity_pricing', 'electricity_pricing_predicted_6h', 'electricity_pri

In [13]:
from custom_agent.custom_SAC.ma_sac_agents import Agents
Agents(env)


ModuleNotFoundError: No module named 'custom_agent'

In [29]:
from custom_spider_env.spider_fly_env.envs.grid_MA_pettingzoo4 import SpiderFlyEnvMA
from custom_spider_env.spider_fly_env.wrappers.normalized_obs_wrapper import NormalizeObsWrapper
from custom_spider_env.spider_fly_env.wrappers.pettingzoo_wrapper import PettingZooWrapper
from custom_agents.CTCE_algorithms.ma_sac_agents_seq_discrete_no_id import Agents

import numpy as np

# env = SpiderFlyEnvMA(spiders = 4, max_timesteps = 10, size = 5)
env = SpiderFlyEnvMA(spiders = 4, size = 10, max_timesteps = 100, render_mode = "ascii")
# env = PettingZooWrapper(env)
env = PettingZooWrapper(env, normalize = True)


[[' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' 'X' ' ' ' ' ' ' ' ' 'X' ' ' ' ' 'X']
 [' ' ' ' 'O' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' 'X' ' ' ' ' ' ' ' ' ' ' ' ' ' ']]


In [30]:
_, _ = env.reset()
rew = [0]
trunc = [False]

while not trunc[0]:
    if np.isclose(rew[0], 1):
        break
    actions = [act_space.sample() for act_space in env.action_space]
    # env.print_info()
    obs, rew, _, trunc, _ = env.step(actions)
    print(rew)
    print(obs[0] * 9)
# env.print_info()


[[' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' 'X' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 'X' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 ['X' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 'X' ' ']
 [' ' ' ' 'O' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']]
[[' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'X' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 'X' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' 'X' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' 'O' ' ' ' ' ' ' ' ' ' ' 'X' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']]
[-0.0036622776601683794, -0.00020710678118654758, -0.0035, -0.004405124837953327]
[4. 8. 1. 3. 8. 2. 8. 7. 2. 2.]
[[' ' ' ' ' ' ' ' ' ' ' 

In [4]:
obs, _ = env.reset()
rew = [0]

sac = Agents(env, batch_size = 256, layer_sizes = (128, 128), global_observations = True)
sac.actor.load("models/actor_410000")

while rew[0] != 1:
    actions = sac.get_action(obs, False, True)

    next_obs, rew, done, trunc, _ = env.step(actions)

    if trunc[0] == True:
        obs, _ = env.reset()
    else:
        obs = next_obs
    print(rew, trunc)

[[' ' ' ' 'X' ' ' ' ']
 [' ' 'O' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' 'X' ' ' 'X']
 ['X' ' ' ' ' ' ' ' ']]


  return torch._C._cuda_getDeviceCount() > 0


[[' ' ' ' 'X' ' ' ' ']
 [' ' 'O' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 ['X' ' ' ' ' 'X' 'X']
 [' ' ' ' ' ' ' ' ' ']]
[-0.00020710678118654758, -0.001618033988749895, -0.0023027756377319948, -0.0019142135623730953] [False, False, False, False]
[[' ' ' ' 'X' ' ' ' ']
 [' ' 'O' ' ' ' ' ' ']
 ['X' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' 'X' 'X']
 [' ' ' ' ' ' ' ' ' ']]
[-0.00020710678118654758, -0.00020710678118654758, -0.0023027756377319948, -0.0019142135623730953] [False, False, False, False]
[[' ' ' ' 'X' ' ' ' ']
 ['X' 'O' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' 'X' 'X']
 [' ' ' ' ' ' ' ' ' ']]
[-0.00020710678118654758, 0.0, -0.0023027756377319948, -0.0019142135623730953] [False, False, False, False]
[['X' ' ' 'X' ' ' ' ']
 [' ' 'O' ' ' ' ' ' ']
 [' ' ' ' ' ' 'X' ' ']
 [' ' ' ' ' ' ' ' 'X']
 [' ' ' ' ' ' ' ' ' ']]
[-0.00020710678118654758, -0.00020710678118654758, -0.0023027756377319948, -0.001618033988749895] [False, False, False, False]
[['X' ' ' 'X' ' ' ' ']
 [' ' 'O' ' ' ' ' ' ']
 