<a href="https://colab.research.google.com/github/intelligent-environments-lab/CityLearn/blob/master/examples/quickstart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# QuickStart

Install the latest CityLearn version from PyPI with the `pip` command:

In [7]:
!pip install CityLearn



## CityLearn Control Agents

### No Control (Baseline)

Run the following to simulate an environment where the storage systems and heat pumps are not controlled (baseline). The storage actions prescribed will be 0.0 and the heat pump will have no action, i.e. `None`, causing it to deliver the ideal load in the building time series files:

In [2]:
from citylearn.agents.base import BaselineAgent as Agent
from citylearn.citylearn import CityLearnEnv

# Initialize the environment with `central_agent=True` and attach the
# baseline agent to it.
env = CityLearnEnv('citylearn_challenge_2023_phase_1', central_agent=True)
model = Agent(env)

# Step through one episode, applying the agent's actions at every time step.
observations, _ = env.reset()

while not env.terminated:
    actions = model.predict(observations)
    # `step` returns (observations, reward, terminated, truncated, info) in
    # the Gymnasium order; only the observations are needed to choose the
    # next actions, so the remaining values are discarded.
    observations, _, _, _, _ = env.step(actions)

# Evaluate: one row per cost function, one column per building/district,
# dropping cost functions that have no value in any column.
kpis = model.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,1.0
annual_normalized_unserved_energy_total,0.0,0.0,0.0,0.0
carbon_emissions_total,1.0,1.0,1.0,1.0
cost_total,1.0,1.0,1.0,1.0
daily_one_minus_load_factor_average,,,,1.0
daily_peak_average,,,,1.0
discomfort_cold_delta_average,1.699,0.044,0.671,0.805
discomfort_cold_delta_maximum,4.741,1.772,3.466,3.326
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.396,0.0,0.082,0.159


### Centralized RBC
Run the following to simulate an environment controlled by a centralized RBC agent for a single episode:

In [3]:
from citylearn.agents.rbc import BasicRBC as Agent
from citylearn.citylearn import CityLearnEnv

# Initialize the environment with `central_agent=True` and attach the
# rule-based controller to it.
env = CityLearnEnv('citylearn_challenge_2023_phase_1', central_agent=True)
model = Agent(env)

# Step through one episode, applying the agent's actions at every time step.
observations, _ = env.reset()

while not env.terminated:
    actions = model.predict(observations)
    # `step` returns (observations, reward, terminated, truncated, info) in
    # the Gymnasium order; only the observations are needed to choose the
    # next actions, so the remaining values are discarded.
    observations, _, _, _, _ = env.step(actions)

# Evaluate: one row per cost function, one column per building/district,
# dropping cost functions that have no value in any column.
kpis = model.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)

name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,1.179
annual_normalized_unserved_energy_total,-0.0,0.0,-0.0,-0.0
carbon_emissions_total,1.986,1.927,1.728,1.88
cost_total,1.923,1.872,1.703,1.833
daily_one_minus_load_factor_average,,,,0.723
daily_peak_average,,,,1.325
discomfort_cold_delta_average,9.938,3.535,3.222,5.565
discomfort_cold_delta_maximum,13.562,9.931,5.399,9.63
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.982,0.914,0.975,0.957


### Decentralized-Independent SAC

Run the following to simulate an environment controlled by decentralized-independent SAC agents for 2 training episodes:

In [3]:
from citylearn.agents.sac import SAC as Agent
from citylearn.citylearn import CityLearnEnv

# Set up the environment with `central_agent=False` and the SAC agent.
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=False)
model = Agent(env)

# Train for two episodes; `deterministic_finish=True` is passed through to
# the agent's `learn` method.
model.learn(episodes=2, deterministic_finish=True)

# Evaluate: one row per cost function, one column per building/district,
# rounded to 3 decimals, without rows that are empty in every column.
kpis = (
    model.env.evaluate()
    .pivot(index='cost_function', columns='name', values='value')
    .round(3)
    .dropna(how='all')
)
display(kpis)

name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,0.95
annual_normalized_unserved_energy_total,0.013,0.013,0.013,0.013
carbon_emissions_total,0.954,0.98,0.958,0.964
cost_total,0.917,0.94,0.927,0.928
daily_one_minus_load_factor_average,,,,0.938
daily_peak_average,,,,0.934
discomfort_cold_delta_average,2.006,0.921,0.873,1.267
discomfort_cold_delta_maximum,6.514,4.565,2.974,4.685
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.473,0.325,0.235,0.344


### Decentralized-Cooperative MARLISA

Run the following to simulate an environment controlled by decentralized-cooperative MARLISA agents for 20 training episodes:

In [17]:
from citylearn.agents.marlisa import MARLISA as Agent
from citylearn.citylearn import CityLearnEnv

# Set up the environment with `central_agent=False` and the MARLISA agent.
env = CityLearnEnv('citylearn_challenge_2023_phase_1', central_agent=False)
model = Agent(env)

# Train for twenty episodes; `deterministic_finish=True` is passed through
# to the agent's `learn` method.
model.learn(episodes=20, deterministic_finish=True)

# Evaluate: one row per cost function, one column per building/district,
# rounded to 3 decimals, without rows that are empty in every column.
kpis = (
    model.env.evaluate()
    .pivot(index='cost_function', columns='name', values='value')
    .round(3)
    .dropna(how='all')
)
display(kpis)

name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,0.876
annual_normalized_unserved_energy_total,0.0,-0.0,0.0,0.0
carbon_emissions_total,1.009,1.033,1.016,1.019
cost_total,0.986,1.021,0.998,1.002
daily_one_minus_load_factor_average,,,,0.992
daily_peak_average,,,,0.995
discomfort_cold_delta_average,1.941,0.064,0.496,0.833
discomfort_cold_delta_maximum,9.584,1.324,3.796,4.901
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.412,0.0,0.085,0.166


In [2]:
from citylearn.agents.marlisa import MARLISA as Agent
from citylearn.citylearn import CityLearnEnv

# Same MARLISA setup as a short run: environment with `central_agent=False`.
env = CityLearnEnv('citylearn_challenge_2023_phase_1', central_agent=False)
model = Agent(env)

# Train for only two episodes.
model.learn(episodes=2, deterministic_finish=True)

# Evaluate: pivot the KPIs into a cost-function by building/district table,
# round to 3 decimals and drop rows that are empty in every column.
kpi_table = model.env.evaluate().pivot(index='cost_function', columns='name', values='value')
kpis = kpi_table.round(3).dropna(how='all')
display(kpis)

name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,0.95
annual_normalized_unserved_energy_total,0.0,0.0,0.0,0.0
carbon_emissions_total,0.958,0.975,0.958,0.964
cost_total,0.92,0.936,0.927,0.927
daily_one_minus_load_factor_average,,,,0.931
daily_peak_average,,,,0.921
discomfort_cold_delta_average,1.979,0.922,0.883,1.262
discomfort_cold_delta_maximum,6.465,4.569,2.98,4.671
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.461,0.321,0.241,0.341


## Other Standard Reinforcement Learning Libraries

### Stable Baselines3 Reinforcement Learning Algorithms

Install the latest version of Stable Baselines3:

In [None]:
!pip install stable-baselines3

Before the environment is ready for use in Stable Baselines3, it needs to be wrapped. First, wrap the environment using the `NormalizedObservationWrapper` (see [docs](https://www.citylearn.net/api/citylearn.wrappers.html#citylearn.wrappers.NormalizedObservationWrapper)) to ensure that observations served to the agent are min-max normalized to the range [0, 1] and cyclical observations, e.g. hour, are encoded using the cosine transformation.

Next, we wrap with the `StableBaselines3Wrapper` (see [docs](https://www.citylearn.net/api/citylearn.wrappers.html#citylearn.wrappers.StableBaselines3Wrapper)), which ensures that observations, actions and rewards are served in a manner that is compatible with the Stable Baselines3 interface.

> ⚠️ **NOTE**: `central_agent` in the `env` must be `True` when using Stable Baselines3, as it does not support multiple agents.

In [5]:
from stable_baselines3.sac import SAC as Agent
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper

# Build the central-agent environment and wrap it, innermost first:
# observation normalization, then the Stable Baselines3 adapter.
env = StableBaselines3Wrapper(
    NormalizedObservationWrapper(
        CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=True)
    )
)
model = Agent('MlpPolicy', env)

# Train for `episodes` passes over the environment's time steps.
episodes = 2
model.learn(total_timesteps=episodes * env.unwrapped.time_steps)

# Roll out one episode with deterministic actions from the trained model.
observations, _ = env.reset()

while not env.unwrapped.terminated:
    actions, _ = model.predict(observations, deterministic=True)
    observations, _, _, _, _ = env.step(actions)

# Evaluate: one row per cost function, one column per building/district,
# rounded to 3 decimals, without rows that are empty in every column.
kpis = (
    env.unwrapped.evaluate()
    .pivot(index='cost_function', columns='name', values='value')
    .round(3)
    .dropna(how='all')
)
display(kpis)

2024-04-01 20:37:07,558	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-04-01 20:37:08,286	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,0.834
annual_normalized_unserved_energy_total,0.015,0.012,0.014,0.014
carbon_emissions_total,0.396,0.449,0.501,0.449
cost_total,0.375,0.413,0.471,0.419
daily_one_minus_load_factor_average,,,,1.316
daily_peak_average,,,,0.709
discomfort_cold_delta_average,0.0,0.004,0.001,0.002
discomfort_cold_delta_maximum,0.124,0.581,0.372,0.359
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.0,0.0,0.0,0.0


### RLlib

Install the latest version of RLlib:

In [None]:
!pip install "ray[rllib]"

We advise that you include the `ClippedObservationWrapper` (see [docs](https://www.citylearn.net/api/citylearn.wrappers.html#citylearn.wrappers.ClippedObservationWrapper)) when working with RLlib so that observations are always clipped to the observation space before being sent to the agent. Otherwise, out-of-bound observations will raise a `ValueError` and terminate the training.

We also wrap the environment with `NormalizedObservationWrapper` (see [docs](https://www.citylearn.net/api/citylearn.wrappers.html#citylearn.wrappers.NormalizedObservationWrapper)) to ensure that observations served to the agent are min-max normalized to the range [0, 1] and cyclical observations, e.g. hour, are encoded using the cosine transformation.

RLlib supports both [single-agent and multi-agent algorithms](https://docs.ray.io/en/latest/rllib/rllib-algorithms.html). See below for an example for either case.

####  Single Agent

The single-agent interface for RLlib is the `RLlibSingleAgentWrapper` wrapper.

In [10]:
import warnings
from citylearn.wrappers import ClippedObservationWrapper, NormalizedObservationWrapper, RLlibSingleAgentWrapper
from ray.rllib.algorithms.sac import SACConfig as Config

# Silence deprecation warnings for the rest of the cell.
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Environment configuration handed to the RLlib wrapper: the CityLearn
# schema plus the list of observation wrappers.
env_config = {
    'env_kwargs': {'schema': 'citylearn_challenge_2023_phase_2_local_evaluation'},
    'wrappers': [NormalizedObservationWrapper, ClippedObservationWrapper],
}
config = Config().environment(RLlibSingleAgentWrapper, env_config=env_config)
model = config.build()

# Run two training iterations; the returned results are not used.
for _ in range(2):
    model.train()

# Roll out one episode on a fresh environment without exploration.
env = RLlibSingleAgentWrapper(env_config)
observations, _ = env.reset()

while not env.unwrapped.terminated:
    actions = model.compute_single_action(observations, explore=False)
    observations, _, _, _, _ = env.step(actions)

# Evaluate: one row per cost function, one column per building/district,
# rounded to 3 decimals, without rows that are empty in every column.
kpis = (
    env.unwrapped.evaluate()
    .pivot(index='cost_function', columns='name', values='value')
    .round(3)
    .dropna(how='all')
)
display(kpis)



name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,1.063
annual_normalized_unserved_energy_total,0.015,0.016,0.014,0.015
carbon_emissions_total,1.632,1.598,1.459,1.563
cost_total,1.572,1.548,1.427,1.516
daily_one_minus_load_factor_average,,,,0.74
daily_peak_average,,,,1.166
discomfort_cold_delta_average,7.743,2.896,2.563,4.401
discomfort_cold_delta_maximum,12.215,8.302,5.262,8.593
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.968,0.733,0.757,0.819


#### Multi-agent

The multi-agent interface for RLlib is the `RLlibMultiAgentEnv` wrapper.

In [12]:
import warnings
from citylearn.wrappers import ClippedObservationWrapper, NormalizedObservationWrapper, RLlibMultiAgentEnv
from ray.rllib.algorithms.sac import SACConfig as Config
from ray.rllib.policy.policy import PolicySpec

# Silence deprecation warnings for the rest of the cell.
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Environment configuration handed to the RLlib wrapper: the CityLearn
# schema plus the list of observation wrappers.
env_config = {
    'env_kwargs': {
        'schema': 'citylearn_challenge_2023_phase_2_local_evaluation',
    },
    'wrappers': [
        NormalizedObservationWrapper,
        ClippedObservationWrapper
    ]
}

# Build the environment once: it supplies the agent ids for the policy
# specification and is reused for the evaluation rollout after training,
# instead of constructing a throwaway instance just to read the ids.
env = RLlibMultiAgentEnv(env_config)

config = (
    Config()
    .environment(RLlibMultiAgentEnv, env_config=env_config)
    .multi_agent(
        # One independent policy per agent; each agent id maps to the
        # policy of the same name.
        policies={a: PolicySpec() for a in env._agent_ids},
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: agent_id,
    )
)
model = config.build()

# Run two training iterations; the returned results are not used.
for i in range(2):
    _ = model.train()

# Roll out one episode without exploration, querying each agent's own policy.
observations, _ = env.reset()

while not env.terminated:
    actions = {p: model.compute_single_action(o, policy_id=p, explore=False) for p, o in observations.items()}
    observations, _, _, _, _ = env.step(actions)

# Evaluate: one row per cost function, one column per building/district,
# dropping cost functions that have no value in any column.
kpis = env.unwrapped.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)



name,Building_1,Building_2,Building_3,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all_time_peak_average,,,,1.063
annual_normalized_unserved_energy_total,0.015,0.016,0.014,0.015
carbon_emissions_total,1.639,1.599,1.462,1.566
cost_total,1.579,1.548,1.431,1.519
daily_one_minus_load_factor_average,,,,0.739
daily_peak_average,,,,1.168
discomfort_cold_delta_average,7.789,2.896,2.566,4.417
discomfort_cold_delta_maximum,12.26,8.308,5.261,8.61
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.968,0.731,0.759,0.819
