# Example Usage of `BuildingEnv`

This notebook demonstrates 4 examples using `BuildingEnv`:

1. no-action policy
2. random action policy
3. MPC policy
4. training a PPO agent with Stable-Baselines3

In [None]:
# Enable the autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to library code are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Move the working directory two levels up from the notebook's folder
# (presumably the repository root, so `sustaingym` is importable — confirm).
# NOTE: the path is relative, so re-running this cell moves up two more levels.
%cd ../..

In [None]:
from __future__ import annotations

from collections.abc import Sequence

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from sustaingym.envs.building import BuildingEnv, ParameterGenerator

In [None]:
def plot_temp(env: BuildingEnv, zone_names: Sequence[str] | None = None) -> None:
    """Plot the temperature columns of the states recorded in ``env.statelist``.

    Args:
        env: environment whose ``statelist`` holds the recorded states. The
            first ``env.n + 1`` columns of each state are drawn, one line
            per column.
        zone_names: legend labels for the plotted lines. If None, the names
            of ``env.zones`` followed by ``'Outside'`` are used.
    """
    fig, ax = plt.subplots(tight_layout=True)

    # Keep only the leading n + 1 columns of the recorded state history.
    temperatures = np.array(env.statelist)[:, :env.n + 1]
    ax.plot(temperatures)
    ax.set_xlabel('hours')
    ax.set_ylabel('Celsius')
    ax.set_title('Office Small Zonal Temperature')

    # Fall back to the environment's own zone names when none are supplied.
    if zone_names is None:
        zone_names = [zone.name for zone in env.zones]
        zone_names.append('Outside')
    ax.legend(zone_names, loc="lower right")


def plot_power(env: BuildingEnv) -> None:
    """Plot the summed absolute action values for each recorded step.

    Args:
        env: environment whose ``actionlist`` holds the actions taken so far.
    """
    fig, ax = plt.subplots(tight_layout=True)

    # TODO: is this actually in watts?
    # Stack the recorded actions into one array (one row per step) and sum
    # the magnitudes across each row.
    actions = np.stack(env.actionlist)
    power_consumption = np.abs(actions).sum(axis=1)

    ax.plot(power_consumption)
    ax.set_title('Office Small Power Consumption')
    ax.set_xlabel('hours')
    ax.set_ylabel('Watts')

### Environment Information

In [None]:
# Build the environment from a ParameterGenerator configuration
# (sustaingym/envs/building/utils.py has more info on the options).
params = ParameterGenerator(
    building='OfficeSmall',
    weather='Hot_Dry',
    location='Tucson',
)
env = BuildingEnv(params)

# Legend labels for the temperature plots, and the number of steps per rollout.
ZONE_NAMES = ['South', 'East', 'North', 'West', 'Core', 'Plenum', 'Outside']
num_hours = 24

# Summarize the observation and action spaces, with one sample from each.
for label, value in (
    ('Size of State Space:', env.observation_space.shape),
    ('Size of Action Space:', env.action_space.shape),
    ('Min action:', env.action_space.low),
    ('Max action:', env.action_space.high),
    ('Sample State:', env.observation_space.sample()),
    ('Sample Action:', env.action_space.sample()),
):
    print(label, value)

### Taking no actions

In [None]:
# Step the environment `num_hours` times with an all-zero action.
env.reset()
# Build the zero action once from the action space's shape and dtype, rather
# than drawing a random sample and multiplying it by zero on every step
# (which needlessly advances the space's RNG and can produce -0.0 entries).
a = np.zeros(env.action_space.shape, dtype=env.action_space.dtype)
for i in tqdm(range(num_hours)):
    obs, r, terminated, truncated, _ = env.step(a)

In [None]:
# Plot the temperatures recorded in env.statelist, labelled with ZONE_NAMES.
plot_temp(env, zone_names=ZONE_NAMES)

In [None]:
# Plot the per-step sum of absolute action values recorded in env.actionlist.
plot_power(env)

### Taking random actions

In [None]:
# Start a fresh episode, as the other rollouts in this notebook do; without
# the reset this loop would keep stepping the episode left over from the
# previous section.
env.reset()
for i in range(num_hours):
    a = env.action_space.sample()  # Randomly select an action
    obs, r, terminated, truncated, _ = env.step(a)  # Return observation and reward

In [None]:
# Plot the recorded temperatures; with no zone_names given, the legend uses
# the names of env.zones plus 'Outside'.
plot_temp(env)

In [None]:
# Plot the per-step sum of absolute action values from env.actionlist.
plot_power(env)

### MPC Agent

In [None]:
from sustaingym.algorithms.building.mpc_controller import MPCAgent

# Build the MPC controller around the same `env` instance used above, reusing
# the environment's own `gamma`.
# NOTE(review): safety_margin=0.96 and planning_steps=10 are passed straight
# through; their exact meaning is defined by MPCAgent — confirm there.
agent = MPCAgent(env, gamma=env.gamma, safety_margin=0.96, planning_steps=10)

In [None]:
# Roll out the MPC controller from a fresh episode and accumulate the reward.
# Uses the notebook-wide `num_hours` instead of a second, separately named
# copy of the same constant.
env.reset()
reward_total = 0
for i in range(num_hours):
    # predict() returns a pair; only the first element (the action) is used.
    a, _state = agent.predict()
    obs, r, terminated, truncated, _ = env.step(a)
    reward_total += r
print("total reward is: ", reward_total)

In [None]:
# Plot the temperatures recorded in env.statelist, using the default legend
# (names of env.zones plus 'Outside').
plot_temp(env)

In [None]:
# Plot the summed absolute action values recorded in env.actionlist.
plot_power(env)

### PPO Agent

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.utils import set_random_seed

In [None]:
# Fix the random seed before the PPO model is built and trained.
# Per the Stable-Baselines3 docs, set_random_seed seeds Python's `random`,
# NumPy and PyTorch.
# NOTE(review): this cell comes after the rollouts above, so on a top-to-bottom
# run it does not affect them.
seed = 25
set_random_seed(seed=seed)

In [None]:
# Train PPO in short rounds. After each round, roll the current policy out
# for `num_hours` steps and record the average per-step reward.
n_rounds = 300
timesteps_per_round = 1000

model = PPO(MlpPolicy, env, verbose=1)
# The model's vectorized wrapper around `env`; it does not change between
# rounds, so fetch it once.
vec_env = model.get_env()
rewardlist = []

for round_idx in range(n_rounds):
    model.learn(total_timesteps=timesteps_per_round)

    rw = 0.0
    obs = vec_env.reset()
    # A distinct loop variable avoids clobbering the round counter.
    for step in range(num_hours):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = vec_env.step(action)
        # `rewards` has one entry per vectorized env; a single env was passed
        # to PPO, so take that entry as a plain float rather than
        # accumulating a length-1 array.
        rw += float(rewards[0])

    avg_reward = rw / num_hours
    print('Avg reward:', avg_reward)
    rewardlist.append(avg_reward)

print("################TRAINING is Done############")
model.save('PPO_quick')

In [None]:
# Load the saved policy and attach the existing environment in one call,
# instead of constructing a throwaway, freshly initialised PPO model only to
# obtain its wrapped environment.
model = PPO.load("PPO_quick", env=env)
vec_env = model.get_env()
obs = vec_env.reset()
print("Initial observation", obs)

# Roll the loaded policy out for `num_hours` steps.
for i in range(num_hours):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = vec_env.step(action)

In [None]:
# Plot the temperatures recorded in env.statelist, labelled with ZONE_NAMES.
plot_temp(env, zone_names=ZONE_NAMES)

In [None]:
# Plot the per-step sum of absolute action values recorded in env.actionlist.
plot_power(env)

In [None]:
# Plot the average reward recorded after each training round.
fig, ax = plt.subplots(tight_layout=True)
ax.plot(rewardlist)
ax.set_xlabel('episode')
ax.set_ylabel('avg. reward')
ax.set_title('Quick PPO training')
plt.show()