In [75]:
import os

# import couple of libs some will be useful
import gym
import numpy as np
from collections import deque
import random
import re
import os
import sys
import time
import json
import itertools
from datasets import Dataset

# import stable_baselines3
from stable_baselines3 import PPO, A2C, DDPG, TD3
from stable_baselines3.common.utils import set_random_seed

from citylearn.citylearn import CityLearnEnv

import functools


In [76]:
schema = "citylearn_challenge_2022_phase_1"

In [77]:
def action_space_to_dict(aspace):
    """ Only for box space """
    return { "high": aspace.high,
             "low": aspace.low,
             "shape": aspace.shape,
             "dtype": str(aspace.dtype)
    }

def env_reset(env):
    observations = env.reset()
    action_space = env.action_space
    observation_space = env.observation_space
    #building_info = env.get_building_information()
    #building_info = list(building_info.values())
    action_space_dicts = [action_space_to_dict(asp) for asp in action_space]
    observation_space_dicts = [action_space_to_dict(osp) for osp in observation_space]
    obs_dict = {"action_space": action_space_dicts,
                "observation_space": observation_space_dicts,
              #  "building_info": building_info,
                "observation": observations }
    return obs_dict



In [78]:

index_commun = [0, 2, 19, 4, 8, 24]
index_particular = [20, 21, 22, 23]

normalization_value_commun = [12, 24, 2, 100, 100, 1]
normalization_value_particular = [5, 5, 5, 5]

len_tot_index = len(index_commun) + len(index_particular) * 5

## env wrapper for stable baselines
class EnvCityGym(gym.Env):
    """
    Env wrapper coming from the gym library.
    """
    def __init__(self, env):
        self.env = env

        # get the number of buildings
        self.num_buildings = len(env.action_space)

        # define action and observation space
        self.action_space = gym.spaces.Box(low=np.array([-1] * self.num_buildings), high=np.array([1] * self.num_buildings), dtype=np.float32)

        # define the observation space
        self.observation_space = gym.spaces.Box(low=np.array([0] * len_tot_index), high=np.array([1] * len_tot_index), dtype=np.float32)

        # TO THINK : normalize the observation space
        self.current_obs = None
    def reset(self):
        obs_dict = env_reset(self.env)
        obs = self.env.reset()

        observation = self.get_observation(obs)
        
        self.current_obs = observation
        self.interactions = []

        return observation

    def get_observation(self, obs):
        """
        We retrieve new observation from the building observation to get a proper array of observation
        Basicly the observation array will be something like obs[0][index_commun] + obs[i][index_particular] for i in range(5)

        The first element of the new observation will be "commun observation" among all building like month / hour / carbon intensity / outdoor_dry_bulb_temperature_predicted_6h ...
        The next element of the new observation will be the concatenation of certain observation specific to buildings non_shiftable_load / solar_generation / ...  
        """
        
        # we get the observation commun for each building (index_commun)
        observation_commun = [obs[0][i]/n for i, n in zip(index_commun, normalization_value_commun)]
        observation_particular = [[o[i]/n for i, n in zip(index_particular, normalization_value_particular)] for o in obs]

        observation_particular = list(itertools.chain(*observation_particular))
        # we concatenate the observation
        observation = observation_commun + observation_particular

        return observation

    def step(self, action):
        """
        we apply the same action for all the buildings
        """
        # reprocessing action
        action = [[act] for act in action]
        #print(action)
        # we do a step in the environment
        obs, reward, done, info = self.env.step(action)
        
        observation = self.get_observation(obs)
        
        
        self.interactions.append({
            "observations": self.current_obs,
            "next_observations": self.get_observation(obs),  # Assuming next observation is same as current for simplicity
            "actions": action,
            "rewards": reward,
            "dones": done,
            "info": info
        })
        
        self.current_obs = observation
        
        

        return observation, sum(reward), done, info
        
    def render(self, mode='human'):
        return self.env.render(mode)

In [79]:
model = PPO.load("pretrained_citylearn/PPO/model_PPO_timesteps_1000_seed_572")
env = CityLearnEnv(schema=schema)
env = EnvCityGym(env)


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])  # type: ignore[arg-type]
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [80]:

obs = env.reset()
nb_iter = 300
reward_tot = 0
for i in range(nb_iter):

    action = model.predict(obs,deterministic = True)[0]
    #print(action)
    
        
    obs, rewards, dones, info = env.step(action)
    reward_tot += rewards 

    

    if i % 1000 == 0:
        print("actions : ", action)
        print("rewards : ", rewards)


actions :  [ 0.00526512 -0.0675551   0.03242772  0.03278241 -0.02485087]
rewards :  -5.116924926638603


In [81]:
df_evaluate = env.env.evaluate()

In [87]:
df_evaluate[df_evaluate.name=="District"]

Unnamed: 0,cost_function,value,name,level
0,annual_normalized_unserved_energy_total,0.0,District,district
1,annual_peak_average,1.009098,District,district
2,carbon_emissions_total,1.016995,District,district
3,cost_total,1.017304,District,district
4,daily_one_minus_load_factor_average,0.997155,District,district
5,daily_peak_average,1.013926,District,district
6,discomfort_delta_average,0.0,District,district
7,discomfort_delta_maximum,0.0,District,district
8,discomfort_delta_minimum,0.0,District,district
9,discomfort_proportion,,District,district


In [85]:
kpis = env.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)

name,Building_1,Building_2,Building_3,Building_4,Building_5,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
annual_normalized_unserved_energy_total,0.0,0.0,0.0,0.0,0.0,0.0
annual_peak_average,,,,,,1.009
carbon_emissions_total,1.044,1.0,1.016,1.025,1.0,1.017
cost_total,1.044,1.0,1.015,1.028,1.0,1.017
daily_one_minus_load_factor_average,,,,,,0.997
daily_peak_average,,,,,,1.014
discomfort_delta_average,0.0,0.0,0.0,0.0,0.0,0.0
discomfort_delta_maximum,0.0,0.0,0.0,0.0,0.0,0.0
discomfort_delta_minimum,0.0,0.0,0.0,0.0,0.0,0.0
electricity_consumption_total,1.048,1.0,1.013,1.028,1.0,1.018


In [83]:
df_evaluate[df_evaluate.cost_function=="electricity_consumption_total"]

Unnamed: 0,cost_function,value,name,level
12,electricity_consumption_total,1.018029,District,district
18,electricity_consumption_total,1.048458,Building_1,building
31,electricity_consumption_total,1.000055,Building_2,building
44,electricity_consumption_total,1.013177,Building_3,building
57,electricity_consumption_total,1.028449,Building_4,building
70,electricity_consumption_total,1.000006,Building_5,building


In [19]:

model = PPO.load("pretrained_citylearn/PPO/model_PPO_timesteps_1000000_seed_572")
env = CityLearnEnv(schema=schema)
env = EnvCityGym(env)

#small
model_s = PPO.load("pretrained_citylearn/PPO/model_PPO_timesteps_10000_seed_572")
env_s = CityLearnEnv(schema=schema)
env_s = EnvCityGym(env_s)


obs = env.reset()
obs_s = env_s.reset()

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])  # type: ignore[arg-type]
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [21]:
nb_iter = 719

reward_tot = 0
reward_tot_s = 0

for i in range(nb_iter):

    action = model.predict(obs)[0]
    action_s = model_s.predict(obs_s)[0]
        
    obs, rewards, dones, info = env.step(action)
    reward_tot += rewards 
    
    obs_s, rewards_s, dones_s, info_s = env_s.step(action_s)
    reward_tot_s += rewards_s
    
    

    if i % 1000 == 0:
        print("actions : ", action)
        print("rewards : ", rewards)

#print(sum(env.env.evaluate())/2)
print(reward_tot)

actions :  [ 0.49094275  1.         -1.         -0.80103385 -1.        ]
rewards :  -8.490358173847198
-2792.6668946893074


In [22]:
df_evaluate =env.env.evaluate()

In [24]:
df_evaluate

Unnamed: 0,cost_function,value,name,level
0,annual_normalized_unserved_energy_total,0.000000,District,district
1,annual_peak_average,1.000000,District,district
2,carbon_emissions_total,1.002605,District,district
3,cost_total,1.001061,District,district
4,daily_one_minus_load_factor_average,0.999703,District,district
...,...,...,...,...
78,discomfort_delta_maximum,0.000000,Building_10,building
79,discomfort_delta_average,0.000000,Building_10,building
80,one_minus_thermal_resilience_proportion,,Building_10,building
81,power_outage_normalized_unserved_energy_total,,Building_10,building


In [None]:
df_evaluate.value[:18]

In [None]:
df_evaluate_s =env_s.env.evaluate()

In [None]:
df_evaluate_s.to_csv("test.csv")

In [None]:
reward_tot_s

In [None]:
reward_tot