In [1]:
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedSpaceWrapper, StableBaselines3Wrapper, NormalizedObservationWrapper

from custom_agent.CTDE.ma_sac_agents_all_act import Agents
from custom_agent.SAC_components.actor import Actor
from custom_reward.custom_reward import CustomReward

import gym

from stable_baselines3 import SAC

import torch
import torch.nn as nn

import pandas as pd

import os

import sys

In [2]:
sys.path.append("../")
!source venv/bin/activate

In [3]:
def evaluate(agents, env, name, nr_episodes):
    observations = env.reset()

    for episode in range(nr_episodes):
        while not env.done:
            if isinstance(agents, list) or isinstance(agents, nn.ModuleList):
                actions = []                
                for (agent, obs) in zip(agents, observations):
                    obs = torch.tensor(obs, dtype = torch.float32).unsqueeze(0)
                    act, _ = agent.normal_distr_sample(obs, reparameterize = False, deterministic = True)
                    actions.append(act.cpu().detach().numpy()[0])
            else:
                # observations = torch.tensor(observations, dtype = torch.float32).unsqueeze(0)
                actions, _ = agents.predict(observations, deterministic=True)
            observations, _, _, _ = env.step(actions)
        if episode == 0:
            kpis = env.evaluate()
        else:
            kpis["value"] += env.evaluate()["value"]
        
        # update power outage seed
        # env = update_power_outage_random_seed(env, 90000)
    
    # average
    kpis["value"] /= nr_episodes

    kpis.insert(0, "Type", name)
    # kpis = kpis.pivot(index=['cost_function', 'Reward function'], columns='name', values='value')    
    kpis = kpis.dropna(how='all')

    return kpis

In [4]:
def calculateFinalScore(kpis: pd.DataFrame):
    # get reward function names
    types = kpis.index.unique("Type")
    # display(kpis.index.unique("cost_function"))
    
    # create new score dataframe
    final_scores = pd.DataFrame(columns = ["cost_function", "description", "Type",  "value"])
    
    # get key performace indicator values per reward func 
    for f in types:
        # get values for current reward func
        kpis_f = kpis.xs(f, level = "Type")
        # display(kpis_f)
        
        # KPIS:
        # carbon emissions
        g = kpis_f.loc["carbon_emissions_total", "District"][0]
        # unmet hours
        u = kpis_f.loc["discomfort_proportion", "District"][0]
        # ramping
        r = kpis_f.loc["ramping_average", "District"][0]
        # 1 - load factor
        l = kpis_f.loc["daily_one_minus_load_factor_average", "District"][0]
        # daily peak
        d = kpis_f.loc["daily_peak_average", "District"][0]
        # all-time peak
        a = kpis_f.loc["annual_peak_average", "District"][0]
        # 1 - thermal resilience
        m = kpis_f.loc["one_minus_thermal_resilience_proportion", "District"][0]
        # normalized unserved energy
        s = kpis_f.loc["power_outage_normalized_unserved_energy_total", "District"][0]    
        
        # SCORE COMPONENTS (weights from citylearn challenge 2023):
        comfort = 0.3 * u
        emissions = 0.1 * g
        grid = 0.3 * (r + l + d + a) / 4
        resilience = 0.3 * (m + s) / 2
        
        # FINAL SCORE:
        score = comfort + emissions + grid + resilience
        
        # add score (and components) to kpi dataframe
        final_scores = final_scores.append(pd.DataFrame({"cost_function": ["Comfort"], 
                                                         "description": ["The comfort score component calculated using the citylearn 2023 challenge formula. Optimal is zero."],
                                                         "Type": [f],
                                                         "value": [comfort],
                                                         "name": "District"}))
        final_scores = final_scores.append(pd.DataFrame({"cost_function": ["Emissions"], 
                                                         "description": ["The emissions score component calculated using the citylearn 2023 challenge formula. Optimal is zero."],
                                                         "Type": [f],
                                                         "value": [emissions],
                                                         "name": "District"}))
        final_scores = final_scores.append(pd.DataFrame({"cost_function": ["Grid"], 
                                                         "description": ["The grid score component calculated using the citylearn 2023 challenge formula. Optimal is zero."],
                                                         "Type": [f],
                                                         "value": [grid],
                                                         "name": "District"}))
        final_scores = final_scores.append(pd.DataFrame({"cost_function": ["Resilience"], 
                                                         "description": ["The resilience score component calculated using the citylearn 2023 challenge formula. Optimal is zero."],
                                                         "Type": [f],
                                                         "value": [resilience],
                                                         "name": "District"}))
        final_scores = final_scores.append(pd.DataFrame({"cost_function": ["Final"], 
                                                         "description": ["The final performance score calculated using the citylearn 2023 challenge formula. Optimal is zero."],
                                                         "Type": [f],
                                                         "value": [score],
                                                         "name": "District"}))
                    
        
    final_scores = final_scores.pivot(index=["cost_function", "description", "Type"], columns="name", values="value")
    final_scores = final_scores.sort_values(["cost_function", "Type"])
    
    return final_scores        

In [5]:
def addDesc(kpis):
    desc_list = []
    for function in kpis["cost_function"]:
        if (function == "annual_normalized_unserved_energy_total"):
            desc_list.append("Annual proportion of unmet demand due to supply shortage e.g. power outage (normalized).")
        elif (function == "annual_peak_average"):
            desc_list.append("Annual peak electricity consumption (normalized).")
        elif (function == "carbon_emissions_total"):
            desc_list.append("Total carbon emissions due to electricity drawn from the grid (normalized).")
        elif (function == "cost_total"):
            desc_list.append("Total electricity monetary cost (normalized).")
        elif (function == "daily_one_minus_load_factor_average"):
            desc_list.append("Daily difference ratio of rolling mean demand to rolling peak demand over a specified period (normalized).")
        elif (function == "daily_peak_average"):
            desc_list.append("Daily average peak electricity consumption (normalized).")
        elif (function == "discomfort_delta_average"):
            desc_list.append("Mean temperature delta from the temperature setpoint.")
        elif (function == "discomfort_delta_maximum"):
            desc_list.append("Maximum temperature delta from the temperature setpoint.")  
        elif (function == "discomfort_delta_minimum"):
            desc_list.append("Minimum temperature delta from the temperature setpoint.")  
        elif (function == "discomfort_proportion"):
            desc_list.append("Proportion of occupied timesteps where the temperature falls outside of the comfort band from the temperature setpoint.")
        elif (function == "discomfort_too_cold_proportion"):
            desc_list.append("Proportion of occupied timesteps where the temperature falls below the comfort band from the temperature setpoint.")  
        elif (function == "discomfort_too_hot_proportion"):
            desc_list.append("Proportion of occupied timesteps where the temperature falls above the comfort band from the temperature setpoint.")  
        elif (function == "electricity_consumption_total"):
            desc_list.append("Total electricity that is consumed from the grid. (normalized).")  
        elif (function == "monthly_one_minus_load_factor_average"):
            desc_list.append("Monthly difference between 1 and the load factor i.e., ratio of rolling mean demand to rolling peak demand over a specified period (normalized).")  
        elif (function == "one_minus_thermal_resilience_proportion"):
            desc_list.append("Percentage of discomfort time steps during power outage.")  
        elif (function == "power_outage_normalized_unserved_energy_total"):
            desc_list.append("Proportion of unmet demand while in a power outage.")  
        elif (function == "ramping_average"):
            desc_list.append("Rolling sum of absolute difference in net electric consumption between consecutive time steps (normalized).")
        elif (function == "zero_net_energy"):
            desc_list.append("The net sum of electricty that is consumed from the grid and self-generated from renenewable sources (normalized).")  
            
    # print(desc_list)

    kpis.insert(2, "description", desc_list, True)

    return kpis

In [6]:
schema_path = "data/schema.json"

env = CityLearnEnv(schema=schema_path, reward_function = CustomReward, central_agent=False)
env2 = CityLearnEnv(schema=schema_path, reward_function = CustomReward, central_agent=True)

# wrap environment for a more workable env
env = NormalizedSpaceWrapper(env)
env2 = NormalizedObservationWrapper(env2)
env2 = StableBaselines3Wrapper(env2)

#### We only load actors

In [7]:
dir = "training/runs/custom_masac_tests/test_all_action2_copy/models"

actors_centr_critics = nn.ModuleList()
for (obs_space, act_space) in zip(env.observation_space, env.action_space):
            actors_centr_critics.append(Actor(0.0003, obs_space.shape[0], act_space.shape[0], act_space.low, act_space.high, (256, 256)))

for idx, actor in enumerate(actors_centr_critics):
    actor.load(os.path.join(dir, "actor" + str(idx) + "_1300000"))

sb3_actor = SAC.load("training/runs/reward_func_tests/lr3e-4_tau5e-3_gamma99e-2/custom5/models/final_model.zip")
sb3_actor.set_env(env2)

  logger.warn(
  return torch._C._cuda_getDeviceCount() > 0


#### Evaluate

In [8]:
kpis = pd.DataFrame()
kpis.append(evaluate(actors_centr_critics, env, "Multiple central critics", 1))
print(1)
kpis.append(evaluate(sb3_actor, env, "SB3 based CTCE", 1))

ValueError: Error: Unexpected observation shape (3, 32) for Box environment, please use (54,) or (n_env, 54) for the observation shape.

In [None]:
kpis.head(5)

Unnamed: 0,Type,cost_function,value,name,level
0,Multiple central critics,annual_normalized_unserved_energy_total,0.031072,District,district
1,Multiple central critics,annual_peak_average,0.902243,District,district
2,Multiple central critics,carbon_emissions_total,0.909054,District,district
3,Multiple central critics,cost_total,0.896997,District,district
4,Multiple central critics,daily_one_minus_load_factor_average,0.945919,District,district


In [None]:
kpis = addDesc(kpis)

In [None]:
kpis = kpis.pivot(index=["cost_function", "description", "Type"], columns="name", values="value")
kpis = kpis.sort_values(["cost_function", "Type"])

In [None]:
final_scores = calculateFinalScore(kpis)

In [None]:
final_scores

Unnamed: 0_level_0,Unnamed: 1_level_0,name,District
cost_function,description,Type,Unnamed: 3_level_1
Comfort,The comfort score component calculated using the citylearn 2023 challenge formula. Optimal is zero.,Multiple central critics,0.01195
Emissions,The emissions score component calculated using the citylearn 2023 challenge formula. Optimal is zero.,Multiple central critics,0.090905
Final,The final performance score calculated using the citylearn 2023 challenge formula. Optimal is zero.,Multiple central critics,0.525202
Grid,The grid score component calculated using the citylearn 2023 challenge formula. Optimal is zero.,Multiple central critics,0.265314
Resilience,The resilience score component calculated using the citylearn 2023 challenge formula. Optimal is zero.,Multiple central critics,0.157032
