In [34]:

import os
import random
from dataclasses import dataclass

import numpy as np
import torch
from datasets import load_dataset
from transformers import DecisionTransformerConfig, DecisionTransformerModel, Trainer, TrainingArguments
from train_DT import TrainableDT
import gym

from citylearn.citylearn import CityLearnEnv
import itertools

In [69]:
# Function that gets an action from the model using autoregressive prediction with a window of the previous 20 timesteps.
def get_action(model, states, actions, rewards, returns_to_go, timesteps):
    # This implementation does not condition on past rewards

    states = states.reshape(1, -1, model.config.state_dim)
    actions = actions.reshape(1, -1, model.config.act_dim)
    returns_to_go = returns_to_go.reshape(1, -1, 1)
    timesteps = timesteps.reshape(1, -1)

    states = states[:, -model.config.max_length :]
    actions = actions[:, -model.config.max_length :]
    returns_to_go = returns_to_go[:, -model.config.max_length :]
    timesteps = timesteps[:, -model.config.max_length :]
    padding = model.config.max_length - states.shape[1]
    # pad all tokens to sequence length
    attention_mask = torch.cat([torch.zeros(padding), torch.ones(states.shape[1])])
    attention_mask = attention_mask.to(dtype=torch.long).reshape(1, -1)
    states = torch.cat([torch.zeros((1, padding, model.config.state_dim)), states], dim=1).float()
    actions = torch.cat([torch.zeros((1, padding, model.config.act_dim)), actions], dim=1).float()
    returns_to_go = torch.cat([torch.zeros((1, padding, 1)), returns_to_go], dim=1).float()
    timesteps = torch.cat([torch.zeros((1, padding), dtype=torch.long), timesteps], dim=1)
    
    print(timesteps.size())
    print(states.size())

    state_preds, action_preds, return_preds = model.original_forward(
        states=states,
        actions=actions,
        rewards=rewards,
        returns_to_go=returns_to_go,
        timesteps=timesteps,
        attention_mask=attention_mask,
        return_dict=False,
    )

    return action_preds[0, -1]

In [36]:
run_path = "checkpoints/city_learn/DT_PPO_100k_cl_24/run/"

In [37]:
model_path = "checkpoints/city_learn/DT_PPO_100k_cl_24/run/checkpoint-20"

In [38]:
model = TrainableDT.from_pretrained(model_path,local_files_only = True)

In [55]:
model.config.max_length

24

In [39]:
total_params = sum(p.numel() for p in model.parameters())

In [40]:
total_params

1259424

### Environment Settings

In [41]:
def action_space_to_dict(aspace):
    """ Only for box space """
    return { "high": aspace.high,
             "low": aspace.low,
             "shape": aspace.shape,
             "dtype": str(aspace.dtype)
    }

def env_reset(env):
    observations = env.reset()
    action_space = env.action_space
    observation_space = env.observation_space
    #building_info = env.get_building_information()
    #building_info = list(building_info.values())
    action_space_dicts = [action_space_to_dict(asp) for asp in action_space]
    observation_space_dicts = [action_space_to_dict(osp) for osp in observation_space]
    obs_dict = {"action_space": action_space_dicts,
                "observation_space": observation_space_dicts,
              #  "building_info": building_info,
                "observation": observations }
    return obs_dict

In [42]:
index_commun = [0, 2, 19, 4, 8, 24]
index_particular = [20, 21, 22, 23]

normalization_value_commun = [12, 24, 2, 100, 100, 1]
normalization_value_particular = [5, 5, 5, 5]

len_tot_index = len(index_commun) + len(index_particular) * 5

## env wrapper for stable baselines
class EnvCityGym(gym.Env):
    """
    Env wrapper coming from the gym library.
    """
    def __init__(self, env):
        self.env = env

        # get the number of buildings
        self.num_buildings = len(env.action_space)

        # define action and observation space
        self.action_space = gym.spaces.Box(low=np.array([-1] * self.num_buildings), high=np.array([1] * self.num_buildings), dtype=np.float32)

        # define the observation space
        self.observation_space = gym.spaces.Box(low=np.array([0] * len_tot_index), high=np.array([1] * len_tot_index), dtype=np.float32)

        # TO THINK : normalize the observation space
        self.current_obs = None
    def reset(self):
        obs_dict = env_reset(self.env)
        obs = self.env.reset()

        observation = self.get_observation(obs)
        
        self.current_obs = observation
        self.interactions = []

        return observation

    def get_observation(self, obs):
        """
        We retrieve new observation from the building observation to get a proper array of observation
        Basicly the observation array will be something like obs[0][index_commun] + obs[i][index_particular] for i in range(5)

        The first element of the new observation will be "commun observation" among all building like month / hour / carbon intensity / outdoor_dry_bulb_temperature_predicted_6h ...
        The next element of the new observation will be the concatenation of certain observation specific to buildings non_shiftable_load / solar_generation / ...  
        """
        
        # we get the observation commun for each building (index_commun)
        observation_commun = [obs[0][i]/n for i, n in zip(index_commun, normalization_value_commun)]
        observation_particular = [[o[i]/n for i, n in zip(index_particular, normalization_value_particular)] for o in obs]

        observation_particular = list(itertools.chain(*observation_particular))
        # we concatenate the observation
        observation = observation_commun + observation_particular

        return observation

    def step(self, action):
        """
        we apply the same action for all the buildings
        """
        # reprocessing action
        action = [[act] for act in action]

        # we do a step in the environment
        obs, reward, done, info = self.env.step(action)
        
        observation = self.get_observation(obs)
        
        
        self.interactions.append({
            "observations": self.current_obs,
            "next_observations": self.get_observation(obs),  # Assuming next observation is same as current for simplicity
            "actions": action,
            "rewards": reward,
            "dones": done,
            "info": info
        })
        
        self.current_obs = observation
        
        

        return observation, sum(reward), done, info
        
    def render(self, mode='human'):
        return self.env.render(mode)

In [43]:
schema = "citylearn_challenge_2022_phase_1"

In [57]:
env = CityLearnEnv(schema=schema)
env = EnvCityGym(env)

In [58]:
device = "cpu"

In [59]:
state_mean = np.load(run_path+"state_mean.npy")
state_std = np.load(run_path+"state_std.npy")

state_mean = torch.from_numpy(state_mean).to(device=device)
state_std = torch.from_numpy(state_std).to(device=device)


In [60]:
TARGET_RETURN = -3300

In [61]:
state = env.reset()

In [62]:
state = np.array(state)

In [63]:
state_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]

In [64]:
target_return = torch.tensor(TARGET_RETURN, device=device, dtype=torch.float32).reshape(1, 1)

In [65]:
scale = 1

In [66]:
states = torch.from_numpy(state).reshape(1, state_dim).to(device=device, dtype=torch.float32)
actions = torch.zeros((0, act_dim), device=device, dtype=torch.float32)
rewards = torch.zeros(0, device=device, dtype=torch.float32)
timesteps = torch.tensor(0, device=device, dtype=torch.long).reshape(1, 1)

In [70]:
for t in range(8000):
    
    actions = torch.cat([actions, torch.zeros((1, act_dim), device=device)], dim=0)
    rewards = torch.cat([rewards, torch.zeros(1, device=device)])
    
    
    action = get_action(
            model,
            (states - state_mean) / state_std,
            actions,
            rewards,
            target_return,
            timesteps,
        )
   
    #print(action)
    
    actions[-1] = action
    action = action.detach().cpu().numpy()
    
    state, reward, done, _ = env.step(action)
    
    state = np.array(state)

    cur_state = torch.from_numpy(state).to(device=device).reshape(1, state_dim)
    states = torch.cat([states, cur_state], dim=0)
    rewards[-1] = reward
    #print(reward)

    pred_return = target_return[0, -1] - (reward / scale)
    target_return = torch.cat([target_return, pred_return.reshape(1, 1)], dim=1)
    #print(pred_return)
    timesteps = torch.cat([timesteps, torch.ones((1, 1), device=device, dtype=torch.long) * (t + 1)], dim=1)


torch.Size([1, 24])
torch.Size([1, 24, 26])


IndexError: index out of range in self

In [71]:
kpis = env.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)

name,Building_1,Building_2,Building_3,Building_4,Building_5,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
annual_normalized_unserved_energy_total,0.0,0.0,0.0,0.0,0.0,0.0
annual_peak_average,,,,,,1.02
carbon_emissions_total,1.06,1.036,0.925,1.102,1.19,1.063
cost_total,1.067,1.045,0.886,1.071,1.257,1.065
daily_one_minus_load_factor_average,,,,,,0.993
daily_peak_average,,,,,,1.051
discomfort_delta_average,0.0,0.0,0.0,0.0,0.0,0.0
discomfort_delta_maximum,0.0,0.0,0.0,0.0,0.0,0.0
discomfort_delta_minimum,0.0,0.0,0.0,0.0,0.0,0.0
electricity_consumption_total,1.041,1.032,0.92,1.075,1.158,1.045


In [165]:
df_evaluate =env.env.evaluate()

In [166]:
df_evaluate

Unnamed: 0,cost_function,value,name,level
0,annual_normalized_unserved_energy_total,0.000000,District,district
1,annual_peak_average,1.004988,District,district
2,carbon_emissions_total,1.025520,District,district
3,cost_total,1.004578,District,district
4,daily_one_minus_load_factor_average,0.973440,District,district
...,...,...,...,...
78,discomfort_delta_maximum,0.000000,Building_10,building
79,discomfort_delta_average,0.000000,Building_10,building
80,one_minus_thermal_resilience_proportion,,Building_10,building
81,power_outage_normalized_unserved_energy_total,,Building_10,building
