In [5]:
import math
import numpy as np
import pandas as pd
import random
import re
import requests
import simplejson as json
import os 
from typing import List, Mapping, Tuple
from ipywidgets import IntProgress, Text, VBox

# CityLearn
from citylearn.agents.rbc import HourRBC
from citylearn.agents.q_learning import TabularQLearning
from citylearn.citylearn import CityLearnEnv
from citylearn.data import DataSet
from citylearn.reward_function import RewardFunction
from citylearn.wrappers import NormalizedObservationWrapper
from citylearn.wrappers import StableBaselines3Wrapper
from citylearn.wrappers import TabularQLearningWrapper
from omegaconf import OmegaConf

# baseline RL algorithms
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import BaseCallback

  # NOTE(review): these four constants are indented at module level with no
  # enclosing class or function visible in this cell, which Python rejects as an
  # "unexpected indent". They look like class attributes pasted without their
  # class -- TODO confirm, then dedent or remove. No visible cell reads them.
  __DEFAULT = ''
  __STORAGE_SUFFIX = '_without_storage'
  __PARTIAL_LOAD_SUFFIX = '_and_partial_load'
  __PV_SUFFIX = '_and_pv'


In [6]:
import os
import torch
import argparse
import numpy as np

from tqdm.auto import trange
from omegaconf import OmegaConf

from stable_baselines3.common.vec_env import DummyVecEnv

from trajectory.models.gpt import GPT
from trajectory.utils.common import set_seed
from trajectory.utils.env import create_env, rollout, vec_rollout
from rewards.user_reward import SubmissionReward


In [7]:
class WrapperEnv:
    """
    Restricted stand-in for a CityLearn environment.

    Carries a fixed set of values copied out of an ``env_data`` mapping, so
    that this object can be handed out instead of the full environment.
    """

    def __init__(self, env_data):
        """Copy the exposed fields from ``env_data`` onto the instance.

        Args:
            env_data: Mapping that must contain every field named below.

        Raises:
            KeyError: If any of the expected keys is missing from ``env_data``.
        """
        exposed_fields = (
            'observation_names',
            'action_names',
            'observation_space',
            'action_space',
            'time_steps',
            'seconds_per_time_step',
            'random_seed',
            'buildings_metadata',
            'episode_tracker',
        )
        # One instance attribute per key, assigned in the order listed above.
        for field in exposed_fields:
            setattr(self, field, env_data[field])

    def get_metadata(self):
        """Return the stored buildings metadata under the ``'buildings'`` key."""
        metadata = {'buildings': self.buildings_metadata}
        return metadata


def create_citylearn_env(SCHEMA, reward_function):
    """Build a CityLearn environment together with a restricted view of it.

    Args:
        SCHEMA: Schema argument passed positionally to ``CityLearnEnv``.
        reward_function: Forwarded to ``CityLearnEnv`` as ``reward_function``.

    Returns:
        A ``(env, wrapper_env)`` tuple: the full ``CityLearnEnv`` and a
        ``WrapperEnv`` built from values read off that environment.
    """
    full_env = CityLearnEnv(SCHEMA, reward_function=reward_function)

    snapshot = {
        'observation_names': full_env.observation_names,
        'action_names': full_env.action_names,
        'observation_space': full_env.observation_space,
        'action_space': full_env.action_space,
        'time_steps': full_env.time_steps,
        # These three are not copied from the environment; the view carries None.
        'random_seed': None,
        'episode_tracker': None,
        'seconds_per_time_step': None,
        'buildings_metadata': full_env.get_metadata()['buildings'],
    }

    return full_env, WrapperEnv(snapshot)


In [8]:
# Device used as map_location for the checkpoint loads and for placing the model
# and the planning context in later cells.
device = "cpu"

In [9]:
# Root folder for local data files; the schema path below is built from it.
data_dir = './data/'
# Path of the environment schema; passed to create_citylearn_env in a later cell.
SCHEMA = os.path.join(data_dir, 'schemas/warm_up/schema.json')

In [10]:
# NOTE(review): not referenced by any visible cell (the environment is built from
# SCHEMA, not from this name) -- confirm it is still needed.
schema =  "citylearn_challenge_2022_phase_1"

In [11]:
# Folder holding config.yaml, discretizer.pt and model_last.pt, which the
# following cells load.
checkpoints_path = 'checkpoints/city_learn/uniform/baseline'

In [12]:
# Evaluation settings; the beam-search and sampling parameters are read from
# this object in the next cell. The path gets its own name so that `config`
# is only ever bound to the loaded configuration, never to a string.
config_path = "configs/eval_base.yaml"
config = OmegaConf.load(config_path)
# Configuration saved next to the checkpoint; its `model` section is used to
# construct the GPT model.
run_config = OmegaConf.load(os.path.join(checkpoints_path, "config.yaml"))

In [13]:
# Planning hyper-parameters copied out of the evaluation config into
# module-level names for the cells below.
beam_context = config.beam_context
beam_width = config.beam_width
beam_steps = config.beam_steps
plan_every = config.plan_every
sample_expand = config.sample_expand
k_act = config.k_act
k_obs = config.k_obs
k_reward = config.k_reward
temperature = config.temperature
# No trailing comma: `config.discount,` would bind a 1-tuple, not the value itself.
discount = config.discount
# Hard-coded rather than read from config; sizes the planning context buffer
# allocated in a later cell.
max_steps = 719

In [14]:
# Restore the discretizer saved alongside the checkpoint.
# NOTE(review): torch.load unpickles the file, so only point this at trusted
# checkpoints. Presumably this is a whole pickled object rather than a state
# dict -- confirm before switching to a weights-only load.
discretizer = torch.load(os.path.join(checkpoints_path, "discretizer.pt"), map_location=device)


In [15]:
# Build the GPT model from the `model` section of the saved run config.
model = GPT(**run_config.model)
# Switch to eval mode and move to the target device before restoring weights.
model.eval()
model.to(device)
# Kept as the last expression so the cell displays load_state_dict's result.
model.load_state_dict(torch.load(os.path.join(checkpoints_path, "model_last.pt"), map_location=device))

<All keys matched successfully>

In [None]:
# Build the full environment and its restricted WrapperEnv view. SubmissionReward
# is passed uninstantiated here; an instance is attached to env in a later cell.
env,wrapped_env= create_citylearn_env(SCHEMA,SubmissionReward)

In [None]:
# reset() is unpacked as a pair; only the first element is kept as `obs`.
obs, _= env.reset()

In [None]:
# Display the value kept from the reset above.
obs

In [None]:
# Attach a SubmissionReward instance constructed from this environment.
env.reward_function = SubmissionReward(env)

# -------------------- WRAP ENVIRONMENT --------------------
# NOTE(review): `env` is rebound to its own wrapper, so re-running this cell
# wraps an already-wrapped environment -- re-create env first when re-running.
env = NormalizedObservationWrapper(env)
env = StableBaselines3Wrapper(env)


In [None]:
# Reset the wrapped environment; the second element of the returned pair is discarded.
obs,_= env.reset()

In [None]:
# Display the length of the value returned by the wrapped environment's reset.
len(obs)

In [None]:
# Overrides the beam_steps value that an earlier cell read from config.
beam_steps = 3

In [None]:
# Keep the model's transition/observation/action dimensions in local names.
transition_dim, obs_dim, act_dim = model.transition_dim, model.observation_dim, model.action_dim
# Token trajectory used for model action planning, allocated directly on the
# target device. The extra step (+1) only avoids an index error when the
# context is updated on the last step.
context = torch.zeros(1, transition_dim * (max_steps + 1), dtype=torch.long, device=device)

In [None]:
# Display the shape of the first (and only) row of the context buffer.
context[0].shape

In [None]:
# Unpack the reset result the same way the earlier reset cells do, so that
# `obs` holds only the first element rather than the whole returned pair.
obs, _ = env.reset()