In [1]:
# System operations
import inspect
import os
import uuid

# Date and time
from datetime import datetime

# type hinting
from typing import List, Mapping, Tuple

# Data visualization
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

# User interaction
from IPython.display import clear_output
from ipywidgets import Button, FloatSlider, HBox, HTML
from ipywidgets import IntProgress, Text, VBox

# Data manipulation
from bs4 import BeautifulSoup
import math
import numpy as np
import pandas as pd
import random
import re
import requests
import simplejson as json

# CityLearn
from citylearn.agents.rbc import HourRBC
from citylearn.agents.q_learning import TabularQLearning
from citylearn.citylearn import CityLearnEnv
from citylearn.data import DataSet
from citylearn.reward_function import RewardFunction
from citylearn.wrappers import NormalizedObservationWrapper
from citylearn.wrappers import StableBaselines3Wrapper
from citylearn.wrappers import TabularQLearningWrapper

# baseline RL algorithms
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import BaseCallback
from rewards.user_reward import SubmissionReward
from utils.plot import *

In [2]:
class Config:
    """Static settings shared by the notebook cells."""

    # Episode count; not referenced by the cells visible in this section.
    num_episodes = 10

    # Root folder of the dataset (relative to the notebook's working directory).
    data_dir = './data/'

    # Schema file for the warm-up phase, resolved underneath ``data_dir``.
    SCHEMA = os.path.join(data_dir, 'schemas/warm_up/schema.json')

In [3]:
# Instantiate the settings holder; later cells read `config.SCHEMA` from it.
config = Config()

In [None]:
# Unwrapped CityLearn environment built from the configured schema, with
# SubmissionReward supplied as the reward function.
env = CityLearnEnv(config.SCHEMA, reward_function=SubmissionReward)

## SAC

In [None]:
# Separate CityLearn environment for the SAC agent; the following cells rebind
# this same name to wrapped versions of it.
sac_env = CityLearnEnv(config.SCHEMA, reward_function = SubmissionReward)

In [None]:
# Wrap the environment in NormalizedObservationWrapper.
# NOTE(review): this rebinds `sac_env` to a wrapper of itself, so re-running
# this cell without re-running the cell above wraps the environment again.
sac_env = NormalizedObservationWrapper(sac_env)

In [None]:
# Wrap in StableBaselines3Wrapper before the environment is passed to SAC below.
# NOTE(review): like the previous cell, this is not idempotent on re-run.
sac_env = StableBaselines3Wrapper(sac_env)

In [None]:
# SAC agent using the 'MlpPolicy' policy on the wrapped environment; no other
# arguments are passed, so everything else is left at the library defaults.
# NOTE(review): no seed is given — consider passing one for reproducible runs.
sac_model = SAC(policy='MlpPolicy', env=sac_env)

In [None]:
class CustomCallback(BaseCallback):
    """Training callback that accumulates episode rewards and advances a progress bar."""

    def __init__(self, env: CityLearnEnv, loader: IntProgress):
        r"""Initialize CustomCallback.

        Parameters
        ----------
        env: CityLearnEnv
            Environment whose ``time_step`` and ``rewards`` are read on every step.
        loader: IntProgress
            Progress bar whose ``value`` is incremented on every step.
        """

        super().__init__(verbose=0)
        self.env = env
        self.loader = loader
        # One running total per episode; the last entry is the one being filled.
        self.reward_history = [0]

    def _on_step(self) -> bool:
        r"""Update the reward history and the progress bar after a step.

        Returns
        -------
        bool
            Always ``True``; stable-baselines3 treats a ``False`` return as a
            request to stop training.
        """

        episode_restarted = self.env.time_step == 0

        if not episode_restarted:
            # Add the summed rewards of the most recent step to the current episode total.
            self.reward_history[-1] += sum(self.env.rewards[-1])
        else:
            # The environment is back at its first time step: open a new episode total.
            self.reward_history.append(0)

        self.loader.value += 1

        return True

In [None]:
# ----------------- CALCULATE NUMBER OF TRAINING EPISODES -----------------
# SAC is trained for a fraction of the Tabular Q-Learning episode count.
# NOTE(review): `tql_episodes` repeats the value of `Config.num_episodes` (10) —
# confirm whether the two are meant to be linked.
tql_episodes=10
fraction = 0.25
# int() truncates, so 10 * 0.25 gives 2 episodes.
sac_episodes = int(tql_episodes*fraction)
print('Fraction of Tabular Q-Learning episodes used:', fraction)
print('Number of episodes to train:', sac_episodes)
# Steps counted per episode: one fewer than the environment's `time_steps`.
sac_episode_timesteps = sac_env.time_steps - 1
# Total step budget later passed to `sac_model.learn` and used as the loader maximum.
sac_total_timesteps = sac_episodes*sac_episode_timesteps

In [None]:
def get_loader(**kwargs):
    """Build an ``IntProgress`` bar.

    Any keyword argument overrides the matching default below; keywords that
    have no default are forwarded to ``IntProgress`` unchanged.
    """

    options = dict(
        value=0,
        min=0,
        max=10,
        description='Simulating:',
        bar_style='',
        style={'bar_color': 'maroon'},
        orientation='horizontal',
    )
    # Caller-supplied values win over the defaults.
    options.update(kwargs)

    return IntProgress(**options)

In [None]:
# Display the total number of training timesteps computed above.
sac_total_timesteps

In [None]:

# ------------------------------- SET LOADER ------------------------------
# Progress bar whose maximum equals the number of training steps requested below.
sac_loader = get_loader(max=sac_total_timesteps)
display(sac_loader)

# ------------------------------- TRAIN MODEL -----------------------------
# The callback has to be handed to learn(); without it, it is never invoked,
# so the progress bar stays at zero and no reward history is recorded.
sac_callback = CustomCallback(env=sac_env, loader=sac_loader)
sac_model = sac_model.learn(
    total_timesteps=sac_total_timesteps,
    callback=sac_callback,
)

In [None]:
# Inspect the shape of the first element of what sac_env.reset() returns.
# NOTE(review): whether [0] picks the observation out of an (obs, info) tuple or
# the first entry of the observation itself depends on the installed gym API — confirm.
sac_env.reset()[0].shape

In [None]:
# Stock 'Pendulum-v1' environment, used by the cells below alongside sac_env.
# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell.
import gym
test_env = gym.make('Pendulum-v1')


In [None]:
# Length of whatever test_env.reset() returns.
# NOTE(review): presumably a check of whether reset() yields a bare observation
# or an (obs, info) tuple — confirm intent.
len(test_env.reset())

In [None]:
# Reset the Pendulum environment and display the value reset() returns.
test_env.reset()

In [None]:
# Reset the wrapped CityLearn environment and display the value reset() returns.
sac_env.reset()

In [None]:
# SAC agent with the "MlpPolicy" policy on the Pendulum environment.
model_p = SAC("MlpPolicy", test_env)

In [None]:
# Short 500-timestep training run on Pendulum, with log_interval set to 10.
model_p.learn(total_timesteps=500,log_interval=10)

In [None]:
# Reset the wrapped CityLearn environment again and display the returned value.
# NOTE(review): same statement as an earlier cell — likely leftover debugging.
sac_env.reset()

In [None]:
# Reset the first entry of `sac_model.env.envs` and display the returned value.
# NOTE(review): presumably the environment held inside the vectorized wrapper that
# stable-baselines3 builds around sac_env — confirm.
sac_model.env.envs[0].reset()

In [None]:
# Display the repr of the wrapped SAC environment object.
sac_env

In [None]:
# Reset the Pendulum environment again and display the returned value.
# NOTE(review): same statement as an earlier cell — likely leftover debugging.
test_env.reset()

In [None]:
# Reset the first entry of `model_p.env.envs` (the Pendulum-side counterpart of
# the `sac_model.env.envs[0]` check) and display the returned value.
model_p.env.envs[0].reset()

In [None]:
# Reset the unwrapped CityLearn environment and display the returned value.
env.reset()