In [1]:
import gym
import energym
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Smoke test: drive the environment with random actions and report what comes back.
# `env` and `obs` are deliberately left alive at the end of this cell; the next
# cells inspect them before the environment is closed.
env = gym.make('Eplus-discrete-hot-v1')

# Upper bound on the number of simulated steps per episode.
# This cell previously stopped after a single step through an unconditional
# `break` at the bottom of the loop; the cap is now explicit and keeps that
# default. Set it to None to run every episode until the environment reports
# `done` (keep it >= 1, otherwise the reward summary below has nothing to average).
max_steps = 1

for i in range(1):
    obs = env.reset()
    rewards = []
    total_power = []
    temp = []
    out_temp = []
    actions = []
    done = False
    current_month = 0
    n_steps = 0

    while not done and (max_steps is None or n_steps < max_steps):
        # Random policy: sample uniformly from the action space.
        a = env.action_space.sample()
        obs, reward, done, info = env.step(a)
        n_steps += 1

        # Keep per-step traces of the quantities reported in `info`.
        rewards.append(reward)
        total_power.append(info['total_power'])
        temp.append(info['temperature'])
        out_temp.append(info['out_temperature'])
        actions.append(a)

        if info['month'] != current_month: # display results every month
            current_month = info['month']
            print('Reward: ', sum(rewards), info)

    print('Episode ', i, 'Mean reward: ', np.mean(rewards), 'Cumulative reward: ', sum(rewards))
    print()
    print(obs)



[2021-05-05 10:22:03,820] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:Creating EnergyPlus simulation environment...
[2021-05-05 10:22:03,824] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/energym/Eplus-env-discrete-hot-v1-res1/Eplus-env-sub_run1
Reward:  -0.40314488461957476 {'timestep': 900.0, 'day': 1, 'month': 1, 'hour': 0, 'total_power': 1197.281193442195, 'total_power_no_units': -0.11972811934421951, 'comfort_penalty': -0.6865616498949301, 'temperature': 19.31343835010507, 'out_temperature': 4.4}
Episode  0 Mean reward:  -0.40314488461957476 Cumulative reward:  -0.40314488461957476

[4.40000000e+00 6.50000000e+01 3.87500000e+00 1.45000000e+02
 0.00000000e+00 0.00000000e+00 1.80000000e+01 2.70000000e+01
 1.93134384e+01 1.94445049e+01 2.56701305e+01 7.49999995e-01
 3.39839742e+01 0.00000000e+00 2.09721298e+01 1.19728119e+03
 1.00000000e+00 1.00000000e+00 0.00000000e+00]


In [3]:
# Display the observation returned by the most recent `env.step` call.
obs

array([4.40000000e+00, 6.50000000e+01, 3.87500000e+00, 1.45000000e+02,
       0.00000000e+00, 0.00000000e+00, 1.80000000e+01, 2.70000000e+01,
       1.93134384e+01, 1.94445049e+01, 2.56701305e+01, 7.49999995e-01,
       3.39839742e+01, 0.00000000e+00, 2.09721298e+01, 1.19728119e+03,
       1.00000000e+00, 1.00000000e+00, 0.00000000e+00])

In [4]:
# Accessing the latest observations:
# Pair every name in `env.variables` with the entry of `env.current_obs` at the
# same position. `zip` stops at the shorter input, so observation entries that
# have no matching name are silently left out of the dict.
# NOTE(review): the observation vector printed earlier has 19 entries while only
# 16 names show up here; the 3 trailing values are presumably day / month / hour
# — confirm against the environment definition.

dict(zip(env.variables, env.current_obs))

{'Site Outdoor Air Drybulb Temperature': 4.4,
 'Site Outdoor Air Relative Humidity': 65.0,
 'Site Wind Speed': 3.875,
 'Site Wind Direction': 145.0,
 'Site Diffuse Solar Radiation Rate per Area': 0.0,
 'Site Direct Solar Radiation Rate per Area': 0.0,
 'Zone Thermostat Heating Setpoint Temperature': 18.0,
 'Zone Thermostat Cooling Setpoint Temperature': 27.0,
 'Zone Air Temperature': 19.31343835010507,
 'Zone Thermal Comfort Mean Radiant Temperature': 19.44450487839941,
 'Zone Air Relative Humidity': 25.67013046197828,
 'Zone Thermal Comfort Clothing Value': 0.7499999951999999,
 'Zone Thermal Comfort Fanger Model PPD': 33.98397417761555,
 'Zone People Occupant Count': 0.0,
 'People Air Temperature': 20.97212981582051,
 'Facility Total HVAC Electric Demand Power': 1197.281193442195}

In [5]:
# Close the environment used for the random-action run now that it has been inspected.
env.close()

In [6]:
# Now the callback class

In [7]:
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3 import DQN

In [8]:
class DataAssimilationCallback(BaseCallback):
    """Periodically overwrite the environment state with an externally supplied one.

    On every ``check_freq``-th call the callback reads ``current_obs`` from the
    training environment, reports it next to ``actual_state``, and then hands
    ``actual_state`` to the environment's ``update_states`` method.

    The environment is reached through ``self.training_env`` (the vectorised
    environment the model is training on) instead of a notebook-level ``env``
    variable, so the callback keeps acting on the right simulator even when
    ``env`` is rebound to a different object in another cell.

    NOTE: the update is a plain overwrite with ``actual_state``; no blending of
    observed and actual states is performed here.

    :param check_freq: act on every ``check_freq``-th call; must be positive.
    :param actual_state: state passed to ``update_states`` on each check.
    :param verbose: diagnostics are printed when this is greater than 0.
    :raises ValueError: if ``check_freq`` is not positive.
    """

    def __init__(self, check_freq, actual_state, verbose=1):
        # BaseCallback.__init__ stores `verbose` and initialises `n_calls` to 0,
        # so the call counter does not need to be reset here.
        super(DataAssimilationCallback, self).__init__(verbose)
        if check_freq <= 0:
            # Fail at construction instead of with a ZeroDivisionError mid-training.
            raise ValueError("check_freq must be a positive number of calls")
        self.check_freq = check_freq
        # Actual state after step:
        self.actual_state = actual_state

    def _on_step(self) -> bool:
        """Compare and overwrite the environment state; always lets training continue."""
        if self.n_calls % self.check_freq == 0:
            vec_env = self.training_env

            # get current observation:
            # `get_attr` returns one value per sub-environment; this callback
            # works with the first (single) environment only.
            obs_state = vec_env.get_attr("current_obs")[0]
            if self.verbose > 0:
                print("observed states:")
                print(obs_state)
                print()

                print("actual states:")
                print(self.actual_state)
                print()

            # replace the environment's states with the actual states:
            vec_env.env_method("update_states", self.actual_state)

            if self.verbose > 0:
                print("new states:")
                print(vec_env.get_attr("current_obs")[0])
                print()

                print("calls:")
                print(self.n_calls)
                print()

        return True    # set false for just one step

In [9]:
# Parameters:
gamma = 0.9

# Create environment:
env = gym.make('Eplus-discrete-hot-v1')

# Create model:
model = DQN("MlpPolicy", env, verbose=1, gamma = gamma)


# Callback:
s = obs + np.abs(np.random.normal())
callback = DataAssimilationCallback(check_freq = 1, actual_state = s)

print(s)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
[5.46412629e+00 6.60641263e+01 4.93912629e+00 1.46064126e+02
 1.06412629e+00 1.06412629e+00 1.90641263e+01 2.80641263e+01
 2.03775646e+01 2.05086312e+01 2.67342567e+01 1.81412628e+00
 3.50481005e+01 1.06412629e+00 2.20362561e+01 1.19834532e+03
 2.06412629e+00 2.06412629e+00 1.06412629e+00]


In [10]:
# Very short training run (4 timesteps) with the data-assimilation callback attached.
model.learn(total_timesteps=4, callback=callback)

[2021-05-05 10:22:25,563] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:Creating EnergyPlus simulation environment...
[2021-05-05 10:22:25,563] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:Creating EnergyPlus simulation environment...
[2021-05-05 10:22:25,586] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/energym/Eplus-env-discrete-hot-v1-res2/Eplus-env-sub_run1
[2021-05-05 10:22:25,586] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/energym/Eplus-env-discrete-hot-v1-res2/Eplus-env-sub_run1
observed states:
[4.40000000e+00 6.50000000e+01 3.87500000e+00 1.45000000e+02
 0.00000000e+00 0.00000000e+00 1.50000000e+01 3.00000000e+01
 1.93134384e+01 1.94445049e+01 2.56701305e+01 7.49999995e-01
 3.39839742e+01 0.00000000e+00 2.09721298e+01 1.19728119e+03
 1.00000000e+00 1.00000000e+00 0.00000000e+00]

actual states:
[5.46412629e+00 6.60641263e+01 4.93912629e+00 1.46064126e+02
 1.06412629e+00 1.064

<stable_baselines3.dqn.dqn.DQN at 0x7f31873ddfd0>

In [11]:
# Manual check of `update_states` outside of training: take a few random steps
# and, after each one, push the returned observation shifted by +0.5 back into
# the environment.
env = gym.make('Eplus-discrete-hot-v1')

try:
    for i in range(1):
        obs = env.reset()
        # Bookkeeping kept for parity with the first rollout cell; nothing is
        # appended to these lists in this cell.
        rewards = []
        total_power = []
        temp = []
        out_temp = []
        actions = []
        done = False
        current_month = 0
        # Use a separate name for the step counter so the episode index `i`
        # is not overwritten by the inner loop.
        for step_idx in range(5):
            print("initial observations:")
            # In the recorded run this printed None on the first pass, i.e.
            # before any step had been taken after the reset.
            print(env.current_obs)
            print()
            a = env.action_space.sample()
            obs, reward, done, info = env.step(a)
            print("current observations:")
            print(obs)
            print()
            env.update_states(obs + 0.5)
            print("new observations:")
            print(env.current_obs)
            print()
finally:
    # Always release the environment, even if a step or update raises.
    env.close()

[2021-05-05 10:22:27,047] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:Creating EnergyPlus simulation environment...
[2021-05-05 10:22:27,047] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:Creating EnergyPlus simulation environment...
[2021-05-05 10:22:27,047] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:Creating EnergyPlus simulation environment...
[2021-05-05 10:22:27,054] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/energym/Eplus-env-discrete-hot-v1-res3/Eplus-env-sub_run1
[2021-05-05 10:22:27,054] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/energym/Eplus-env-discrete-hot-v1-res3/Eplus-env-sub_run1
[2021-05-05 10:22:27,054] EPLUS_ENV_discrete-hot-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/energym/Eplus-env-discrete-hot-v1-res3/Eplus-env-sub_run1
initial observations:
None

current observations:
[4.40000000e+00 6.50000000e+01 3.87500000e+00 1.45000000e+0

In [12]:
# Seems to work! Next step: use real state data (one state per timestep) and implement a proper BLUE (best linear unbiased estimator) update.