In [12]:
import env
import gymnasium as gym 
import pandas as pd
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [13]:
# load training data - 1-min sampling rate
file_path = os.path.abspath('')
parent_path = os.path.dirname(file_path)
data_path = os.path.join(parent_path, 'data/disturbance_1min.csv')
data = pd.read_csv(data_path, index_col=[0])
index = range(0, len(data)*60, 60)
data.index = index

# sample every hour
dt = 3600
data = data.groupby([data.index // dt]).mean()
n = len(data)

In [14]:
data.head()


Unnamed: 0,out_temp,qint_lump,qhvac_lump,qwin_lump,qradin_lump,weighted_average
0,21.118333,0.440416,-3.909012,0.0,0.012347,23.03648
1,22.098333,0.430104,0.0,0.0,0.012347,25.051452
2,21.949167,0.426729,0.0,0.0,0.012347,25.191672
3,21.798333,0.424369,0.0,0.0,0.012347,25.244827
4,21.7,0.407597,0.0,0.0,0.012347,25.266527


In [15]:
disturbance_names = ['out_temp', 'qint_lump', 'qwin_lump', 'qradin_lump']
disturbance = data[disturbance_names].values

u = data['qhvac_lump'].values

In [16]:
# test rc state space
rc_params = [10383.181640625, 499116.65625, 1321286.5, 1.53524649143219, 0.5000227689743042, 1.0003612041473389, 20.09742546081543]
x0 = np.array([20, 20, 20])
x_high = np.array([40., 80., 40.])
x_low = np.array([10., 10., 10.])
n_actions = 121
u_high = [0]
u_low = [-12.0]

t_d = [t*dt for t in range(n)]


In [17]:
A = np.array([[-1.97402551e-04,  0.00000000e+00,  1.92610415e-04],
       [ 0.00000000e+00, -3.30784418e-06,  2.00281620e-06],
       [ 1.51360733e-06,  7.56564853e-07, -2.27017218e-06]])
Bu = np.array([[9.63095932e-05], [0.], [0.]])
Bd = np.array([[4.79213586e-06, 9.63095932e-05, 0.00000000e+00, 0.00000000e+00],
       [1.30502798e-06, 0.00000000e+00, 2.00353963e-06, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.56838127e-07]])
C = np.array([[1., 0., 0.],
       [0., 0., 0.]])
D = np.array([[0.],
            [1.]])

In [18]:
# test linear state space gym env
import env.r4c3_discrete as rc

disturbances = [t_d, disturbance]

dist_fcn = rc.LinearInterpolation(*disturbances)
lssm = gym.make("DiscreteLinearStateSpace-v0",
                A = A,
                Bu = Bu, 
                Bd = Bd,
                C = C,
                D = D,
                x0 = x0,
                dist_fcn = dist_fcn,
                x_high = x_high,
                x_low = x_low,
                n_actions = n_actions,
                u_high = u_high,
                u_low = u_low,
                ts = 0,
                te = dt*24.,
                dt = dt).env

state0, _ = lssm.reset(seed=0)
assert lssm.observation_space.shape[0] == lssm.state.shape[0], "state shape does not match observation space shape"
assert lssm.action_space.n == n_actions, "action space does not match n_actions"
assert lssm.t == lssm.ts, "initial time does not match given start time"
assert np.any(lssm.state.astype(np.float32) == state0), "initial state does not match given initial state"

# sample actions from action space
# step 1
lssm.action_space.seed(12)
action = lssm.action_space.sample() # sample action from action space
state1, reward1, done1, _, _ = lssm.step(action) # step the environment
assert lssm.t == lssm.ts + lssm.dt, "time does not match given time step"
assert done1 == lssm.done, "done does not match environment done"
assert reward1 == 1, "reward does not match given reward"
assert np.any(lssm.state.astype(np.float32) == state1), "state does not match given state"

# step 2
action = lssm.action_space.sample()
state2, reward2, done2, _, _ = lssm.step(action)
assert lssm.t == lssm.ts + 2*lssm.dt, "time does not match given time step"
assert done2 == lssm.done, "done does not match environment done"
assert reward2 == 1, "reward does not match given reward"
assert np.any(lssm.state.astype(np.float32) == state2), "state does not match given state"

# reset environment
state3, _ = lssm.reset(seed=0)
assert lssm.t == lssm.ts, "time does not match given start time"
assert np.any(lssm.state.astype(np.float32) == state3), "state does not match given state"
assert np.any(state3 == state0), "state does not previous initial state"

  logger.warn(f"{pre} is not within the observation space.")


In [27]:
env = gym.make("R4C3Discrete-v0",
    rc_params = rc_params,
    x0 = x0,
    x_high = x_high,
    x_low = x_low,
    n_actions = n_actions,
    u_high = u_high,
    u_low = u_low,
    disturbances = (t_d, disturbance),
    ts = 0,
    te = 24.*dt,
    dt = dt).env
print(env.A, env.Bu, env.Bd, env.C, env.D)
print("=========================================")
print(env.x0, env.high, env.low)
print("=========================================")
print(env.observation_space)
print(env.reset(seed=0)) #TODO: seed is not working right now
env.action_space.seed(12)
action = env.action_space.sample()
env.step(action)

[[-1.97402551e-04  0.00000000e+00  1.92610415e-04]
 [ 0.00000000e+00 -3.30784418e-06  2.00281620e-06]
 [ 1.51360733e-06  7.56564853e-07 -2.27017218e-06]] [[9.63095932e-05]
 [0.00000000e+00]
 [0.00000000e+00]] [[4.79213586e-06 9.63095932e-05 0.00000000e+00 0.00000000e+00]
 [1.30502798e-06 0.00000000e+00 2.00353963e-06 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 7.56838127e-07]] [[1. 0. 0.]
 [0. 0. 0.]] [[0.]
 [1.]]
[20 20 20] [40. 80. 40.] [10. 10. 10.]
Box([10. 10. 10.  0.], [40. 80. 40.  4.], (4,), float32)
(array([20.011822, 20.450464, 19.64416 ,  0.      ], dtype=float32), {})


(array([20.011822 , 20.447788 , 19.648394 ,  1.5333333], dtype=float32),
 -0.6166667849794238,
 False,
 False,
 {})

In [20]:
env.reset()

i = 0
done = False
while i < n and not done:
    action = (u[i] - np.array(u_low))*(n_actions - 1) / (np.array(u_high) - np.array(u_low))
    action = int(action)
    print(u[i], action)
    obs, reward, done, _, _ = env.step(action)

    i += 1


-3.909011583503724 80
0.0 120
0.0 120
0.0 120
0.0 120
0.0 120
-0.008112508783001288 119
-0.1795707385732975 118
-7.049829225505723 49
-5.751299873544396 62
-4.347338930665189 76
-3.667525614615987 83
-3.615029677453877 83
-3.39209038667379 86
-3.3527780409798345 86
-3.2431797200190573 87
-2.8611788167807473 91
-2.7238136544326124 92
-0.29386119041929604 117
-0.29540834784551434 117
-0.4682965790873039 115
-0.013543364561231497 119
0.0 120
0.0 120


In [21]:
obs
env.observation_space


Box([10. 10. 10.  0.], [40. 80. 40.  4.], (4,), float32)

In [22]:
cart = gym.make("CartPole-v1").env
obs, _ = cart.reset()
cart.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)