In [None]:
%matplotlib notebook
%reload_ext autoreload
%autoreload 2

import datetime
import sys
from os import path, environ
import pickle
import warnings

sys.path.insert(0, path.abspath('../../../pyTorchBridge/'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from tqdm.auto import tqdm, trange

from utils import contiguous_sequences
from plotting import model_surface, plot_surface
from condenser import Condenser

# File-system locations used by the notebook. DATADIR must be set in the
# environment; the pickled models further down are written under `bin_path`.
# `plot_path` is presumably where figures get exported (not used in the
# cells visible here).
data_root = environ['DATADIR']
chiller_file = path.join(data_root, 'EngineeringScienceBuilding', '2422_ESB_HVAC.csv')
plot_path = path.join('..', 'docs', 'img')
bin_path = './bin/'

In [None]:
# Data selection: 'all', 'chiller_on' or 'fan_on'.
MODE = 'chiller_on'
# Read pre-processed data.
# PyTorch uses float32 as the default type for weights etc., so the input
# data points are read in the same type. Columns that are not used by any
# model below are dropped before discarding rows with missing values.
df = (
    pd.read_csv(chiller_file, index_col='time',
                parse_dates=['time'], dtype=np.float32)
    .drop(columns=['PowFanA', 'PowFanB', 'FlowCond', 'PowChiP', 'PerFreqConP', 'PowConP'])
    .dropna()
)
if MODE == 'chiller_on':
    # Keep only rows where the chiller draws power.
    df = df[df['PowChi'] != 0.]
elif MODE == 'fan_on':
    # Keep rows where at least one fan is running. Each comparison needs its
    # own parentheses: `|` binds tighter than `!=`, so the unparenthesised
    # form evaluated `((A != 0) | B) != 0` instead of `(A != 0) | (B != 0)`.
    df = df[(df['PerFreqFanA'] != 0.) | (df['PerFreqFanB'] != 0.)]
print(len(df), 'Records')

## Environment model

State variables (8):

`'TempCondIn', 'TempCondOut', 'TempAmbient', 'TempWetBulb', 'TempEvapIn', 'TempEvapOut', 'PressDiffEvap', 'PressDiffCond'`

Action variables (1):

`'TempCondInSetpoint'`

### Condenser

In [None]:
# Condenser model data: inputs are the action followed by the state
# variables; targets are the chiller power and both condenser water
# temperatures taken `lag` rows later.
actionvars = ['TempCondInSetpoint']
envvars = ['TempCondIn', 'TempCondOut', 'TempAmbient', 'TempWetBulb',
           'TempEvapIn', 'TempEvapOut', 'PressDiffEvap', 'PressDiffCond']
inputs = actionvars + envvars
outputs = ['PowChi', 'TempCondOut', 'TempCondIn']
lag = (1, 1, 1)    # delay in rows for each entry of `outputs` (0, 1, 2, ...)

# The setpoint column is synthesised from the wet-bulb temperature
# ("approach controller") with a floor of 65.
# NOTE(review): the cooling-tower cell further down uses wet-bulb + 4 with no
# floor -- confirm which offset is intended.
approach_setpoint = np.clip(df['TempWetBulb'] - 4, a_min=65, a_max=None)
df_in = pd.DataFrame(columns=inputs, index=df.index)
df_in['TempCondInSetpoint'] = approach_setpoint
df_in[inputs[1:]] = df[inputs[1:]]

df_out = pd.DataFrame(columns=outputs, index=df.index)
df_out[outputs] = df[outputs]

idx_list = contiguous_sequences(df.index, pd.Timedelta(5, unit='min'), filter_min=10)

# Split into contiguous sequences and shift every output column by its lag so
# that row i of the inputs lines up with the outputs it is meant to cause.
longest_lag, shortest_lag = max(lag), min(lag)
dfs_in, dfs_out = [], []
for idx in idx_list:
    # The trailing `longest_lag` input rows have no target and are dropped.
    input_rows = idx[:-longest_lag] if longest_lag > 0 else idx[:]
    dfs_in.append(df_in.loc[input_rows])

    shifted = []
    for delay, name in zip(lag, outputs):
        # Trim the tail of shorter-lag columns so all columns end up with the
        # same number of rows as the longest-lag column.
        surplus = longest_lag - delay
        stop = -surplus if surplus else None
        column = df_out[name].loc[idx[delay:stop]]
        shifted.append(column.values)
        if delay == shortest_lag:
            # The output frame is labelled with the timestamps of the
            # smallest-lag column.
            index = column.index
    dfs_out.append(pd.DataFrame(np.asarray(shifted).T, index=index, columns=outputs))

df_in = pd.concat(dfs_in, sort=False)
df_out = pd.concat(dfs_out, sort=False)

n_series, n_rows = len(dfs_in), len(df_in)
print('{:6d} time series'.format(n_series))
print('{:6d} total rows'.format(n_rows))

In [None]:
# Condenser network: inputs are standardised inside the pipeline, targets by
# a separate scaler (`std_out_cond`) so predictions can be mapped back with
# its `inverse_transform` later on.
std_in_cond = StandardScaler()
std_out_cond = StandardScaler()
net = MLPRegressor(activation='tanh',
                   hidden_layer_sizes=(64, 32, 16),
                   learning_rate_init=1e-3,
                   solver='adam',
                   early_stopping=True,
                   verbose=True)
est_cond = Pipeline(steps=[('std', std_in_cond), ('net', net)])

# FutureWarnings raised while scaling/fitting are silenced for this cell only.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=FutureWarning)
    scaled_targets = std_out_cond.fit_transform(df_out)
    est_cond.fit(df_in, scaled_targets)

In [None]:
# Persist the fitted condenser pipeline together with its output scaler, the
# final training loss and the column names it was trained on.
save = dict(
    loss=est_cond['net'].loss_,
    estimator=est_cond,
    output_norm=std_out_cond,
    inputs=inputs,
    outputs=outputs,
)
cond_model_file = path.join(bin_path, 'env_condenser_condenser_nn')
with open(cond_model_file, 'wb') as f:
    pickle.dump(save, f)

In [None]:
# Load the condenser model written by the save cell.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open(path.join(bin_path, 'env_condenser_condenser_nn'), 'rb') as f:
    save = pickle.load(f, fix_imports=False)
est_cond = save['estimator']
# Bind the output scaler under `std_out_cond`, the name used by the surface
# plot and the RL environment cells, so the notebook also works when the
# training cell is skipped and the model is only loaded from disk.
std_out_cond = save['output_norm']
# Alias kept for the prediction-plot cell, which refers to `std_out`.
std_out = std_out_cond
In [None]:
# Compare model predictions with the recorded data on one contiguous sequence.
test_in, test_out = dfs_in[2], dfs_out[2]
pred_scaled = est_cond.predict(test_in)
pred = pd.DataFrame(std_out.inverse_transform(pred_scaled),
                    index=test_out.index, columns=test_out.columns)

# Top panel: current, next and predicted-next condenser water temperatures.
plt.subplot(2, 1, 1)
for temp_col in ('TempCondIn', 'TempCondOut'):
    test_in[temp_col].plot(grid=True, style=':', label=temp_col)
    test_out[temp_col].plot(grid=True, style=':', label=temp_col + '-Next')
    pred[temp_col].plot(grid=True, style=':', label=temp_col + '-Next-Pred')
plt.legend()

# Bottom panel: recorded vs predicted chiller power.
plt.subplot(2, 1, 2)
ax4 = test_out['PowChi'].plot(grid=True, label='PowChi')
ax5 = pred['PowChi'].plot(grid=True, label='PowChi-Pred')
plt.legend()

In [None]:
# Response surface of the predicted next-step TempCondIn (output column 2)
# while sweeping inputs 0 (TempCondInSetpoint) and 3 (TempAmbient) around a
# single recorded operating point.
point = df_in.loc['2019-07-01T1200-6'].values.reshape(1, -1)


def _predict_next_tempcondin(batch):
    """Return the de-normalised TempCondIn prediction for each row of `batch`."""
    physical = std_out_cond.inverse_transform(est_cond.predict(batch))
    return physical[:, 2]


x, y, z = model_surface(_predict_next_tempcondin, X=point, vary_idx=(0, 3),
                        vary_range=((65, 85), (75, 95)), vary_num=(20, 20))
ax = plot_surface(x, y, z, cmap=plt.cm.coolwarm)
ax.set_xlabel('TempCondInSetpoint')
ax.set_ylabel('TempAmbient')
ax.set_zlabel('TempCondIn-Next Cycle')

In [None]:
# Show the input row that serves as the base point of the surface plot.
base_row = df_in.loc['2019-07-01T1200-6']
print(base_row)

### Cooling Tower

Note: this cooling-tower model is not used in the environment, because the condenser model now also predicts the next timestep's `TempCondIn`.

In [None]:
# Cooling-tower model data: predict TempCondIn from the setpoint and the
# remaining state variables. NOTE: this cell rebinds `inputs`, `outputs`,
# `df_in`, `df_out`, `dfs_in` and `dfs_out` from the condenser section.
actionvars = ['TempCondInSetpoint']
envvars = ['TempCondOut', 'TempAmbient', 'TempWetBulb', 'TempEvapIn',
           'TempEvapOut', 'PressDiffEvap', 'PressDiffCond']
inputs = actionvars + envvars
outputs = ['TempCondIn']
lag = 0    # rows between input and output (0, 1, 2, ...); 0 = same row

# Setpoint synthesised from the wet-bulb temperature ("approach controller").
approach_setpoint = df['TempWetBulb'] + 4
df_in = pd.DataFrame(columns=inputs, index=df.index)
df_in['TempCondInSetpoint'] = approach_setpoint
df_in[inputs[1:]] = df[inputs[1:]]

df_out = pd.DataFrame(columns=outputs, index=df.index)
df_out[outputs] = df[outputs]

idx_list = contiguous_sequences(df.index, pd.Timedelta(5, unit='min'), filter_min=10)

# Split into contiguous sequences; outputs are shifted by `lag` rows relative
# to the inputs (with lag = 0 inputs and outputs share the same timestamps).
dfs_in, dfs_out = [], []
for idx in idx_list:
    input_rows = idx[:-lag] if lag > 0 else idx[:]
    output_rows = idx[lag:]
    dfs_in.append(df_in.loc[input_rows])
    dfs_out.append(df_out.loc[output_rows])

df_in = pd.concat(dfs_in, sort=False)
df_out = pd.concat(dfs_out, sort=False)

n_series, n_rows = len(dfs_in), len(df_in)
print('{:6d} time series'.format(n_series))
print('{:6d} total rows'.format(n_rows))

In [None]:
# Cooling-tower network: same architecture as the condenser network, with
# its own input scaler (inside the pipeline) and output scaler.
std_in_tower = StandardScaler()
std_out_tower = StandardScaler()
net = MLPRegressor(activation='tanh',
                   hidden_layer_sizes=(64, 32, 16),
                   learning_rate_init=1e-3,
                   solver='adam',
                   early_stopping=True,
                   verbose=True)
est_tower = Pipeline(steps=[('std', std_in_tower), ('net', net)])

# FutureWarnings raised while scaling/fitting are silenced for this cell only.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=FutureWarning)
    scaled_targets = std_out_tower.fit_transform(df_out)
    est_tower.fit(df_in, scaled_targets)


In [None]:
# Persist the fitted cooling-tower pipeline together with its output scaler,
# the final training loss and the column names it was trained on.
save = dict(
    loss=est_tower['net'].loss_,
    estimator=est_tower,
    output_norm=std_out_tower,
    inputs=inputs,
    outputs=outputs,
)
tower_model_file = path.join(bin_path, 'env_condenser_tower_nn')
with open(tower_model_file, 'wb') as f:
    pickle.dump(save, f)

In [None]:
# Restore the cooling-tower pipeline and its output scaler from disk.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
tower_model_file = path.join(bin_path, 'env_condenser_tower_nn')
with open(tower_model_file, 'rb') as f:
    save = pickle.load(f, fix_imports=False)
est_tower, std_out_tower = save['estimator'], save['output_norm']

In [None]:
# Compare the cooling-tower prediction of TempCondIn with the recorded value
# on one contiguous sequence, alongside the setpoint and TempCondOut inputs.
test_in, test_out = dfs_in[2], dfs_out[2]
pred_scaled = est_tower.predict(test_in)
pred = pd.DataFrame(std_out_tower.inverse_transform(pred_scaled),
                    index=test_out.index, columns=test_out.columns)
test_in[['TempCondInSetpoint', 'TempCondOut']].plot(grid=True)
test_out['TempCondIn'].plot(grid=True, style=':')
pred['TempCondIn'].plot(grid=True, style=':')
plt.legend()

## RL Environment

In [None]:
# Wrap the fitted model so that its predictions are de-normalized,
# i.e. returned in physical units instead of having 0 mean and unit variance.
from cooling_tower import CoolingTower

# Columns that are taken from the recorded data rather than predicted by the
# model; one frame of these columns is collected per contiguous sequence.
externalvars = ('TempAmbient', 'TempWetBulb', 'TempEvapIn', 'TempEvapOut', 'PressDiffEvap', 'PressDiffCond')
externalvals = []
for sequence in dfs_in:
    externalvals.append(sequence.loc[:, externalvars])

class InvTransformer:
    """Estimator wrapper whose `predict` undoes the output scaling.

    `estimator` must provide `predict(x)` and `transformer` must provide
    `inverse_transform(y)`; both are stored as attributes of the same name.
    """

    def __init__(self, estimator, transformer):
        self.estimator, self.transformer = estimator, transformer

    def predict(self, x):
        """Predict with the wrapped estimator, then inverse-transform the result."""
        scaled = self.estimator.predict(x)
        return self.transformer.inverse_transform(scaled)
        

# Environment driven by the condenser model, with predictions mapped back to
# physical units, and the per-sequence external variables.
condenser_model = InvTransformer(est_cond, std_out_cond)
esb = Condenser(condenser_model, externalvals)

In [None]:
# Visualize one environment episode driven by randomly sampled actions.
done = False
states = []
power = []
esb.reset()
while not done:
    state, _, done, info = esb.step(esb.action_space.sample())
    states.append(state)
    power.append(info.get('powchi'))
# Leave the environment in a freshly reset state for the following cells.
esb.reset()

states = np.asarray(states)
power = np.asarray(power)

# Open a new figure explicitly: with the `%matplotlib notebook` backend the
# figure from an earlier cell stays current, so calling `plt.subplot` here
# without `plt.figure()` would draw into that older figure.
plt.figure()
plt.subplot(2,1,1)
plt.plot(power, label='Total Power')
plt.legend()
plt.subplot(2,1,2)
# NOTE(review): the labels assume the first four state columns are ordered
# as below -- confirm against the Condenser observation layout.
plt.plot(states[:, 0], label='TempCondIn')
plt.plot(states[:, 1], label='TempCondOut')
plt.plot(states[:, 2], label='TempAmbient')
plt.plot(states[:, 3], label='TempWetBulb')
plt.legend()

## RL Control

In [149]:
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.policies import MlpPolicy, LstmPolicy

class CT(CoolingTower):
    """CoolingTower variant whose reward is the negated, rescaled chiller power.

    NOTE(review): this class is not instantiated in the visible cells; the
    vectorised environment built right after it uses `Condenser` instead.
    """

    def reward(self, t, state: np.ndarray, action: np.ndarray, nstate: np.ndarray,
               locals: dict) -> float:
        """Return the reward for one step.

        `locals` is a dict read via `.get` for the keys 'powchi',
        'tempcondin' and 'tempcondout'. Only the chiller power contributes
        to the returned value; the two temperature terms are computed but
        unused (earlier reward variants mixed in TempCondIn and fan power).
        """
        lookup = locals.get
        # Shift and scale so the terms are of comparable magnitude.
        # NOTE(review): the temperature offsets look like they were chosen for
        # a different unit scale than the rest of the notebook -- confirm.
        powchi = (lookup('powchi') - 5300) / 415700.
        tempcondin = (lookup('tempcondin') - 281.7) / 25.15
        tempcondout = (lookup('tempcondout') - 290.) / 20.

        return -powchi
    
def _make_condenser_env():
    """Build a fresh Condenser environment around the de-normalising model."""
    return Condenser(InvTransformer(est_cond, std_out_cond), externalvals)


# Four independent environment copies are stepped together while training.
esb_vec = DummyVecEnv([_make_condenser_env] * 4)
agent = PPO2(MlpPolicy, esb_vec, verbose=1, learning_rate=1e-3)
agent.learn(50000, log_interval=10)

-------------------------------------
| approxkl           | 0.0043457365 |
| clipfrac           | 0.04736328   |
| explained_variance | 0.807        |
| fps                | 490          |
| n_updates          | 1            |
| policy_entropy     | 1.4178611    |
| policy_loss        | -0.004554482 |
| serial_timesteps   | 128          |
| time_elapsed       | 0            |
| total_timesteps    | 512          |
| value_loss         | 0.003511376  |
-------------------------------------
---------------------------------------
| approxkl           | 0.0068328762   |
| clipfrac           | 0.100097656    |
| explained_variance | 0.691          |
| fps                | 1517           |
| n_updates          | 10             |
| policy_entropy     | 1.3978771      |
| policy_loss        | -0.0037712269  |
| serial_timesteps   | 1280           |
| time_elapsed       | 3.76           |
| total_timesteps    | 5120           |
| value_loss         | 0.000100123114 |
--------------------------

<stable_baselines.ppo2.ppo2.PPO2 at 0x19a14de7a88>

In [157]:
# seqidx = np.random.randint(len(dfs_in))
seqidx = 21 # May
simulate_hist = True  # Whether to use raw output data, or simulate it through historical actions

# Histories are indexed after the 1st element because simulated trajectories
# are recorded after the initial state (> 0), so lengths are equal.
act_hist = dfs_in[seqidx].loc[:, 'TempCondInSetpoint'].values[1:, None]
ext = dfs_in[seqidx].loc[:, externalvars]

# Fan power is no longer recorded (the fan power columns are dropped when the
# data is loaded and are not among the model outputs). An empty placeholder
# keeps the name defined in both branches.
pow_hist_fan = []

# Get baseline by running historic actions through environment:
if simulate_hist:
    esb.reset(external=ext)
    done = False
    pow_hist_chi, temp_hist = [], []
    t = 0
    while not done:
        action = act_hist[t, :1]
        _, _, done, info = esb.step(action)
        pow_hist_chi.append(info.get('powchi'))
        temp_hist.append(info.get('tempcondin'))
        t += 1
else:
    # The former `dfs_out[seqidx]['PowFans']` read was removed: that column
    # never exists in the output frames, so this branch always raised KeyError.
    # NOTE(review): 'PowChi' is only present while `dfs_out` holds the
    # condenser outputs; the cooling-tower cell rebinds `dfs_out` with
    # 'TempCondIn' only.
    pow_hist_chi = dfs_out[seqidx]['PowChi'].values
    temp_hist = dfs_out[seqidx]['TempCondIn'].values

In [158]:
# Roll the trained (stochastic) policy out several times over the same period
# and summarise each metric by its per-step mean and standard deviation.
n_trials = 10
pfan, pchi, act, rewards, temp = [], [], [], [], []

for trial in range(n_trials):
    state = esb.reset(external=ext)
    done = False
    ep_pchi, ep_act, ep_rewards, ep_temp = [], [], [], []
    while not done:
        action = agent.predict(state)[0]
        state, reward, done, info = esb.step(action)
        ep_act.append(action)
        ep_pchi.append(info.get('powchi'))
        ep_rewards.append(reward)
        ep_temp.append(info.get('tempcondin'))
    # Fan power is not recorded any more; an empty list is kept per trial.
    pfan.append([])
    pchi.append(ep_pchi)
    act.append(ep_act)
    rewards.append(ep_rewards)
    temp.append(ep_temp)

# Spread across trials (axis 0 indexes the trial) ...
std_pchi, std_act, std_rewards, std_temp = (
    np.std(metric, axis=0) for metric in (pchi, act, rewards, temp)
)
# ... then replace each per-trial record by its mean across trials.
pchi, act, rewards, temp = (
    np.mean(metric, axis=0) for metric in (pchi, act, rewards, temp)
)

In [None]:
def _compare_panel(slot, title, rl_mean, rl_std, hist, name):
    """Draw the RL mean with a +/- std band and the historical trace in row `slot` of 4."""
    plt.subplot(4, 1, slot)
    plt.title(title)
    plt.plot(rl_mean, 'b:', label='RL.' + name)
    steps = np.arange(len(rl_mean))
    plt.fill_between(steps, rl_mean + rl_std, rl_mean - rl_std, color='b', alpha=0.3)
    plt.plot(hist, 'r:', label='Historical.' + name)


plt.figure(figsize=(8,12))
# Row 1 of the 4-row grid belonged to the fan power panel, which is disabled
# because fan power is no longer recorded; the row is left empty.

_compare_panel(2,
               'Chiller Power (Average RL {:.0f}W vs Historical {:.0f}W)'
               .format(np.mean(pchi), np.mean(pow_hist_chi)),
               pchi, std_pchi, pow_hist_chi, 'Chiller')
plt.ylim(bottom=0)
plt.legend()

_compare_panel(3,
               'Setpoint Control (Average RL {:.2f} vs Historical {:.2f})'
               .format(np.mean(act[:, 0]), np.mean(act_hist[:, 0])),
               act[:, 0], std_act[:, 0], act_hist[:, 0], 'Setpoint')
plt.legend()

_compare_panel(4,
               'Output Temperature (Average RL {:.1f}F vs Historical {:.1f}F)'
               .format(np.mean(temp), np.mean(temp_hist)),
               temp, std_temp, temp_hist, 'Temp')
plt.legend()

plt.tight_layout()

In [None]:
# Ambient and wet-bulb temperature over the evaluated sequence.
# The values are plotted as stored: the previous `k2f(...)` wrapper is not
# defined or imported anywhere in this notebook (NameError on a fresh kernel).
# NOTE(review): the other cells appear to treat these columns as Fahrenheit
# already (setpoint floor of 65, titles in F) -- confirm the data units.
plt.figure(figsize=(8,3))
plt.plot(dfs_in[seqidx]['TempAmbient'].values, label='Ambient Temp')
plt.plot(dfs_in[seqidx]['TempWetBulb'].values, label='WetBulb Temp')
plt.legend()
plt.ylabel('Temperature /F')
plt.title('Environmental Conditions')
plt.tight_layout()
plt.show()