In [None]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

import datetime
import sys
from os import path, environ
import pickle
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from tqdm.auto import tqdm, trange

from utils import contiguous_sequences
from plotting import model_surface, plot_surface
from systems import Condenser
from baseline_control import SimpleFeedbackController, FeedbackController

# Both pre-processed chiller logs sit in the same folder under $DATADIR;
# only the file name differs between chiller 1 and chiller 2.
chiller_file_1, chiller_file_2 = (
    path.join(environ['DATADIR'], 'EngineeringScienceBuilding', csv_name)
    for csv_name in ('2422_ESB_HVAC_1.csv', '2841_ESB_HVAC_2.csv')
)

# Relative location of the docs image folder.
plot_path = path.join('..', 'docs', 'img')
# Folder the pickled model bundle is read from.
bin_path = './bin/'

## Controller script demo

In [None]:
%reload_ext autoreload
%autoreload 2
from datetime import datetime, timedelta
import pytz
from controller import make_arguments, get_settings, get_controller, update_controller, get_current_state, put_control_action

In [None]:
# Build the controller script's argument parser and feed it a fixed argument
# list instead of the process command line.
# NOTE(review): '-s' presumably selects the settings file — confirm in controller.make_arguments.
parser = make_arguments()
args = parser.parse_args(['-s', './local.ini'])
settings = get_settings(args)
# Override the 'target' entry for this demo.
settings['target'] = 'temperature'
# Bare expression so the notebook renders the resulting settings.
settings

In [None]:
# Create a controller from the settings, then run one update pass on it.
# NOTE(review): what update_controller changes lives in controller.py and is not visible here.
ctrl = get_controller(**settings)
update_controller(ctrl, **settings)

In [None]:
# Request the state for the 10-minute window ending now (UTC-aware timestamps).
end = datetime.now(pytz.utc)
start = end - timedelta(minutes=10)
s = get_current_state(start, end, **settings)
# Bare expression so the notebook renders the returned state.
s

In [None]:
# Scripted scenario: drive a fresh controller with a hand-made 'TempCondIn'
# trajectory and record its action and feedback signal at every step.
# NOTE(review): put_control_action is called on every step — confirm that
# local.ini points at a target that is safe to write to.
ctrl = get_controller(**settings)
actions = []
feedbacks = []
temps = []
# Fixed starting conditions, overriding the values read from the live state.
s['TempCondIn'] = 62.
s['TempWetBulb'] = 40.
for i in range(45):
    # predict() is unpacked as a one-element sequence.
    action, = ctrl.predict(s)
    feedbacks.append(ctrl.feedback(s))
    actions.append(action)
    put_control_action(action, **settings)
    # Record the temperature the controller saw on this step.
    temps.append(s['TempCondIn'])
    if i < 10:
        # Steps 0-9: temperature drops by 1 each step, whatever the action was.
        s['TempCondIn'] -= 1.
    elif i < 20:
        # Steps 10-19: temperature rises by 1 each step, whatever the action was.
        s['TempCondIn'] += 1.
    elif i < 30:
        # Steps 20-29: temperature drops when the action rose, rises otherwise.
        if actions[-1] > actions[-2]:
            s['TempCondIn'] -= 1.
        else:
            s['TempCondIn'] += 1.
    elif i < 40:
        # Steps 30-39: temperature rises when the action rose, drops otherwise.
        if actions[-1] > actions[-2]:
            s['TempCondIn'] += 1.
        else:
            s['TempCondIn'] -= 1.
    else:
        # Steps 40-44: subtracting NaN turns the temperature into NaN.
        # NOTE(review): presumably a deliberate probe of NaN handling — confirm.
        s['TempCondIn'] -= np.nan

In [None]:
# Plot the scripted temperature trajectory against the controller's setpoint
# actions (left axis) and its feedback signal (right axis).
plt.figure(figsize=(12,8))
# plt.imshow(np.zeros((1, 20)), aspect='auto', alpha=0.3)
plt.plot(temps, label='Temp /F')
plt.plot(actions, label='Setpoint /F')
plt.grid(which='both')
# Dotted separators at the steps where the scripted behaviour changes.
for line in (10,20,30):
    plt.axvline(x=line, color='black', ls=':')
plt.axhline(y=s['TempWetBulb'], label='WetBulb /F', color='red', ls='--')
plt.axhline(y=55, label='Action lower bound', color='blue', ls='--')
plt.legend(loc='upper left')

# One caption per scripted phase.
# NOTE(review): the captions presumably describe the feedback signal rather
# than the temperature itself — confirm against ctrl.feedback.
plt.text(2, 47, 'Increasing\n(Unresponsive)')
plt.text(12, 47, 'Decreasing\n(Unresponsive)')
plt.text(22, 47, 'Same direction')
plt.text(32, 47, 'Opposite direction')
plt.title('Controller response to different feedback behaviors')
plt.xlabel('Time')
plt.ylabel('Temperature /F')

# Feedback gets its own y-axis on the right-hand side.
plt.twinx()
plt.plot(feedbacks, 'g:', lw=3, label='feedback')
plt.legend(loc='upper right')
# Axis label previously read 'Feecback /F' (typo).
plt.ylabel('Feedback /F')

## Environment model

State variables (12):

`'TempCondIn', 'TempCondOut', 'TempEvapOut', 'PowChi', 'PowFanA', 'PowFanB', 'PowConP', 'TempEvapIn', 'TempAmbient', 'TempWetBulb', 'PressDiffEvap', 'PressDiffCond'`

Action variables (1):

`'TempCondInSetpoint'`

Output variables (7):

`'TempCondIn', 'TempCondOut', 'TempEvapOut', 'PowChi', 'PowFanA', 'PowFanB', 'PowConP'`

Model:

`[Action, State] --> [Output]`

In [None]:
# Choosing which chiller to use
chiller_file = chiller_file_2
# Data selection 'all' or 'chiller_on' or 'fan_on'
MODE = 'chiller_on'
# Read pre-processed data:
# Pytorch uses float32 as default type for weights etc,
# so input data points are also read in the same type.
df = pd.read_csv(chiller_file, index_col='time',
                 parse_dates=['time'], dtype=np.float32)
print('Original length: {} Records'.format(len(df)))
# # These fields were not populated until 2020-07-01, so leaving them out of analysis
# df.drop(['PowFanA', 'PowFanB', 'FlowCond', 'PowChiP', 'PerFreqConP', 'PowConP'], axis='columns', inplace=True)
df.drop(['FlowCond', 'PowChiP', 'PerFreqConP'], axis='columns', inplace=True)

# Discard every record with a missing value before selecting by run state.
df.dropna(inplace=True)
if MODE == 'chiller_on':
    # Keep only records where the chiller is running.
    df = df[df['RunChi'] != 0]
elif MODE == 'fan_on':
    # Keep records where at least one fan is running. Each comparison needs
    # its own parentheses: `|` binds tighter than `!=`, so the unparenthesised
    # form compared the result of `mask | RunFanB` against zero instead.
    df = df[(df['RunFanA'] != 0.) | (df['RunFanB'] != 0.)]
print('Processed length: {} Records'.format(len(df)))

In [None]:
# Restore the pickled condenser model bundle from the bin folder.
# NOTE(review): pickle.load can run arbitrary code while unpickling — only
# load bundles that come from a trusted source.
with open(path.join(bin_path, 'v2_condenser'), 'rb') as f:
    save = pickle.load(f, fix_imports=False)

# Unpack the bundle into the notebook-level names used by later cells.
est_cond, std_out_cond = save['estimator'], save['output_norm']
statevars, actionvars = save['statevars'], save['actionvars']
inputs, outputs = save['inputs'], save['outputs']
lag = save['lag']

### Condenser Data

In [None]:
# Model inputs: a synthetic setpoint column plus the remaining input columns
# copied straight from the measurements.
df_in = pd.DataFrame(columns=inputs, index=df.index)
# NOTE(review): the setpoint is wet-bulb minus 4, floored at 65; Controller1
# further down starts from wet-bulb plus 4 — confirm the intended sign.
df_in['TempCondInSetpoint'] = np.clip(df['TempWetBulb'] - 4, a_min=65, a_max=None)  # approach controller
df_in[inputs[1:]] = df[inputs[1:]]

# Model outputs are copied straight from the measurements.
df_out = pd.DataFrame(columns=outputs, index=df.index)
df_out[outputs] = df[outputs]

# NOTE(review): presumably splits the index into runs with gaps of at most
# 5 minutes and drops short runs — confirm in utils.contiguous_sequences.
idx_list = contiguous_sequences(df.index, pd.Timedelta(5, unit='min'), filter_min=10)

# Create dataframes of contiguous sequences in which every output column
# is shifted forward by its own entry in `lag` relative to the inputs
# (causality input -> outputs).
dfs_in, dfs_out = [], []
for idx in idx_list:
    # Inputs lose the last max(lag) rows so they line up with the shifted outputs.
    dfs_in.append(df_in.loc[idx[:-max(lag) if max(lag) > 0 else None]])
    cols = []
    for l, c in zip(lag, outputs):
        # Rows l .. len(idx) - (max(lag) - l): every window has len(idx) - max(lag) rows.
        window = slice(l, None if l==max(lag) else -(max(lag)-l))
        series = df_out[c].loc[idx[window]]
        cols.append(series.values)
        # The output frame is indexed by the timestamps of the smallest-lag column.
        if l == min(lag): index = series.index
    dfs_out.append(pd.DataFrame(np.asarray(cols).T, index=index, columns=outputs))

# From here on df_in / df_out hold the concatenated, lag-aligned sequences.
df_in = pd.concat(dfs_in, sort=False)
df_out = pd.concat(dfs_out, sort=False)

print('{:6d} time series'.format(len(dfs_in)))
print('{:6d} total rows'.format(len(df_in)))

## RL Environment

In [None]:
# The wrapper class defined in this cell passes the estimator's predictions
# through the output scaler's inverse transform, so the cooling tower model
# reports physical units instead of zero-mean / unit-variance values.

# Columns the environment treats as external variables, sliced out of
# every contiguous input sequence.
externalvars = ('TempEvapIn', 'TempAmbient', 'TempWetBulb', 'PressDiffEvap', 'PressDiffCond')
externalvals = [sequence.loc[:, externalvars] for sequence in dfs_in]

class InvTransformer:
    """Estimator wrapper whose predictions are passed through an inverse transform.

    Parameters
    ----------
    estimator : object exposing ``predict(x)``.
    transformer : object exposing ``inverse_transform(y)``.
    """

    def __init__(self, estimator, transformer):
        self.estimator, self.transformer = estimator, transformer

    def predict(self, x):
        """Return ``transformer.inverse_transform(estimator.predict(x))``."""
        raw_prediction = self.estimator.predict(x)
        return self.transformer.inverse_transform(raw_prediction)
        

# Build the environment from the inverse-transforming model wrapper and the
# per-sequence external variable frames.
esb = Condenser(InvTransformer(est_cond, std_out_cond), externalvals)

In [None]:
# Visualize environment episode: roll out one episode with randomly sampled
# actions, then plot the recorded power and temperatures.
done = False
states = []
power = []
esb.reset()
while not done:
    state, _, done, info = esb.step(esb.action_space.sample())
    states.append(state)
    power.append(info.get('powchi'))
# Leave the environment freshly reset for the cells that follow.
esb.reset()

states = np.asarray(states)
power = np.asarray(power)
plt.subplot(2,1,1)
plt.plot(power, label='Total Power')
plt.legend()
plt.subplot(2,1,2)
# State vector layout used throughout this notebook:
#   0 TempCondIn, 1 TempCondOut, 2 TempEvapOut, 3 PowChi, 4 PowFanA,
#   5 PowFanB, 6 PowConP, 7 TempEvapIn, 8 TempAmbient, 9 TempWetBulb, ...
# Columns 4-6 are power readings, so the three external temperatures are
# read from columns 7-9 (they were previously read from 4-6).
plt.plot(states[:, 0], label='TempCondIn')
plt.plot(states[:, 1], label='TempCondOut')
plt.plot(states[:, 2], label='TempEvapOut')
plt.plot(states[:, 7], label='TempEvapIn')
plt.plot(states[:, 8], label='TempAmbient')
plt.plot(states[:, 9], label='TempWetBulb')
plt.legend()
plt.show()

## Simple Feedback Control

In [None]:
# Position of the longest contiguous input sequence (the first one wins on ties).
longest_seq_idx = max(enumerate(dfs_in), key=lambda item: len(item[1]))[0]

In [None]:
# Bare expression so the notebook renders the input frame of the longest sequence.
dfs_in[longest_seq_idx]

In [None]:
# seqidx = np.random.randint(len(dfs_in))
seqidx = longest_seq_idx
simulate_hist = True  # Whether to use raw output data, or simulate it through historical actions

# indexing histories after 1st element because simulated trajectories
# are recorded after initial state (> 0), so lengths are equal
act_hist = dfs_in[seqidx].loc[:, ('TempCondInSetpoint')].values[1:, None]
ext = dfs_in[seqidx].loc[:, externalvars]

# Get baseline by running historic actions through environment:
if simulate_hist:
    # Start from the first recorded state (all input columns except the action).
    esb.reset(external=ext, state0=dfs_in[seqidx].iloc[0, 1:].values)
    done = False
    pow_hist_chi, pow_hist_fan, temp_hist = [], [], []
    t = 0
    while not done:
        # Replay the historical setpoint for step t as a 1-element action.
        action = act_hist[t, :1]
        _, _, done, info = esb.step(action)
        # pow_hist_fan.append(info.get('powfans'))
        pow_hist_chi.append(info.get('powchi'))
        temp_hist.append(info.get('tempcondin'))
        t += 1
else:
    # Use the recorded outputs directly. The fan history uses the same name
    # as the simulated branch (`pow_hist_fan`) and is the sum of the two
    # per-fan columns; the output variables listed for this model contain
    # 'PowFanA' and 'PowFanB' but no combined 'PowFans' column.
    pow_hist_chi = dfs_out[seqidx]['PowChi'].values
    pow_hist_fan = (dfs_out[seqidx]['PowFanA'] + dfs_out[seqidx]['PowFanB']).values
    temp_hist = dfs_out[seqidx]['TempCondIn'].values

In [None]:
# define agent
class Controller1(SimpleFeedbackController):
    """Simple feedback controller that is rewarded for lowering total power."""

    def feedback(self, X):
        """Return the negated sum of state entries 3..6.

        Those entries are PowChi, PowFanA, PowFanB and PowConP.
        (Alternative signal kept from the original: ``-X[0]``.)
        """
        total_power = sum(X[3:7])
        return -total_power

    def starting_action(self, X):
        """Return the initial action: TempWetBulb (entry 9) plus 4, as a 1-element array."""
        wet_bulb = X[9]
        return np.asarray([wet_bulb + 4])

    def clip_action(self, u, X):
        """Apply the parent's clipping, then keep the action at or above TempWetBulb."""
        bounded = super().clip_action(u, X)
        return np.clip(bounded, a_min=X[9], a_max=None)

class Controller2(FeedbackController):
    """Feedback controller that is rewarded for lowering chiller power."""

    def feedback(self, X):
        """Return the negated PowChi reading (state entry 3)."""
        chiller_power = X[3]
        return -chiller_power

    def starting_action(self, X):
        """Provide no explicit starting action.

        Alternative kept from the original: ``X[9] + 4`` (TempWetBulb plus 4).
        """
        return None
        


def agent_fn():
    """Return a fresh Controller1 with setpoint bounds 60-80 and a step size of 1."""
    return Controller1(bounds=((60., 80.),), stepsize=1)
# Alternative agent: Controller2(bounds=((55., 90.),), kp=1., ki=0.2, kd=0.)

In [None]:
# One list per metric; each trial appends its own trace to every list.
pfan, pchi, act, rewards, temp = [], [], [], [], []

# Repeat the rollout over the same period (relevant for a stochastic policy).
for trial in trange(1, leave=False):
    state = esb.reset(external=ext, state0=dfs_in[seqidx].iloc[0, 1:].values)
    agent = agent_fn()
    done = False
    # Open a new, empty trace for this trial in every metric.
    pfan.append([])
    pchi.append([])
    act.append([])
    rewards.append([])
    temp.append([])
    while not done:
        action = agent.predict(state)[0]
        state, reward, done, info = esb.step(action)
        # Fan power is not recorded at the moment, so pfan traces stay empty.
        act[-1].append(action)
        pchi[-1].append(info.get('powchi'))
        rewards[-1].append(reward)
        temp[-1].append(info.get('tempcondin'))

# Spread across trials, per time step (fan power is skipped).
std_pchi, std_act, std_rewards, std_temp = (
    np.std(metric, axis=0, keepdims=False)
    for metric in (pchi, act, rewards, temp)
)

# Replace each per-trial collection with its mean across trials.
pchi, act, rewards, temp = (
    np.mean(metric, axis=0, keepdims=False)
    for metric in (pchi, act, rewards, temp)
)

In [None]:
# Three stacked panels comparing the controller rollout (blue, with a
# +/- one standard deviation band) against the historical baseline (red).
plt.figure(figsize=(8,12))
# Fan power panel, currently disabled:
# plt.subplot(4,1,1)
# plt.title('Fan Power (Average RL {:.0f}W vs Historical {:.0f}W)'\
#           .format(np.mean(pfan), np.mean(pow_hist_fan)))
# plt.plot(pfan, 'b:', label='RL.Fan')
# plt.fill_between(np.arange(len(pfan)), pfan+std_pfan, pfan-std_pfan, color='b', alpha=0.3)
# plt.plot(pow_hist_fan, 'r:', label='Historical.Fan')
# plt.ylim(bottom=0)
# plt.legend()

# Panel 1: chiller power.
plt.subplot(3,1,1)
plt.title('Chiller Power (Average {:.0f}kW vs Historical {:.0f}kW)'\
          .format(np.mean(pchi), np.mean(pow_hist_chi)))
plt.plot(pchi, 'b:', label='Chiller')
plt.fill_between(np.arange(len(pchi)), pchi+std_pchi, pchi-std_pchi, color='b', alpha=0.3)
plt.plot(pow_hist_chi, 'r:', label='Historical.Chiller')
plt.ylim(bottom=0)
plt.legend()

# Panel 2: setpoint actions, with ambient and wet-bulb temperature for reference.
# `act` is indexed as a 2-D array here (column 0 is the single action variable).
plt.subplot(3,1,2)
plt.title('Setpoint Control (Average {:.2f} vs Historical {:.2f})'\
          .format(np.mean(act[:, 0]), np.mean(act_hist[:, 0])))
plt.plot(ext['TempAmbient'].values, 'g.', label='TempAmbient')
plt.plot(ext['TempWetBulb'].values, 'c.', label='TempWetBulb')
plt.plot(act[:, 0], 'b:', label='Setpoint')
plt.fill_between(np.arange(len(act[:, 0])), act[:, 0]+std_act[:, 0], act[:, 0]-std_act[:, 0], color='b', alpha=0.3)
plt.plot(act_hist[:, 0], 'r:', label='Historical.Setpoint')
# plt.ylim(top=1.05)
plt.legend()


# Panel 3: temperature reported under the 'tempcondin' info key.
plt.subplot(3,1,3)
plt.title('Output Temperature (Average {:.1f}F vs Historical {:.1f}F)'\
          .format(np.mean(temp), np.mean(temp_hist)))
plt.plot(temp, 'b:', label='Temp')
plt.fill_between(np.arange(len(temp)), temp+std_temp, temp-std_temp, color='b', alpha=0.3)
plt.plot(temp_hist, 'r:', label='Historical.Temp')
plt.legend()

plt.tight_layout()

In [None]:
# Ambient and wet-bulb temperature over the selected sequence.
plt.figure(figsize=(8,3))
for _column, _legend in (('TempAmbient', 'Ambient Temp'),
                         ('TempWetBulb', 'WetBulb Temp')):
    plt.plot(dfs_in[seqidx][_column].values, label=_legend)
plt.legend()
plt.title('Environmental Conditions')
plt.ylabel('Temperature /F')
plt.tight_layout()
plt.show()