Models for chiller cooling from BDX data, where the setpoints are generated from a simple feedback controller (`src/baseline_control.SimpleFeedbackController`).

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
from itertools import zip_longest, product

import notebook_setup
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from tslearn.utils import to_time_series
from tslearn.metrics import dtw
from sklearn import metrics
from sklearn import cluster
from sklearn import manifold
from datetime import datetime, timezone, timedelta
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
import gym
import optuna
from tqdm.autonotebook import tqdm, trange

from bdx import get_trend
from utils import get_credentials
from plotting import model_surface, plot_surface
from commonml import rl, helpers, stats

## Data

* Chiller 1 data (2422) `2021-03-18 1220-5` to `2021-03-26 1010-5`, `2021-05-25 0000-5` to `2021-06-08 0000-5`
* Chiller 2 data (2841) `2021-03-26 1010-5` to `2021-05-16 0610-5`
* Python setpoint data (3481)

In [None]:
# Fixed-offset timezones: UTC-6 (`cst`) and UTC-5 (`cdt`).
cst = timezone(offset=-timedelta(hours=6))
cdt = timezone(offset=-timedelta(hours=5))

# Root directory of the data files. Fail early with an explicit message when
# DATADIR is unset; os.path.join(None, ...) would raise an opaque TypeError.
_data_root = os.environ.get('DATADIR')
if _data_root is None:
    raise EnvironmentError('The DATADIR environment variable must point to the data directory.')
datadir = os.path.join(_data_root, 'EngineeringScienceBuilding')
username, password = get_credentials()

### Download and save

In [None]:
# Get data from BDX
# Chiller 1 (trend 2422) is fetched for two disjoint windows that are stacked
# in chronological order. pd.concat is used because DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
ch1 = pd.concat([
    get_trend('2422', username, password,
              start=datetime(2021,3,18,12,20, tzinfo=cdt),
              end=datetime(2021,3,26,10,10, tzinfo=cdt)),
    get_trend('2422', username, password,
              start=datetime(2021,5,25,0,0, tzinfo=cdt),
              end=datetime(2021,6,8,0,0, tzinfo=cdt)),
])

# Chiller 2 (trend 2841): one contiguous window.
ch2 = get_trend('2841', username, password,
               start=datetime(2021,3,26,10,10, tzinfo=cdt),
               end=datetime(2021,5,16,6,10, tzinfo=cdt))

# Setpoint trend (3481) over the same two windows as the chiller data.
stpt = pd.concat([
    get_trend('3481', username, password,
              start=datetime(2021,3,18,12,20, tzinfo=cdt),
              end=datetime(2021,5,16,6,10, tzinfo=cdt)),
    get_trend('3481', username, password,
              start=datetime(2021,5,25,0,0, tzinfo=cdt),
              end=datetime(2021,6,8,0,0, tzinfo=cdt)),
])
# Keep only the python-generated setpoint column.
stpt = stpt['CDWTPythonSetpt']

In [None]:
# Get naive reference data from BDX
# The reference window ends before the first model-data window begins.
refstart = datetime(2020,3,1, tzinfo=cst)
refend = datetime(2021,3,8, tzinfo=cst)
_ref_window = dict(start=refstart, end=refend)

stpt_ref = get_trend('3481', username, password, **_ref_window)
stpt_ref = stpt_ref['JCI Cooling_Tower_Water_Setpoint']

# For both chillers keep only rows where RunChi is True and PowChi is positive.
ch1_ref = get_trend('2422', username, password, **_ref_window)
_keep1 = (ch1_ref['RunChi']==True) & (ch1_ref['PowChi'] > 0)
ch1_ref = ch1_ref.loc[_keep1]

ch2_ref = get_trend('2841', username, password, **_ref_window)
_keep2 = (ch2_ref['RunChi']==True) & (ch2_ref['PowChi'] > 0)
ch2_ref = ch2_ref.loc[_keep2]

In [None]:
# Save data to disk
for _frame, _filename in (
    (ch1, '2422_v3_chiller1_model.csv'),
    (ch2, '2841_v3_chiller2_model.csv'),
    (stpt, '3481_v3_setpoint_model.csv'),
):
    _frame.to_csv(os.path.join(datadir, _filename))

In [None]:
# Save reference data to disk
for _frame, _filename in (
    (ch1_ref, '2422_v3_chiller1_eval.csv'),
    (ch2_ref, '2841_v3_chiller2_eval.csv'),
    (stpt_ref, '3481_v3_setpoint_eval.csv'),
):
    _frame.to_csv(os.path.join(datadir, _filename))

### Load from file

In [None]:
# Load data from disk
ch1 = pd.read_csv(os.path.join(datadir, '2422_v3_chiller1_model.csv'), index_col='time', parse_dates=True)
ch2 = pd.read_csv(os.path.join(datadir, '2841_v3_chiller2_model.csv'), index_col='time', parse_dates=True)
stpt = pd.read_csv(os.path.join(datadir, '3481_v3_setpoint_model.csv'), index_col='time', parse_dates=True)

# Align the setpoint trend onto each chiller's timestamps. reindex() yields NaN
# for timestamps absent from the setpoint file, whereas .loc with a list of
# labels raises KeyError as soon as one label is missing. Rows with a NaN
# setpoint are dropped later by get_env_data (dropna).
ch1_stpt = stpt.reindex(ch1.index)
ch2_stpt = stpt.reindex(ch2.index)
# read_csv returns a DataFrame; take its single data column explicitly.
ch1['Setpoint'] = ch1_stpt.iloc[:, 0]
ch2['Setpoint'] = ch2_stpt.iloc[:, 0]

In [None]:
# Load ref data from disk
ch1_ref = pd.read_csv(os.path.join(datadir, '2422_v3_chiller1_eval.csv'), index_col='time', parse_dates=True)
ch2_ref = pd.read_csv(os.path.join(datadir, '2841_v3_chiller2_eval.csv'), index_col='time', parse_dates=True)
stpt_ref = pd.read_csv(os.path.join(datadir, '3481_v3_setpoint_eval.csv'), index_col='time', parse_dates=True)

# reindex() fills NaN where the setpoint trend has no sample for a chiller
# timestamp; .loc with missing labels would raise KeyError before the NaN
# filter below could ever run.
ch1_ref_stpt = stpt_ref.reindex(ch1_ref.index)
ch2_ref_stpt = stpt_ref.reindex(ch2_ref.index)
# read_csv returns a DataFrame; take its single data column explicitly.
ch1_ref['Setpoint'] = ch1_ref_stpt.iloc[:, 0]
ch2_ref['Setpoint'] = ch2_ref_stpt.iloc[:, 0]

# Drop rows that have no setpoint value.
ch1_ref = ch1_ref[~ch1_ref['Setpoint'].isna()]
ch2_ref = ch2_ref[~ch2_ref['Setpoint'].isna()]

## Models & Environments

Time-independent state space. `[x] -> y` where `[Ambient, Chiller, Setpoint] -> [Next condenser water temperatures (in and out), fan power]`

In [None]:
from systems import CoolingTowerEnv

In [None]:
ch1.columns

In [None]:
# Features for the data-model for environment
# State variables (everything the model sees except the action)
state_vars = [
    'TempWetBulb', 'TempAmbient',   # Ambient
    'TempCondOut',                  # Machine temperatures
    'Tonnage', 'PressDiffCond',     # Machine state
]
# Model inputs: state variables followed by the action
colsx = state_vars + ['Setpoint']
# Variables for cooling tower conditions that are staged
ticker_vars = ['TempWetBulb', 'TempAmbient', 'Tonnage', 'PressDiffCond']
# Number of rows between a model input and its target
lag = 1
# Model outputs
colsy = ['TempCondIn', 'TempCondOut', 'PowFanA']

In [None]:
def get_env_data(df, colsx, colsy, ticker_vars, lag=1, train_split=0.9):
    """Build training data and daily condition traces for an environment model.

    Rows with a NaN in any used column are dropped. Inputs are the `colsx`
    values of a row; targets are the `colsy` values `lag` rows later. Pairing
    is positional on the NaN-free frame, so it does not account for gaps in
    the timestamps.

    Parameters
    ----------
    df : DataFrame indexed by timestamps (`df.index.date` is used for grouping).
    colsx, colsy : lists of input / target column names.
    ticker_vars : list of column names kept in the per-day traces.
    lag : non-negative number of rows between an input and its target.
    train_split : passed to `train_test_split` as `train_size`.

    Returns
    -------
    x_train, x_val, y_train, y_val, ticker, scaler
        `x_*` are inputs scaled to [-1, 1] by `scaler`, which is fit on all
        rows before the split. `y_*` are unscaled targets. `ticker` is a list
        of per-day frames of `ticker_vars`, keeping only days with 288 rows.

    Raises
    ------
    ValueError
        If `lag` is negative.
    """
    if lag < 0:
        raise ValueError('lag must be non-negative, got {}'.format(lag))
    data = df.loc[:, list(set(colsx+colsy+ticker_vars))]
    data = data.dropna(axis=0, how='any')
    ticker = [day_data for _, day_data in data[ticker_vars].groupby(data.index.date)]
    ticker = [t for t in ticker if len(t)==288] # all samples for a day

    # Positional slicing keeps x and y the same length for lag=0 (where
    # index[:-0] would be empty) and for indexes with repeated timestamps
    # (where label lookups return extra rows).
    n_pairs = max(len(data) - lag, 0)
    x = data.iloc[:n_pairs][colsx]
    y = data.iloc[lag:][colsy]

    scaler = MinMaxScaler(feature_range=(-1, 1))
    x = scaler.fit_transform(x)
    y = y.to_numpy().squeeze()

    x_train, x_val, y_train, y_val = train_test_split(x, y, train_size=train_split)
    return x_train, x_val, y_train, y_val, ticker, scaler

def train_model(x, y, **model_params):
    """Fit and return an `MLPRegressor` on `(x, y)`.

    Keyword arguments override the defaults below and are passed to the
    `MLPRegressor` constructor.
    """
    settings = {
        'hidden_layer_sizes': (32,32,32),
        'learning_rate_init': 1e-3,
        'max_iter': 500,
        'verbose': True,
    }
    settings.update(model_params)
    regressor = MLPRegressor(**settings)
    regressor.fit(x, y)
    return regressor

def get_env(model_fn, scaler_fn, ticker, seed=0):
    """Create a `CoolingTowerEnv` from a model, a scaler and condition traces.

    `model_fn` may be a callable or a scikit-learn estimator, in which case
    its `predict` method is used. `scaler_fn` may be a callable or a
    scikit-learn transformer, in which case its `transform` method is used.
    """
    predict = model_fn.predict if isinstance(model_fn, sklearn.base.BaseEstimator) else model_fn
    transform = scaler_fn.transform if isinstance(scaler_fn, sklearn.base.TransformerMixin) else scaler_fn
    return CoolingTowerEnv(predict, ticker, seed, transform)

### ESB Chiller 1 Model

In [None]:
# Build the chiller 1 dataset (default lag and train split) and fit its model.
x_train1, x_val1, y_train1, y_val1, ticker1, scaler1 = get_env_data(ch1, colsx, colsy, ticker_vars)
model1 = train_model(x_train1, y_train1)

In [None]:
%matplotlib inline
# Compare model 1 predictions with validation data for the first 200
# validation samples.
n_show = 200
xx = x_val1[:n_show]
xx_ = scaler1.inverse_transform(xx)  # inputs back in original units
yy = y_val1[:n_show]
yp = model1.predict(xx)

plt.figure(figsize=(10,5))
plt.subplot(2,1,1)
# Output columns follow colsy; input columns follow colsx.
plt.plot(yp[:,0], label='TempCondIn-pred')
plt.plot(yp[:,1], label='TempCondOut-pred')
plt.plot(xx_[:,0], label='TempWetBulb', ls=':')
plt.plot(xx_[:,2], label='TempCondOut-last', ls=':')
plt.legend()
plt.subplot(2,1,2)
# Plot the predicted fan power under the '-pred' label (previously the
# measured values were drawn with it) and add the measured values for reference.
plt.plot(yp[:,2], label='PowFan-pred')
plt.plot(yy[:,2], label='PowFan', ls=':')
plt.legend()

In [None]:
%matplotlib inline
# Rewards on the chiller 1 environment when holding a constant setpoint.
stpts = [55., 60., 65., 70., 75.]
# The loop variable is deliberately not called `stpt`: that name holds the
# setpoint data loaded from disk and would be overwritten by the loop.
for setpoint in stpts:
    env1 = get_env(model1, scaler1, ticker1, 0)
    scaled_stpt = env1.scale_setpoint([setpoint])
    rewards = helpers.rewards(env1, lambda x: scaled_stpt)[0]
    plt.plot(rewards, label='{:.0f}, total:{:.0f}'.format(setpoint, sum(rewards)))
plt.legend()

TODO:

1. Mutual information between TempCondIn-Pred vs TempCondIn, and TempCondIn vs TempCondIn-Last. Essentially, figure out whether the model prediction is closer to the actual data, or whether consecutive (lagged) data points are simply closer to each other.

### ESB Chiller 2 Model

In [None]:
# Build the chiller 2 dataset (default lag and train split) and fit its model.
x_train2, x_val2, y_train2, y_val2, ticker2, scaler2 = get_env_data(ch2, colsx, colsy, ticker_vars)
model2 = train_model(x_train2, y_train2)

In [None]:
# Predicted vs measured fan power for the first samples of chiller 2.
# `data` only exists inside get_env_data, so rebuild the same NaN-free frame
# here to keep the rows aligned with what the model was trained on.
data = ch2.loc[:, list(set(colsx+colsy+ticker_vars))].dropna(axis=0, how='any')
shift = 1  # rows between input and target; get_env_data's default lag
n_show = max(min(2000, len(data) - shift), 0)  # guard against short data

xx = scaler2.transform(data.iloc[:n_show][colsx])
yy = model2.predict(xx)
steps = np.arange(len(yy))
fan_power = data['PowFanA']

plt.figure(figsize=(10,5))
plt.plot(steps, yy[:, 2], label='PowFanA-Pred')
# Measured value at the predicted step, and at the step before it.
plt.plot(steps, fan_power.iloc[shift:n_show+shift].to_numpy().squeeze(), label='PowFanA', ls=':')
plt.plot(steps, fan_power.iloc[:n_show].to_numpy().squeeze(), label='PowFanA-Last', ls=':')
plt.legend()

In [None]:
%matplotlib inline
# Rewards on the chiller 2 environment when holding a constant setpoint.
stpts = [55., 60., 65., 70., 75.]
# The loop variable is deliberately not called `stpt`: that name holds the
# setpoint data loaded from disk and would be overwritten by the loop.
for setpoint in stpts:
    env2 = get_env(model2, scaler2, ticker2, 0)
    scaled_stpt = env2.scale_setpoint([setpoint])
    rewards = helpers.rewards(env2, lambda x: scaled_stpt)[0]
    plt.plot(rewards, label='{:.0f}, total:{:.0f}'.format(setpoint, sum(rewards)))
plt.legend()

### Benchmark Controllers

In [None]:
from controllers.baseline_control import SimpleFeedbackController

class UpDownController(SimpleFeedbackController):
    """Feedback controller whose feedback signal comes from a learned model.

    The feedback is the reward the model predicts for the most recent state
    and action. How the base class turns that feedback into the next action
    is defined in `SimpleFeedbackController`, not here.

    Parameters
    ----------
    model_fn : callable mapping a scaled (1, n_features) array to a
        (1, 3) array of (TempCondIn, TempCondOut, fan power).
    scaler_fn : callable scaling a raw (1, n_features) array.
    seed : forwarded to the base class.
    """

    def __init__(self, model_fn, scaler_fn, seed=None):
        super().__init__(bounds=((-1, 1.),), stepsize=0.05, window=1, seed=seed)
        self.model_fn = model_fn
        self.scaler_fn = scaler_fn

    def feedback(self, newstate):
        """Return the model-predicted reward, clipped to [-1, 1].

        Returns 0 when no action has been recorded yet. `newstate` is not
        used; the last recorded state and action are read from the base
        class buffers `_states` and `_actions`.
        """
        if len(self._actions) == 0:
            return 0.
        state = self._states[-1]
        action = self._actions[-1]
        x = np.concatenate((state, action))
        x = self.scaler_fn(x.reshape(1, -1))
        # The action is already in [-1, 1], so restore it after scaling. Only
        # the last column is written: `x[-1] = ...` on this (1, n) array would
        # overwrite every feature in the row with the action value.
        x[:, -1] = action[0]
        temp_cond_in, _, pow_fan = self.model_fn(x)[0]
        # presumably state follows `state_vars`, i.e. state[0] is the wet-bulb
        # temperature and state[2] is TempCondOut — TODO confirm against the env.
        reward = ((state[2] - temp_cond_in) / \
                  (state[2] - state[0] + 1e-2)) - \
                 (max(pow_fan, 0) / 10)
        return np.clip(reward, -1, 1)

    def starting_action(self, x):
        """Draw the first action uniformly from the action bounds."""
        return self.random.uniform(*self.bounds[0])

class FixedApproachController:
    """Baseline that sets the setpoint a fixed offset above `state[0]`.

    `state[0]` is read as the wet-bulb temperature. The returned action is
    the setpoint mapped through `(setpoint - 65) / 10`.
    """

    def __init__(self, approach: float):
        self.approach = approach

    def predict(self, state):
        """Return `(action, 1.)` where action is a length-1 array."""
        wet_bulb_temp = state[0]
        target = np.array([self.approach + wet_bulb_temp])
        action = (target - 65) / 10.
        return action, 1.

class ModelPredictiveController:
    """One-step greedy controller over a fixed grid of candidate actions.

    For a given state, every candidate action in [-1, 1] is scored with the
    model-predicted reward and the best one is returned.

    Parameters
    ----------
    model_fn : callable mapping scaled inputs (n, n_features) to outputs whose
        column 0 is read as TempCondIn and column 2 as fan power.
    scaler_fn : callable scaling raw inputs (n, n_features).
    resolution : number of evenly spaced candidate actions.
    """

    def __init__(self, model_fn, scaler_fn, resolution=21):
        self.model_fn = model_fn
        self.scaler_fn = scaler_fn
        self.resolution = resolution
        self.bounds = np.asarray([[-1, 1]])
        low, high = self.bounds[0]
        grid = np.linspace(low, high, num=resolution, endpoint=True)
        self.actions = grid.reshape(-1, 1)

    def predict(self, state):
        """Return `(best_action, 1.)` where best_action is a length-1 array."""
        # One row per candidate: the same state followed by a different action.
        repeated_state = np.tile(state, (self.resolution, 1))
        candidates = self.scaler_fn(np.hstack((repeated_state, self.actions)))
        # Candidate actions are already scaled, so restore them after scaling.
        candidates[:, -1] = self.actions[:, 0]
        outputs = self.model_fn(candidates)
        temp_cond_in, pow_fan = outputs[:,0], outputs[:,2]
        cooling = (state[2] - temp_cond_in) / (state[2] - state[0] + 1e-2)
        fan_cost = np.fmax(pow_fan, 0) / 10
        reward = cooling - fan_cost
        return self.actions[np.argmax(reward)], 1.
        

## Transfer

In [None]:
# RL hyperparameter search
def objective(trial: optuna.Trial):
    """Train a PPO agent on the chiller 2 environment with sampled settings.

    Samples the latent width, learning rate, epochs, update interval and
    number of training timesteps from `trial`. Returns the mean of the last
    five values returned by `agent.learn`, which the study maximizes.
    """
    params = dict(
        policy = rl.ActorCriticBox,
        activation = nn.Tanh,
        state_dim = len(state_vars),
        action_dim = 1,
        n_latent_var = trial.suggest_int('n_latent_var', 16, 128),
        # suggest_float(..., log=True) is the replacement for the deprecated
        # suggest_loguniform and samples from the same distribution.
        lr = trial.suggest_float('lr', 1e-4, 1e-1, log=True),
        gamma = 0.,
        epochs = trial.suggest_int('epochs', 1, 10),
        update_interval = trial.suggest_int('update_interval', 16, 288, log=True)
    )

    env = get_env(model2, scaler2, ticker2, 0)
    agent = rl.PPO(env = env, seed=0, **params)
    rewards = agent.learn(timesteps=trial.suggest_int('timesteps', 288*10, 288*30),
                          reward_aggregation='episodic.normalized')
    feedback = np.mean(rewards[-5:])
    return feedback

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

In [None]:
# Shared PPO settings used by every agent trained below.
agent_params = {
    'policy': rl.ActorCriticBox,
    'activation': nn.Tanh,
    'state_dim': len(state_vars),
    'action_dim': 1,
    'n_latent_var': 64,
    'lr': 3e-4,
    'gamma': 0.,
    'epochs': 10,
    'update_interval': 150,
    'truncate': False,
    'seed': 0,
}
# Default training budget for each agent.
timesteps = 288 * 30

In [None]:
def transfer_experiment(agent_from, agent_to, transfer_model=None, timesteps=288*30, **agent_params):
    """Transfer the policy of `agent_from` to the environment of `agent_to`.

    Two new agents are initialized with `agent_from`'s policy weights:

    * with `transfer_model` given, one agent is trained on environments built
      around that model: a warm-up of `timesteps // 10` steps, then
      `timesteps` steps on a fresh environment;
    * one agent is always trained for `timesteps` steps on an environment
      built around `agent_to`'s own model.

    Returns `((agent, rewards), (agent_, rewards_))` in that order; the first
    pair is `(None, None)` when `transfer_model` is None.
    """
    env_from = agent_from.env  # read as in the original; not used below
    env_to = agent_to.env

    def build_target_env(model):
        # NOTE(review): get_env's third parameter is `ticker`, but the value
        # passed is `env_to.ticker_vars` — confirm this attribute holds the
        # daily traces.
        return get_env(model, env_to.scaler_fn, env_to.ticker_vars, 0)

    agentx, rewardsx = None, None
    if transfer_model is not None:
        agentx = rl.PPO(env=build_target_env(transfer_model), **agent_params)
        agentx.policy.load_state_dict(agent_from.policy.state_dict())
        agentx.learn(timesteps // 10)
        agentx.env = build_target_env(transfer_model)
        rewardsx = agentx.learn(timesteps=timesteps)

    agentx_ = rl.PPO(env=None, **agent_params)
    agentx_.env = build_target_env(env_to.model_fn)
    agentx_.policy.load_state_dict(agent_from.policy.state_dict())
    rewardsx_ = agentx_.learn(timesteps=timesteps)

    return (agentx, rewardsx), (agentx_, rewardsx_)

In [None]:
import warnings
# Turn every warning into an exception for all cells executed after this one.
warnings.simplefilter("error")

### Transfer Across Towers

#### Env 1 Policy

In [None]:
%matplotlib inline
# Training on environment 1
# Fresh environment and PPO agent for chiller 1; plots the values returned by learn().
env1 = get_env(model1, scaler1, ticker1, 0)
agent1 = rl.PPO(env = env1, **agent_params)
rewards1 = agent1.learn(timesteps=timesteps)
plt.plot(stats.rolling_mean(rewards1, 1))

In [None]:
%matplotlib inline
# Roll the trained tower 1 policy through a single day of conditions and
# plot the chosen setpoints against the rewards.
# NOTE(review): the day comes from `ticker2` although model and scaler are
# those of chiller 1 — confirm this is intended.
env1_ = get_env(model1, scaler1, ticker2[:1], seed=0)
res = helpers.get_from_env(('reward', 'action'), env1_, lambda s: agent1.policy.predict(s)[0])
rewards = np.asarray(res['reward']).squeeze()
# Map actions to setpoints: action * 10 + 65.
actions = np.asarray(res['action']).squeeze() * 10 + 65
# plt.figure(figsize=(10,5))
s, = plt.plot(actions)
plt.ylabel('setpoint')
plt.ylim(55, 75)
# Rewards go on a secondary y-axis.
plt.twinx()
r, = plt.plot(rewards, c='r', ls=':')
plt.ylabel('reward')
plt.ylim(-1, 1)
plt.legend((s, r), ('Setpoint', 'Reward'))
plt.title('Tower 1 control')

#### Env 2 Policy

In [None]:
%matplotlib inline
# Training environment 2
# Fresh environment and PPO agent for chiller 2; plots a rolling mean (window 4) of the values returned by learn().
env2 = get_env(model2, scaler2, ticker2, 0)
agent2 = rl.PPO(env = env2, **agent_params)
rewards2 = agent2.learn(timesteps=timesteps)
plt.plot(stats.rolling_mean(rewards2, 4))

In [None]:
%matplotlib inline
# Roll the trained tower 2 policy through the first day of `ticker2` and
# plot the chosen setpoints against the rewards.
env2_ = get_env(model2, scaler2, ticker2[:1], seed=0)
res = helpers.get_from_env(('reward', 'action'), env2_, lambda s: agent2.policy.predict(s)[0])
rewards = np.asarray(res['reward']).squeeze()
# Map actions to setpoints: action * 10 + 65.
actions = np.asarray(res['action']).squeeze() * 10 + 65
# plt.figure(figsize=(10,5))
s, = plt.plot(actions)
plt.ylabel('setpoint')
plt.ylim(55, 75)
# Rewards go on a secondary y-axis.
plt.twinx()
r, = plt.plot(rewards, c='r', ls=':')
plt.ylabel('reward')
plt.ylim(-1, 1)
plt.legend((s, r), ('Setpoint', 'Reward'))
plt.title('Tower 2 control')

#### Model Transfer

In [None]:
test_point = x_val2[:1]

In [None]:
# Model 1 prediction with model 2 data
# Surfaces of the first model output (TempCondIn) around `test_point`, varying
# input columns 0 and 5 of colsx (TempWetBulb and Setpoint) over [-1, 1].
vary = (0, 5)
s1 = model_surface(lambda x: model1.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))
s2 = model_surface(lambda x: model2.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))

# fitting model1 on model2 data
# presumably helpers.clone copies model1 and overrides the listed training-state
# attributes — verify against commonml.helpers.
model12 = helpers.clone(model1, dict(n_iter_=0, t_=0, loss_curve_=[], best_loss_=np.inf))
model12.set_params(warm_start=True, verbose=True)
# Only the chiller 2 validation split is used for the re-fit.
model12.fit(x_val2, y_val2)
# model12.fit(x_train2, y_train2)

In [None]:
%matplotlib notebook
# Surface of the re-fitted model, drawn together with models 1 and 2.
s12 = model_surface(lambda x: model12.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))

# The first surface creates the figure; the others are drawn on its axes.
plot_surface(*s1, fig_kwargs={'figsize':(8,8)}, cmap=plt.cm.coolwarm, hatch='o', alpha=0.6)
ax = plt.gca()
for _surface, _hatch in ((s2, '-'), (s12, '.')):
    plot_surface(*_surface, cmap=plt.cm.coolwarm, ax=ax, hatch=_hatch, alpha=0.6)
ax.set_zlim(50, 80)
plt.xlabel(colsx[vary[0]])
_ylabel = colsx[vary[1]] if vary[1]<len(colsx) else 'Setpoint'
plt.ylabel(_ylabel)
ax.set_zlabel('Condenser Temperature In')
ax.view_init(10, 60)
plt.subplots_adjust(left=0, right=1, bottom=0, top=1)

#### Policy transfer

In [None]:
# Transfer from env 1 to 2, using model12 as intermediate
# Start from agent1's policy weights, warm up for 3*288 steps, then train for
# 30*288 steps on a fresh environment built around model12.
env12 = get_env(model12, scaler2, ticker2, 0)
agent12 = rl.PPO(env = env12, **agent_params)
agent12.policy.load_state_dict(agent1.policy.state_dict())
agent12.learn(288 * 3)
agent12.env = get_env(model12, scaler2, ticker2, 0)
rewards12 = agent12.learn(timesteps=288*30)

In [None]:
# Transfer w/o model as intermediate
# Start from agent1's policy weights and train directly on env2 (the same
# environment object that agent2 was trained on).
agent12_ = rl.PPO(env = env2, **agent_params)
agent12_.policy.load_state_dict(agent1.policy.state_dict())
rewards12_ = agent12_.learn(timesteps=288*30)

In [None]:
# transferring across
# Re-runs both transfers through transfer_experiment; this overwrites the
# agent12/rewards12 and agent12_/rewards12_ values from the two cells above.
(agent12, rewards12), (agent12_, rewards12_) = \
    transfer_experiment(agent1, agent2, transfer_model=model12, timesteps=timesteps, **agent_params)

In [None]:
# Learning performance
# Each legend entry shows the mean of the last five training rewards.
for _curve, _label, _line_kwargs in (
    (rewards1, 'Tower 1: %.2f', dict(ls=':')),
    (rewards2, 'Tower 2: %.2f', {}),
    (rewards12, 'Tower 1->model->2: %.2f', {}),
    (rewards12_, 'Tower 1->2: %.2f', {}),
):
    plt.plot(stats.rolling_mean(_curve, 4), label=_label % np.mean(_curve[-5:]), **_line_kwargs)
plt.ylabel('Rewards')
plt.xlabel('Episodes')
plt.legend()
plt.title('RL Performance')

In [None]:
# Operational performance
# Every controller is evaluated on a fresh chiller 2 environment, once per
# seed, and the first reward series of each rollout is collected.
def _rollout_rewards(controller, model, scaler, ticker, seed):
    """Run `controller` on a new environment and return the first reward series."""
    env = get_env(model, scaler, ticker, seed)
    return helpers.get_from_env(('reward',), env, lambda x: controller.predict(x)[0])['reward'][0]

results = dict(r2=[], r12=[], r12_=[], rud=[], rfa=[], rmpc=[])
for i in trange(10, leave=False):
    _seed = i
    # rprod1 = helpers.get_from_env(('reward',), get_env(model2, scaler2, ticker2, _seed), lambda x: agent1.predict(x)[0])['reward'][0]
    results['r2'].append(_rollout_rewards(agent2, model2, scaler2, ticker2, _seed))
    results['r12'].append(_rollout_rewards(agent12, model2, scaler2, ticker2, _seed))
    results['r12_'].append(_rollout_rewards(agent12_, model2, scaler2, ticker2, _seed))

    updown = UpDownController(model2.predict, scaler2.transform, 0)
    results['rud'].append(_rollout_rewards(updown, model2, scaler2, ticker2, _seed))
    fixed = FixedApproachController(approach=5)
    results['rfa'].append(_rollout_rewards(fixed, model2, scaler2, ticker2, _seed))
    # The MPC baseline uses the chiller 2 model and environment like every
    # other controller here. It previously referenced modelb/scalerb and
    # modeln/scalern/tickern, which belong to later experiments.
    mpc = ModelPredictiveController(model2.predict, scaler2.transform, resolution=21)
    results['rmpc'].append(_rollout_rewards(mpc, model2, scaler2, ticker2, _seed))

# Aggregate across seeds: per-step mean and standard deviation, ignoring NaNs.
keys = list(results.keys())
resagg12 = {}
for key in keys:
    results[key] = helpers.homogenous_array(results[key])
    resagg12[key+'_mean'] = np.nanmean(results[key], axis=0)
    resagg12[key+'_std'] = np.nanstd(results[key], axis=0)

In [None]:
# plt.plot(stats.rolling_mean(rprod1, 4), label='Tower 1 {:.2f}'.format(sum(rprod1)), ls=':')
# One curve per controller: rolling mean of the per-step mean reward, with a
# +/- one standard deviation band in the same colour.
r = resagg12
for _key, _label, _line_kwargs in (
    ('r2', 'Tower 2: {:.2f}', dict(ls=':')),
    ('r12', 'Tower 1->model->2: {:.2f}', {}),
    ('r12_', 'Tower 1->2: {:.2f}', {}),
    ('rud', 'UpDown: {:.2f}', {}),
    ('rfa', 'Fixed: {:.2f}', {}),
    ('rmpc', 'MPC: {:.2f}', {}),
):
    _mean, _std = r[_key+'_mean'], r[_key+'_std']
    p = plt.plot(stats.rolling_mean(_mean, 4), label=_label.format(sum(_mean)), **_line_kwargs)
    plt.fill_between(np.arange(len(_mean)), _mean+_std, _mean-_std, color=p[0].get_color(), alpha=0.3)
plt.ylabel('Rewards')
plt.xlabel('Time steps')
plt.legend()
plt.title('Transfer: tower 1 to 2')

### Transfer Inside Tower

In [None]:
# Chiller/tower data to use
ch = ch2
ticker = ticker2

In [None]:
# Pairwise similarity between the daily traces in `ticker`, computed on the
# listed variables only.
similarity_idx = ['TempWetBulb', 'TempAmbient', 'Tonnage'] # variables for judging env similarity in ticker
similarity_matrix = stats.timeseries.similarity_matrix(ticker, similarity_idx)

In [None]:
# Split the days into two clusters using the precomputed similarity matrix,
# and compute an embedding of the same matrix for plotting.
# NOTE(review): no random_state is set, so the labels may differ between runs.
clusterer = cluster.SpectralClustering(n_clusters=2, affinity='precomputed')
cluster_labels = clusterer.fit_predict(similarity_matrix)
projecter = manifold.SpectralEmbedding(affinity='precomputed')
coords = projecter.fit_transform(similarity_matrix)

In [None]:
%matplotlib inline
# Scatter the embedded days, one colour per cluster label.
coords0 = coords[cluster_labels==0]
coords1 = coords[cluster_labels==1]
for _points, _name in ((coords0, 'Cluster A'), (coords1, 'Cluster B')):
    plt.scatter(_points[:,0], _points[:,1], label=_name)
plt.legend()
plt.title('Episodes clustered by\n independent state variables')

#### Env A Policy

In [None]:
# Days labelled 0 form environment A: select their rows from `ch`, fit a
# model on them and build the environment from those days.
tickera = [t for label, t in zip(cluster_labels, ticker) if label==0]
cha = ch.loc[pd.concat(tickera).index]
x_traina, x_vala, y_traina, y_vala, _, scalera = get_env_data(cha, colsx, colsy, ticker_vars)
modela = train_model(x_traina, y_traina)
enva = get_env(modela, scalera, tickera, 0)

In [None]:
# Training on environment a
# Plots the values returned by learn() without smoothing.
agenta = rl.PPO(env=enva, **agent_params)
rewardsa = agenta.learn(timesteps=timesteps)
plt.plot(rewardsa)

#### Env B Policy

In [None]:
# Days labelled 1 form environment B: select their rows from `ch`, fit a
# model on them and build the environment from those days.
tickerb = [t for label, t in zip(cluster_labels, ticker) if label==1]
chb = ch.loc[pd.concat(tickerb).index]
x_trainb, x_valb, y_trainb, y_valb, _, scalerb = get_env_data(chb, colsx, colsy, ticker_vars)
modelb = train_model(x_trainb, y_trainb)
envb = get_env(modelb, scalerb, tickerb, 0)

In [None]:
# Training on environment b
# Plots the values returned by learn() without smoothing.
agentb = rl.PPO(env=envb, **agent_params)
rewardsb = agentb.learn(timesteps=timesteps)
plt.plot(rewardsb)

#### Model Transfer

In [None]:
# Model A and model B predictions around the same test point
# Surfaces of the first model output (TempCondIn), varying input columns 0 and
# 5 of colsx (TempWetBulb and Setpoint) over [-1, 1].
# NOTE(review): `test_point` was taken from x_val2, i.e. scaled with scaler2,
# not with scalera/scalerb — confirm this is acceptable.
vary = (0, 5)
sa = model_surface(lambda x: modela.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))
sb = model_surface(lambda x: modelb.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))

# fitting modela on cluster B data
# presumably helpers.clone copies modela and overrides the listed training-state
# attributes — verify against commonml.helpers.
modelab = helpers.clone(modela, dict(n_iter_=0, t_=0, loss_curve_=[], best_loss_=np.inf))
modelab.set_params(warm_start=True, verbose=True)
# Only the cluster B validation split is used for the re-fit.
modelab.fit(x_valb, y_valb)
# modelab.fit(x_trainb, y_trainb)

In [None]:
%matplotlib notebook
# Surface of the re-fitted model, drawn together with models A and B.
sab = model_surface(lambda x: modelab.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))

# The first surface creates the figure; the others are drawn on its axes.
plot_surface(*sa, fig_kwargs={'figsize':(8,8)}, cmap=plt.cm.coolwarm, hatch='o', alpha=0.6)
ax = plt.gca()
for _surface, _hatch in ((sb, '-'), (sab, '.')):
    plot_surface(*_surface, cmap=plt.cm.coolwarm, ax=ax, hatch=_hatch, alpha=0.6)

ax.set_zlim(50, 80)
plt.xlabel(colsx[vary[0]])
_ylabel = colsx[vary[1]] if vary[1]<len(colsx) else 'Setpoint'
plt.ylabel(_ylabel)
ax.set_zlabel('Condenser Temperature In')
ax.view_init(10, 60)
plt.subplots_adjust(left=0, right=1, bottom=0, top=1)

#### Policy transfer

In [None]:
# transferring across
# Cluster A policy -> cluster B environment, with and without modelab as the
# intermediate model.
(agentab, rewardsab), (agentab_, rewardsab_) = \
    transfer_experiment(agenta, agentb, transfer_model=modelab, timesteps=288*30, **agent_params)

In [None]:
%matplotlib inline
# Learning performance
# Each legend entry shows the mean of the last five training rewards.
plt.plot(stats.rolling_mean(rewardsa, 4), label='Tower A: %.2f' % np.mean(rewardsa[-5:]), ls=':')
plt.plot(stats.rolling_mean(rewardsb, 4), label='Tower B: %.2f' % np.mean(rewardsb[-5:]))
# Label corrected: this curve is the A -> model -> B transfer. It previously
# carried the 'Tower 1->model->2' label copied from the cross-tower experiment.
plt.plot(stats.rolling_mean(rewardsab, 4), label='Tower A->model->B: %.2f' % np.mean(rewardsab[-5:]))
plt.plot(stats.rolling_mean(rewardsab_, 4), label='Tower A->B: %.2f' % np.mean(rewardsab_[-5:]))
plt.ylabel('Rewards')
plt.xlabel('Episodes')
plt.legend()
plt.title('RL training performance')

In [None]:
# Operational performance
# Every controller is evaluated on a fresh cluster B environment, once per
# seed, and the first reward series of each rollout is collected.
def _rollout_rewards_b(controller, seed):
    """Run `controller` on a new cluster B environment; return the first reward series."""
    env = get_env(modelb, scalerb, tickerb, seed)
    return helpers.get_from_env(('reward',), env, lambda x: controller.predict(x)[0])['reward'][0]

results = dict(ra=[], rb=[], rab=[], rab_=[], rud=[], rfa=[], rmpc=[])
for i in trange(10, leave=False):
    _seed = i
    results['ra'].append(_rollout_rewards_b(agenta, _seed))
    results['rb'].append(_rollout_rewards_b(agentb, _seed))
    results['rab'].append(_rollout_rewards_b(agentab, _seed))
    results['rab_'].append(_rollout_rewards_b(agentab_, _seed))

    # Baselines are re-created for every seed.
    updown = UpDownController(modelb.predict, scalerb.transform, 0)
    results['rud'].append(_rollout_rewards_b(updown, _seed))
    fixed = FixedApproachController(approach=5)
    results['rfa'].append(_rollout_rewards_b(fixed, _seed))
    mpc = ModelPredictiveController(modelb.predict, scalerb.transform, resolution=21)
    results['rmpc'].append(_rollout_rewards_b(mpc, _seed))

# Aggregate across seeds: per-step mean and standard deviation, ignoring NaNs.
keys = list(results.keys())
resaggab = {}
for key in keys:
    stacked = helpers.homogenous_array(results[key])
    results[key] = stacked
    resaggab[key+'_mean'] = np.nanmean(stacked, axis=0)
    resaggab[key+'_std'] = np.nanstd(stacked, axis=0)

In [None]:
%matplotlib inline
# One curve per controller: rolling mean of the per-step mean reward, with a
# +/- one standard deviation band in the same colour.
r = resaggab
for _key, _label, _line_kwargs in (
    ('rb', 'Tower B: {:.2f}', dict(ls=':')),
    ('rab', 'Tower A->model->B: {:.2f}', {}),
    ('rab_', 'Tower A->B: {:.2f}', {}),
    ('rud', 'UpDown: {:.2f}', {}),
    ('rfa', 'Fixed: {:.2f}', {}),
    ('rmpc', 'MPC: {:.2f}', {}),
):
    _mean, _std = r[_key+'_mean'], r[_key+'_std']
    p = plt.plot(stats.rolling_mean(_mean, 4), label=_label.format(sum(_mean)), **_line_kwargs)
    plt.fill_between(np.arange(len(_mean)), _mean+_std, _mean-_std, color=p[0].get_color(), alpha=0.3)
plt.ylabel('Rewards')
plt.xlabel('Time steps')
plt.legend()
plt.title('Transfer: Cluster A to B')

### Transfer using stale data

In [None]:
# `old` or `o` suffix for old,
# `new` or `n` suffix for new
chold, chnew = ch1_ref, ch1

In [None]:
%matplotlib inline
# Setpoint distributions of the old and new data on shared bins
# (ten equal-width bins between 55 and 75).
bins = np.linspace(55, 75, num=11, endpoint=True)
for _frame, _label, _color in (
    (chold, 'Old control', 'r'),
    (chnew, 'Up-down control (new)', 'b'),
):
    plt.hist(_frame['Setpoint'], label=_label, density=True, bins=bins, color=_color, alpha=0.5)
plt.xlabel('Setpoint /F')
plt.ylabel('Relative frequency')
plt.legend()

#### Policy on old env

In [None]:
# Dataset, model and environment built from the old (reference) data.
x_traino, x_valo, y_traino, y_valo, tickero, scalero = get_env_data(chold, colsx, colsy, ticker_vars)
modelo = train_model(x_traino, y_traino)
envo = get_env(modelo, scalero, tickero, 0)

In [None]:
%matplotlib inline
# Training on environment old
# Plots the values returned by learn() without smoothing.
agento = rl.PPO(env=envo, **agent_params)
rewardso = agento.learn(timesteps=timesteps)
plt.plot(rewardso)

#### Policy on new env

In [None]:
# Dataset, model and environment built from the new data.
x_trainn, x_valn, y_trainn, y_valn, tickern, scalern = get_env_data(chnew, colsx, colsy, ticker_vars)
modeln = train_model(x_trainn, y_trainn)
envn = get_env(modeln, scalern, tickern, 0)

In [None]:
%matplotlib inline
# Training on environment new
# Plots the values returned by learn() without smoothing.
agentn = rl.PPO(env=envn, **agent_params)
rewardsn = agentn.learn(timesteps=timesteps)
plt.plot(rewardsn)

#### Model Transfer

In [None]:
# Old-data and new-data model predictions around the same test point
# Surfaces of the first model output (TempCondIn), varying input columns 0 and
# 5 of colsx (TempWetBulb and Setpoint) over [-1, 1].
# NOTE(review): `test_point` was taken from x_val2, i.e. chiller 2 data scaled
# with scaler2, while these models come from chiller 1 data — confirm.
vary = (0, 5)
so = model_surface(lambda x: modelo.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))
sn = model_surface(lambda x: modeln.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))

# fitting modelo on modeln data
# presumably helpers.clone copies modelo and overrides the listed training-state
# attributes — verify against commonml.helpers.
modelon = helpers.clone(modelo, dict(n_iter_=0, t_=0, loss_curve_=[], best_loss_=np.inf))
modelon.set_params(warm_start=True, verbose=True)
# Only the new-data validation split is used for the re-fit.
modelon.fit(x_valn, y_valn)
# modelon.fit(x_trainn, y_trainn)

In [None]:
%matplotlib notebook
# Surface of the re-fitted model, drawn together with the old and new models.
son = model_surface(lambda x: modelon.predict(x)[:,0], test_point, vary, ((-1,1),(-1,1)), (10,10))

# The first surface creates the figure; the others are drawn on its axes.
plot_surface(*so, fig_kwargs={'figsize':(8,8)}, cmap=plt.cm.coolwarm, hatch='o', alpha=0.6)
ax = plt.gca()
for _surface, _hatch in ((sn, '-'), (son, '.')):
    plot_surface(*_surface, cmap=plt.cm.coolwarm, ax=ax, hatch=_hatch, alpha=0.6)

plt.xlabel(colsx[vary[0]])
_ylabel = colsx[vary[1]] if vary[1]<len(colsx) else 'Setpoint'
plt.ylabel(_ylabel)
ax.set_zlim(50, 80)
ax.set_zlabel('Condenser Temperature In')
ax.view_init(10, 60)
plt.subplots_adjust(left=0, right=1, bottom=0, top=1)

#### Policy transfer

In [None]:
# transferring across
# No intermediate model is passed, so transfer_experiment returns (None, None)
# for the first pair: `agenton` and `rewardson` are both None after this cell.
(agenton, rewardson), (agenton_, rewardson_) = \
    transfer_experiment(agento, agentn, transfer_model=None, timesteps=timesteps, **agent_params)

In [None]:
%matplotlib inline
# Learning performance
# Each legend entry shows the mean of the last five training rewards.
# plt.plot(stats.rolling_mean(rewardson, 4), label='Old data->model->New data: %.2f' % np.mean(rewardson[-5:]))
for _curve, _label, _line_kwargs in (
    (rewardso, 'Old data: %.2f', dict(ls=':')),
    (rewardsn, 'New data: %.2f', {}),
    (rewardson_, 'Old data->New data: %.2f', {}),
):
    plt.plot(stats.rolling_mean(_curve, 4), label=_label % np.mean(_curve[-5:]), **_line_kwargs)
plt.ylabel('Rewards')
plt.xlabel('Episodes')
plt.legend()
plt.title('RL Performance')

In [None]:
# Operational performance
# Every controller is evaluated on a fresh new-data environment, once per
# seed, and the first reward series of each rollout is collected.
def _rollout_rewards_n(controller, seed):
    """Run `controller` on a new new-data environment; return the first reward series."""
    env = get_env(modeln, scalern, tickern, seed)
    return helpers.get_from_env(('reward',), env, lambda x: controller.predict(x)[0])['reward'][0]

results = dict(ro=[], rn=[], ron=[], ron_=[], rud=[], rfa=[], rmpc=[])
for i in trange(10, leave=False):
    _seed = i
    results['ro'].append(_rollout_rewards_n(agento, _seed))
    results['rn'].append(_rollout_rewards_n(agentn, _seed))
    # 'ron' stays empty: no intermediate-model agent was trained (agenton is None).
#     results['ron'].append(helpers.get_from_env(('reward',), get_env(modeln, scalern, tickern, _seed), lambda x: agenton.predict(x)[0])['reward'][0])
    results['ron_'].append(_rollout_rewards_n(agenton_, _seed))

    updown = UpDownController(modeln.predict, scalern.transform, 0)
    results['rud'].append(_rollout_rewards_n(updown, _seed))
    fixed = FixedApproachController(approach=5)
    results['rfa'].append(_rollout_rewards_n(fixed, _seed))
    # The MPC baseline plans with the model of the environment it is evaluated
    # on. It previously used modelb/scalerb from the cluster experiment.
    mpc = ModelPredictiveController(modeln.predict, scalern.transform, resolution=21)
    results['rmpc'].append(_rollout_rewards_n(mpc, _seed))

# Aggregate across seeds: per-step mean and standard deviation, ignoring NaNs.
keys = list(results.keys())
resaggon = {}
for key in keys:
    # Skip controllers that were not evaluated.
    if len(results[key])==0: continue
    results[key] = helpers.homogenous_array(results[key])
    resaggon[key+'_mean'] = np.nanmean(results[key], axis=0)
    resaggon[key+'_std'] = np.nanstd(results[key], axis=0)

In [None]:
%matplotlib inline
# One curve per controller: rolling mean of the per-step mean reward, with a
# +/- one standard deviation band in the same colour.
r = resaggon
# p = plt.plot(stats.rolling_mean(r['ron_mean'], 4), label='Tower A->model->B: {:.2f}'.format(sum(r['ron_mean'])))
# plt.fill_between(np.arange(len(r['ron_mean'])), r['ron_mean']+r['ron_std'], r['ron_mean']-r['ron_std'], color=p[0].get_color(), alpha=0.3)
for _key, _label in (
    ('rn', 'Diverse: {:.2f}'),
    ('ron_', 'Sparse->Diverse: {:.2f}'),
    ('rud', 'UpDown: {:.2f}'),
    ('rfa', 'Fixed: {:.2f}'),
    ('rmpc', 'MPC: {:.2f}'),
):
    _mean, _std = r[_key+'_mean'], r[_key+'_std']
    p = plt.plot(stats.rolling_mean(_mean, 4), label=_label.format(sum(_mean)))
    plt.fill_between(np.arange(len(_mean)), _mean+_std, _mean-_std, color=p[0].get_color(), alpha=0.3)
plt.ylabel('Rewards')
plt.xlabel('Time steps')
plt.legend()
plt.title('Transfer: sparse to diverse data')