In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
from itertools import zip_longest, product
from collections import namedtuple

import notebook_setup
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from tslearn.utils import to_time_series
from tslearn.metrics import dtw
from sklearn import metrics
from sklearn import cluster
from sklearn import manifold
from datetime import datetime, timezone, timedelta
import matplotlib
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
import gym
import optuna
from tqdm.autonotebook import tqdm, trange

from bdx import get_trend
from utils import get_credentials
from commonml import rl, helpers, stats
from systems import CoolingTowerEnv, CoolingTowerIOEnv
from plotting import model_surface, plot_surface

## Data

* ESB Chiller 1 data (2422) `2021-03-18 1220-5` to `2021-03-26 1010-5`, `2021-05-25 0000-5` to `2021-06-08 0000-5`
* ESB Chiller 2 data (2841) `2021-03-26 1010-5` to `2021-05-16 0610-5`
* ESB Python setpoint data (3481)
* Kissam data (2661) `2021-06-03 0000-5` to `2021-08-31`

In [None]:
# Fixed UTC offsets used to make the BDX query timestamps timezone-aware.
cst = timezone(offset=-timedelta(hours=6))  # UTC-6
cdt = timezone(offset=-timedelta(hours=5))  # UTC-5

# Fail early with a clear message: os.path.join(None, ...) would otherwise raise
# an opaque TypeError when the DATADIR environment variable is not set.
if 'DATADIR' not in os.environ:
    raise KeyError("Environment variable 'DATADIR' must point to the data directory.")
datadir = os.path.join(os.environ['DATADIR'], 'EngineeringScienceBuilding')
username, password = get_credentials()

### Download and save

In [None]:
# Get data from BDX for chiller systems
# ESB Cooling tower 1
ch1 = get_trend('2422', username, password,
               start=datetime(2021,8,1, tzinfo=cdt),
               end=datetime(2021,11,1, tzinfo=cdt))
# ch1 = ch1.append(get_trend('2422', username, password,
#                  start=datetime(2021,5,25,0,0, tzinfo=cdt),
#                  end=datetime(2021,6,8,0,0, tzinfo=cdt)))

# ESB Cooling tower 2
ch2 = get_trend('2841', username, password,
               start=datetime(2021,3,26,10,10, tzinfo=cdt),
               end=datetime(2021,5,16,6,10, tzinfo=cdt))

# ESB setpoint
stpt = get_trend('3481', username, password,
               start=datetime(2021,8,1, tzinfo=cdt),
               end=datetime(2021,11,1, tzinfo=cdt))
# stpt = stpt.append(get_trend('3481', username, password,
#                    start=datetime(2021,5,25,0,0, tzinfo=cdt),
#                    end=datetime(2021,6,8,0,0, tzinfo=cdt)))
stpt = stpt['CDWTPythonSetpt']

# Merge setpoint w/ data.
# reindex() aligns on the chiller timestamps and yields NaN where the setpoint
# trend has no sample. `.loc[labels]` raises KeyError for missing labels in
# current pandas, and the ch2 query range above does not overlap the setpoint
# query range at all.
ch1['Setpoint'] = stpt.reindex(ch1.index)
ch2['Setpoint'] = stpt.reindex(ch2.index)

ch1.to_csv(os.path.join(datadir, '2422_RLEM_esb_chiller1_model.csv'))
# ch2.to_csv(os.path.join(datadir, '2841_RLEM_esb_chiller2_model.csv'))

In [None]:
# Kissam
# Download the Kissam trend (id 2661) and collapse the paired CT_1/CT_2 and
# CH_1/CH_2 columns into single averaged columns with the prefix stripped.
chk = get_trend('2661', username, password,
               start=datetime(2021,6,3,0,0, tzinfo=cdt),
               end=datetime(2021,8,31,0,0, tzinfo=cdt))
# Column renames and drops are accumulated in the loop and applied once at the end.
renames = {'EffectiveTempCondInSetpoint': 'Setpoint'}
drops = ['TempCondInSetpoint']
# Copy of the raw trend taken before the in-place edits below; not used again in this cell.
df = chk.copy()

for column in chk.columns:
    if column.startswith('CT_1.'):
        # Strip the 5-character 'CT_1.' prefix to get the shared field name.
        fieldname = column[5:]
        # Average unit 1 and unit 2; the result overwrites the CT_1 column.
        chk[column] = (chk[column] + chk['CT_2.' + fieldname]) / 2
        renames[column] = fieldname
        drops.append('CT_2.' + fieldname)
    elif column.startswith('CH_1.'):
        # Same treatment for the CH_1/CH_2 pair.
        fieldname = column[5:]
        chk[column] = (chk[column] + chk['CH_2.' + fieldname]) / 2
        renames[column] = fieldname
        drops.append('CH_2.' + fieldname)
chk.rename(columns=renames, inplace=True)
chk.drop(columns=drops, inplace=True)

chk.to_csv(os.path.join(datadir, '2661_RLEM_kissam_chiller_model.csv'))

In [None]:
# Get naive reference data from BDX
refstart, refend = datetime(2020,3,1, tzinfo=cst), datetime(2021,3,8, tzinfo=cst)
stpt_ref = get_trend('3481', username, password,
               start=refstart, end=refend)

stpt_ref = stpt_ref['JCI Cooling_Tower_Water_Setpoint']

# Keep only samples where the chiller is running and drawing power.
# .copy() makes the filtered frame independent so that adding the 'Setpoint'
# column below does not trigger SettingWithCopyWarning.
ch1_ref = get_trend('2422', username, password,
                   start=refstart, end=refend)
ch1_ref = ch1_ref.loc[(ch1_ref['RunChi']==True) & (ch1_ref['PowChi'] > 0)].copy()

ch2_ref = get_trend('2841', username, password,
                   start=refstart, end=refend)
ch2_ref = ch2_ref.loc[(ch2_ref['RunChi']==True) & (ch2_ref['PowChi'] > 0)].copy()

# reindex() returns NaN for timestamps missing from the setpoint trend, which
# the isna() filter below then removes. `.loc[labels]` raises KeyError for
# missing labels in current pandas.
ch1_ref_stpt = stpt_ref.reindex(ch1_ref.index)
ch2_ref_stpt = stpt_ref.reindex(ch2_ref.index)
ch1_ref['Setpoint'] = ch1_ref_stpt
ch2_ref['Setpoint'] = ch2_ref_stpt

ch1_ref = ch1_ref[~ch1_ref['Setpoint'].isna()]
ch2_ref = ch2_ref[~ch2_ref['Setpoint'].isna()]

ch1_ref.to_csv(os.path.join(datadir, '2422_RLEM_esb_chiller1_eval.csv'))
ch2_ref.to_csv(os.path.join(datadir, '2841_RLEM_esb_chiller2_eval.csv'))

In [None]:
# Save data to disk
# Writes the three modelling frames as CSV under `datadir`. The ch1 and chk
# files are also written by the download cells above, so this repeats those writes.
ch1.to_csv(os.path.join(datadir, '2422_RLEM_esb_chiller1_model.csv'))
ch2.to_csv(os.path.join(datadir, '2841_RLEM_esb_chiller2_model.csv'))
chk.to_csv(os.path.join(datadir, '2661_RLEM_kissam_chiller_model.csv'))

In [None]:
# Save reference data to disk
# Same two files that the reference-download cell already writes.
ch1_ref.to_csv(os.path.join(datadir, '2422_RLEM_esb_chiller1_eval.csv'))
ch2_ref.to_csv(os.path.join(datadir, '2841_RLEM_esb_chiller2_eval.csv'))

### Load from file

In [None]:
# Load data from disk
# The 'time' column becomes the index and is parsed as datetimes, so the
# date-based grouping used later (`index.date`) works on the loaded frames.
ch1 = pd.read_csv(os.path.join(datadir, '2422_RLEM_esb_chiller1_model.csv'), index_col='time', parse_dates=True)
ch2 = pd.read_csv(os.path.join(datadir, '2841_RLEM_esb_chiller2_model.csv'), index_col='time', parse_dates=True)
chk = pd.read_csv(os.path.join(datadir, '2661_RLEM_kissam_chiller_model.csv'), index_col='time', parse_dates=True)

In [None]:
# Load ref data from disk
# Reference (evaluation) frames, indexed by the parsed 'time' column.
ch1_ref = pd.read_csv(os.path.join(datadir, '2422_RLEM_esb_chiller1_eval.csv'), index_col='time', parse_dates=True)
ch2_ref = pd.read_csv(os.path.join(datadir, '2841_RLEM_esb_chiller2_eval.csv'), index_col='time', parse_dates=True)

## Models & Environments

Time-independent state space: the model maps `x -> y`, where `x = [Ambient conditions, Machine temperatures, Machine state, Setpoint]` and `y = [Next condenser water temperatures (in/out), Fan power]`.

In [None]:
# Per-tower data configuration:
#   colsx       - model input columns (the configs below list the action, 'Setpoint', last)
#   colsy       - model output columns
#   ticker_vars - columns copied into the per-day "ticker" frames
#   lag         - number of rows between an input sample and its target sample
DataConfig = namedtuple('DataConfig', ['colsx', 'colsy', 'ticker_vars', 'lag'])

In [None]:
# Features for the data-model for environment
# State + action variables
# Both ESB towers share one configuration object (cfg1 and cfg2 are the same instance).
cfg1 = cfg2 = DataConfig(
    colsx = [
        # Ambient
        'TempWetBulb', 'TempAmbient',
        # Machine temperatures
        'TempCondIn', 'TempCondOut',
        # Machine state
        'Tonnage', 'PressDiffCond',
        # Action
        'Setpoint'
    ],
    # Variables for cooling tower conditions that are staged
    ticker_vars = ['TempWetBulb', 'TempAmbient', 'Tonnage', 'PressDiffCond'],
    # Targets are taken one row after the inputs.
    lag = 1,
    # Model outputs: the two condenser temperatures and the fan power column.
    colsy = ['TempCondIn', 'TempCondOut', 'PowFanA'],
)

In [None]:
def get_env_data(
    df, colsx, colsy, ticker_vars,
    lag=1, train_split=0.9, group_days=False, shuffle=False, seed=0
):
    """Build scaled model inputs, lagged targets and per-day tickers from a trend frame.

    Rows with a NaN in any of ``colsx``, ``colsy`` or ``ticker_vars`` are dropped
    first. Inputs are scaled to ``[-1, 1]`` with a ``MinMaxScaler`` fitted on all
    rows except the last ``lag`` (note that this includes rows that end up in the
    validation split). Targets are the ``colsy`` values ``lag`` rows after their input.

    Parameters
    ----------
    df : pandas.DataFrame
        Trend data with a datetime index (``index.date`` is used for grouping).
    colsx, colsy, ticker_vars : list of str
        Input, output and ticker column names.
    lag : int
        Positive number of rows between an input and its target.
    train_split : float
        Fraction of samples (or of days when ``group_days``) used for training.
        ``1.`` skips the split in the ungrouped mode and returns empty validation lists.
    group_days : bool
        If True, return one ``(x, y)`` pair per calendar day instead of flat arrays.
    shuffle : bool
        Shuffle samples (or days) before splitting.
    seed : int
        Seed for the shuffle in both modes.

    Returns
    -------
    x_train, x_val, y_train, y_val, ticker, scaler
        Flat arrays when ``group_days`` is False; lists with one entry per day
        otherwise (``x`` entries are scaled arrays, ``y`` entries are DataFrames).
        ``ticker`` is a list of per-day ``ticker_vars`` frames with exactly 288 rows.

    Raises
    ------
    ValueError
        If ``lag`` is smaller than 1.
    """
    if lag < 1:
        # `[:-0]` is an empty slice, so lag=0 would silently produce no data.
        raise ValueError('lag must be a positive integer, got %r' % (lag,))
    random = np.random.RandomState(seed=seed)
    data = df.loc[:, list(set(colsx+colsy+ticker_vars))]
    data = data.dropna(axis=0, how='any')
    # `axis` must be passed by keyword: it is keyword-only in pandas >= 2.0.
    assert not pd.isnull(data).any(axis=1).any(), 'Null found'
    ticker = [day_data for date, day_data in data[ticker_vars].groupby(data.index.date)]
    ticker = [t for t in ticker if len(t)==288] # all samples for a day

    # Inputs are every row that still has a target `lag` rows later.
    x_raw = data[colsx].iloc[:-lag].values
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(x_raw)
    if not group_days:
        x = scaler.transform(x_raw)
        y = data[colsy].iloc[lag:].to_numpy().squeeze()
        if train_split == 1.:
            x_train, x_val, y_train, y_val = x, [], y, []
        else:
            # Pass the seed so a shuffled split is reproducible; it was ignored before.
            x_train, x_val, y_train, y_val = train_test_split(
                x, y, train_size=train_split, shuffle=shuffle,
                random_state=seed if shuffle else None
            )
    else:
        x, y = [], []
        for date, day_data in data.groupby(data.index.date):
            x_, y_ = day_data[colsx], day_data[colsy]
            x_ = scaler.transform(x_.values)
            x.append(x_[:-lag])
            y.append(y_[lag:])
        n_train = int(train_split * len(x))
        indices = np.arange(len(x), dtype=int)
        if shuffle:
            train_idx = random.choice(indices, replace=False, size=n_train)
        else:
            train_idx = indices[:n_train]
        # Set membership avoids rescanning train_idx for every day.
        train_set = set(train_idx.tolist())
        val_idx = [i for i in indices if i not in train_set]
        x_train, y_train = [x[i] for i in train_idx], [y[i] for i in train_idx]
        x_val, y_val = [x[i] for i in val_idx], [y[i] for i in val_idx]

    return x_train, x_val, y_train, y_val, ticker, scaler

def train_model(x, y, verbose=False, **model_params):
    """Fit an ``MLPRegressor`` on ``(x, y)`` and return the fitted estimator.

    Defaults are three hidden layers (48, 48, 32), an initial learning rate of
    1e-4 and at most 500 iterations; any keyword in ``model_params`` overrides
    the matching default.
    """
    defaults = {
        'hidden_layer_sizes': (48,48,32),
        'learning_rate_init': 1e-4,
        'max_iter': 500,
        'verbose': verbose,
    }
    regressor = MLPRegressor(**{**defaults, **model_params})
    regressor.fit(x, y)
    return regressor

def get_env(model_fn, scaler_fn, ticker, seed=0):
    """Create a ``CoolingTowerEnv`` from a model, a scaler and ticker data.

    ``model_fn`` may be a fitted scikit-learn estimator (its ``predict`` method is
    used) or a callable that is passed through as is. ``scaler_fn`` may likewise
    be a scikit-learn transformer (its ``transform`` method is used) or a callable.
    """
    is_estimator = isinstance(model_fn, sklearn.base.BaseEstimator)
    predict = model_fn.predict if is_estimator else model_fn
    is_transformer = isinstance(scaler_fn, sklearn.base.TransformerMixin)
    transform = scaler_fn.transform if is_transformer else scaler_fn
    return CoolingTowerEnv(predict, ticker, seed, transform)

### ESB Cooling Tower 1 Model

In [None]:
# Flat (ungrouped) 90/10 split with the get_env_data defaults, then fit the tower 1 model.
x_train1, x_val1, y_train1, y_val1, ticker1, scaler1 = get_env_data(ch1, cfg1.colsx, cfg1.colsy, cfg1.ticker_vars)
model1 = train_model(x_train1, y_train1, verbose=True)

In [None]:
%matplotlib inline
xx = x_val1[:200]
xx_ = scaler1.inverse_transform(xx)
yy = y_val1[:200]
yp = model1.predict(xx)

plt.figure(figsize=(10,5))
plt.subplot(2,1,1)
plt.plot(yp[:,0], label='TempCondIn-pred')
plt.plot(yp[:,1], label='TempCondOut-pred')
plt.plot(xx_[:,0], label='TempWetBulb', ls=':')
plt.plot(xx_[:,2], label='TempCondOut-last', ls=':')
plt.legend()
plt.subplot(2,1,2)
plt.plot(yy[:,2], label='PowFan-pred')

In [None]:
%matplotlib inline
# Roll out a constant-setpoint policy for each candidate setpoint and plot the rewards.
stpts = [55., 60., 65., 70., 75.]
# NOTE(review): the loop variable `stpt` rebinds the setpoint Series created in the download cell.
for stpt in stpts:
    # Fresh environment with the same seed for every setpoint.
    env1 = get_env(model1, scaler1, ticker1, 0)
    scaled_stpt = env1.scale_setpoint([stpt])
    # The policy ignores the state and always returns the scaled setpoint.
    # Presumably element [0] of helpers.rewards is the reward sequence — verify against helpers.
    rewards = helpers.rewards(env1, lambda x: scaled_stpt)[0]
    plt.plot(rewards, label='{:.0f}, total:{:.0f}'.format(stpt, sum(rewards)))
plt.legend()

### ESB Cooling Tower 2 Model

In [None]:
# Same pipeline as tower 1, applied to the tower 2 frame.
x_train2, x_val2, y_train2, y_val2, ticker2, scaler2 = get_env_data(ch2, cfg2.colsx, cfg2.colsy, cfg2.ticker_vars)
model2 = train_model(x_train2, y_train2)

In [None]:
# `data` and `colsx` exist only inside get_env_data(), so this cell raised
# NameError on a fresh kernel. Rebuild the same NaN-free frame from ch2 here.
cols2 = list(dict.fromkeys(cfg2.colsx + cfg2.colsy + cfg2.ticker_vars))
data2 = ch2.loc[:, cols2].dropna(axis=0, how='any')
# Up to 2000 samples, leaving room for the lagged target at the end.
n = min(2000, len(data2) - cfg2.lag)
xx = data2[cfg2.colsx].iloc[:n].values
xx = scaler2.transform(xx)
yy = model2.predict(xx)
plt.figure(figsize=(10,5))
# plt.plot(np.arange(len(yy)), yy[:, 0], label='TempCondIn-Pred')
# plt.plot(np.arange(len(yy)), ch2.loc[ch2.index[1:501], 'TempCondIn'].to_numpy().squeeze(), label='TempCondIn', ls=':')
# plt.plot(np.arange(len(yy)), ch2.loc[ch2.index[:500], 'TempCondIn'].to_numpy().squeeze(), label='TempCondIn-Last', ls=':')
# plt.legend()
# plt.twinx()
plt.plot(np.arange(len(yy)), yy[:, 2], label='PowFanA-Pred')
# Target: fan power `lag` rows after each input. "Last": fan power at the input row.
plt.plot(np.arange(len(yy)), data2['PowFanA'].iloc[cfg2.lag:n + cfg2.lag].to_numpy().squeeze(), label='PowFanA', ls=':')
plt.plot(np.arange(len(yy)), data2['PowFanA'].iloc[0:n].to_numpy().squeeze(), label='PowFanA-Last', ls=':')
plt.legend()

In [None]:
%matplotlib inline
# Constant-setpoint rollouts on the tower 2 environment (same procedure as for tower 1).
stpts = [55., 60., 65., 70., 75.]
for stpt in stpts:
    # Fresh environment with the same seed for every setpoint.
    env2 = get_env(model2, scaler2, ticker2, 0)
    scaled_stpt = env2.scale_setpoint([stpt])
    # The policy ignores the state and always returns the scaled setpoint.
    rewards = helpers.rewards(env2, lambda x: scaled_stpt)[0]
    plt.plot(rewards, label='{:.0f}, total:{:.0f}'.format(stpt, sum(rewards)))
plt.legend()

### Kissam Combined Cooling Tower Model

In [None]:
# Kissam condenser differential pressure sensor is not set, so using
# condenser motor frequency
# Unlike the ESB configuration, the inputs here do not include 'TempCondIn'
# and the fan power column is named 'PowFan'.
cfgk = DataConfig(
    colsx = [
        # Ambient
        'TempWetBulb', 'TempAmbient',
        # Machine temperatures
        'TempCondOut',
        # Machine state
        'Tonnage', 'PerFreqConP',
        # Action
        'Setpoint'
    ],
    colsy = ['TempCondIn', 'TempCondOut', 'PowFan'],
    ticker_vars = ['TempWetBulb', 'TempAmbient', 'Tonnage', 'PerFreqConP'],
    lag = 1
)

In [None]:
# Same pipeline as the ESB towers, with the Kissam frame and configuration.
x_traink, x_valk, y_traink, y_valk, tickerk, scalerk = get_env_data(chk, cfgk.colsx, cfgk.colsy, cfgk.ticker_vars)
modelk = train_model(x_traink, y_traink)

In [None]:
# `colsxk` was never defined (NameError on a fresh kernel); the Kissam input
# columns live in cfgk.colsx. `.values` matches how scalerk was fitted (plain arrays).
xx = chk.loc[chk.index[:288], cfgk.colsx].values
xx = scalerk.transform(xx)
yy = modelk.predict(xx)
plt.figure(figsize=(10,5))
# Left axis: condenser inlet temperature (prediction, lagged target, value at the input row).
plt.plot(np.arange(len(yy)), yy[:, 0], label='TempCondIn-Pred')
plt.plot(np.arange(len(yy)), chk.loc[chk.index[1:289], 'TempCondIn'].to_numpy().squeeze(), label='TempCondIn', ls=':')
plt.plot(np.arange(len(yy)), chk.loc[chk.index[:288], 'TempCondIn'].to_numpy().squeeze(), label='TempCondIn-Last', ls=':')
plt.legend()
# Right axis: fan power.
plt.twinx()
plt.plot(np.arange(len(yy)), yy[:, 2], label='PowFan-Pred')
plt.plot(np.arange(len(yy)), chk.loc[chk.index[1:289], 'PowFan'].to_numpy().squeeze(), label='PowFan', ls=':')
plt.plot(np.arange(len(yy)), chk.loc[chk.index[0:288], 'PowFan'].to_numpy().squeeze(), label='PowFan-Last', ls=':')
plt.legend()

In [None]:
%matplotlib inline
# Constant-setpoint rollouts on the Kissam environment (same procedure as for the ESB towers).
stpts = [55., 60., 65., 70., 75.]
for stpt in stpts:
    # Fresh environment with the same seed for every setpoint.
    envk = get_env(modelk, scalerk, tickerk, 0)
    scaled_stpt = envk.scale_setpoint([stpt])
    # The policy ignores the state and always returns the scaled setpoint.
    rewards = helpers.rewards(envk, lambda x: scaled_stpt)[0]
    plt.plot(rewards, label='{:.0f}, total:{:.0f}'.format(stpt, sum(rewards)))
plt.legend()

## System Identification

In [None]:
# `colsxk`, `colsyk` and `ticker_varsk` were never defined; use the Kissam
# configuration object instead. Data is grouped per day and split in date order.
xt, xv, yt, yv, ticker, scaler = get_env_data(chk, cfgk.colsx, cfgk.colsy, cfgk.ticker_vars, group_days=True, shuffle=False)

In [None]:
import sippy, control
from sippy.functionsetSIM import SS_lsim_innovation_form

In [None]:
# Stack the per-day arrays into single (samples, columns) matrices.
xt_, yt_ = np.vstack(xt), np.vstack(yt)
xv_, yv_ = np.vstack(xv), np.vstack(yv)

# Input count, output count and number of training samples (not used further in this cell).
nx , ny, L = xt_.shape[1], yt_.shape[1], len(xt_)
t = np.arange(len(xt_))
# Identify a model of type 'FIR' with outputs yt_ and inputs xt_.
# NOTE(review): sippy may expect (variables, samples) arrays — confirm the orientation.
system  = sippy.system_identification(yt_, xt_, 'FIR', tsample=1)

In [None]:
# Simulate the identified system over the validation inputs in 2-sample windows,
# feeding the last state of each window in as the next window's initial state.
y_ = np.zeros_like(yv_.T)
t__ = np.arange(2)
x0 = np.zeros((3, 1))  # NOTE(review): assumes the identified model has 3 states — confirm
# The window slides by one sample, so every output column except the first is
# written twice, and the final sample (index len(xv_)-1) is never written.
for i in range(2, len(xv_)):
    m = control.forced_response(system.G, t__, xv_[i-2:i].T, X0=x0[:, -1], return_x=True)
    t_, y_[:, i-2:i], x0 = m
# Back to (samples, outputs) to match yv_.
y_ = y_.T

In [None]:
# NOTE(review): column 1 of cfgk.colsy is 'TempCondOut', but the labels say
# 'TempCondIn' (column 0) — confirm which one is intended.
plt.plot(y_[:,1], label='TempCondIn-Pred')
plt.plot(yv_[:,1], label='TempCondIn')
plt.legend()
plt.ylim(50,100)

In [None]:
# Column 2 of cfgk.colsy is 'PowFan'; only the first 288 samples are shown.
plt.plot(y_[:288,2], label='PowFan-Pred')
plt.plot(yv_[:288,2], label='PowFan')
plt.legend()
# plt.ylim(50,100)

In [None]:
# Environment driven by the identified system `system.G`, using the Kissam tickers and scaler.
env = CoolingTowerIOEnv(system.G, tickerk, seed=0, scaler_fn=scalerk.transform)

In [None]:
# Smoke test: take random actions and plot one state component.
done = False
states = []
env.reset()
i = 0
while not done:
    i += 1
    # Stop once the counter reaches 20, i.e. after at most 19 environment steps.
    if i == 20: break
    state, reward, done, _ = env.step(env.action_space.sample())
    states.append(state)
states = np.asarray(states)
# NOTE(review): index 2 of cfgk.colsx is 'TempCondOut'; this assumes the state follows that order.
plt.plot(states[:, 2], label='TempCondOut')

In [None]:
# Display the first per-day Kissam ticker frame.
tickerk[0]

In [None]:
# Display the environment's current state after the rollout above.
env.state

## Transfer

### Test data/environments

In [None]:
# Fraction of each tower's data used for training; the remainder builds the test environment.
train_split = 0.5
test_split = 1 - train_split
# Fractions of the training portion to train on: 0.25, 0.5, 0.75, 1.0.
train_sizes = np.linspace(0.25, 1.0, num=4, endpoint=True)
# Split a frame into per-day frames, keeping only days with exactly 288 rows.
group_by_day = lambda dframe: [day_data for date, day_data in dframe.groupby(dframe.index.date) if len(day_data)==288]

# Parallel tuples: tower names, their data frames, and their data configurations.
name_df_cfg = ('CT1', 'CT2', 'K'), (ch1, ch2, chk), (cfg1, cfg2, cfgk)
reward_baseline = -288  # baseline for episodic rewards

In [None]:
def df_train_test_split(df, train_split, shuffle=False, seed=0):
    """Split ``df`` row-wise into a training part and a test part.

    The first ``int(train_split * len(df))`` index labels (after an optional
    seeded shuffle of the labels) select the training rows; the remaining
    labels select the test rows.

    Returns
    -------
    (df_train, df_test) : tuple of pandas.DataFrame
    """
    labels = df.index.to_list()
    rng = np.random.RandomState(seed=seed)
    if shuffle:
        rng.shuffle(labels)
    cut = int(train_split * len(df))
    return df.loc[labels[:cut]], df.loc[labels[cut:]]

def _build_env(source, cfg, seed):
    """Return a gym environment for ``source`` (a DataFrame to model, or an existing env)."""
    if isinstance(source, pd.DataFrame):
        x, _, y, _, ticker, scaler = get_env_data(
            source, cfg.colsx, cfg.colsy, cfg.ticker_vars, train_split=1.
        )
        model = train_model(x, y)
        return get_env(model, scaler, ticker, seed=seed)
    if isinstance(source, gym.Env):
        return source
    # Previously an unsupported type left `env` unbound and failed later with
    # an UnboundLocalError; report the actual problem instead.
    raise TypeError('Expected a pandas.DataFrame or gym.Env, got %s' % type(source).__name__)


def transfer_experiment(
    train, test, agent_params, cfg, seed=0, timesteps=288*30, no_train=False
):
    """Train a PPO agent on a source environment, then continue learning on a target one.

    Parameters
    ----------
    train, test : pandas.DataFrame or gym.Env
        Source and target. A DataFrame is turned into an environment by fitting
        a data model on all of its rows; an environment is used as is.
    agent_params : dict
        Keyword arguments for ``rl.PPO``.
    cfg : DataConfig
        Column configuration used when building an environment from a DataFrame.
    seed : int
        Seed passed to environments built from DataFrames.
    timesteps : int
        Learning steps on the source; the target gets ``int(timesteps * 1.5)``.
    no_train : bool
        If True, skip the source phase entirely.

    Returns
    -------
    (rewards_train, rewards_test)
        Values returned by ``agent.learn`` for each phase; ``rewards_train`` is
        None when ``no_train`` is True.

    Raises
    ------
    TypeError
        If ``train`` (when used) or ``test`` is neither a DataFrame nor a gym.Env.
    """
    agent = rl.PPO(None, **agent_params)
    # Training environment
    rewards_train = None
    if not no_train:
        env = _build_env(train, cfg, seed)
        if isinstance(train, pd.DataFrame):
            # Only freshly built environments are reset; a caller-supplied env
            # is used in whatever state it was handed over.
            env.reset()
        agent.env = env
        rewards_train = agent.learn(timesteps=timesteps)
    # policy = helpers.clone(agent.policy.state_dict())
    # Testing environment
    agent.env = _build_env(test, cfg, seed)
    # Keep the step count an integer: `timesteps * 1.5` is a float.
    rewards_test = agent.learn(timesteps=int(timesteps * 1.5))

    return rewards_train, rewards_test


def agg_transfer_experiments(dfs, agent_params):
    """Run the transfer experiment for every tower and every training fraction.

    ``dfs`` maps a tower name to ``(training dataframe, test environment, dataconfig)``.
    For each fraction in the notebook-level ``train_sizes``, an agent is trained
    on that share of the training frame and then on the test environment. A
    final run with ``no_train=True`` is stored under key ``0.`` with an all-NaN
    training curve of the same length as the last trained run.

    Returns
    -------
    dict
        name -> {training set fraction -> (training rewards, testing rewards)}
    """
    results = {}
    towers = tqdm(dfs.items(), total=len(dfs), desc='Tower')
    for name, (df_train, env_test, cfg) in towers:
        per_fraction = {}
        for fraction in tqdm(train_sizes, leave=False, desc='Fraction'):
            df_fraction, _ = df_train_test_split(df_train, fraction)
            r_train, r_test = transfer_experiment(
                df_fraction, env_test, agent_params, cfg, timesteps=5000
            )
            per_fraction[fraction] = (r_train, r_test)

        # Baseline without source training; df_fraction is passed but not used when no_train=True.
        _, r_notrain = transfer_experiment(
            df_fraction, env_test, agent_params, cfg, timesteps=5000, no_train=True
        )
        per_fraction[0.] = (np.full(len(r_train), np.nan), r_notrain)
        results[name] = per_fraction
    return results

### Hyperparameters

In [None]:
# RL hyperparameter search
def objective(trial: optuna.Trial):
    """Optuna objective: train PPO on the Kissam environment and score the result.

    Samples the network width, learning rate, epochs, update interval and
    number of timesteps, and returns the mean of the last five values returned
    by ``agent.learn`` (to be maximized).
    """
    params = dict(
        policy = rl.ActorCriticBox,
        activation = nn.Tanh,
        # `state_vars` was never defined (NameError). The state is taken to be
        # the model inputs without the trailing action column, as elsewhere in
        # this notebook (cfg1.colsx[:-1]).
        # NOTE(review): confirm against the environment's observation size.
        state_dim = len(cfgk.colsx) - 1,
        action_dim = 1,
        n_latent_var = trial.suggest_int('n_latent_var', 16, 128),
        # suggest_float(log=True) replaces the deprecated suggest_loguniform.
        lr = trial.suggest_float('lr', 1e-4, 1e-1, log=True),
        gamma = 0.,
        epochs = trial.suggest_int('epochs', 1, 10),
        update_interval = trial.suggest_int('update_interval', 16, 288, log=True)
    )

    env = get_env(modelk, scalerk, tickerk, 0)
    agent = rl.PPO(env = env, seed=0, **params)
    rewards = agent.learn(timesteps=trial.suggest_int('timesteps', 288*10, 288*30),
                          reward_aggregation='episodic.normalized')
    feedback = np.mean(rewards[-5:])
    return feedback

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

In [None]:
# Fixed PPO settings used by the RL trial and the transfer experiments below.
agent_params = dict(
    policy = rl.ActorCriticBox,
    activation = nn.Tanh,
    # 6 equals len(cfg1.colsx) - 1, i.e. the ESB inputs without the action column.
    state_dim = 6,
    action_dim = 1,
    n_latent_var = 48,
    lr = 1e-3,
    gamma = 0.9,
    epochs = 5,
    update_interval = 100,
    truncate=False,
    seed=0
)
# 30 episodes of 288 steps each.
timesteps = 288 * 30

### RL Trial

In [None]:
# Fit the tower 1 data model on all rows (train_split=1. returns no validation data).
x, _, y, _, ticker, scaler = get_env_data(
    ch1, cfg1.colsx, cfg1.colsy, cfg1.ticker_vars,
    train_split=1., shuffle=False
)
model = train_model(x, y, verbose=False)

In [None]:
# Environment from the model above and a PPO agent bound to it.
# `env` rebinds the name used for the system-identification environment earlier.
env = get_env(model, scaler, ticker, seed=0)
env.reset()
agent = rl.PPO(env, **agent_params)

In [None]:
# Train for five times the default budget (288 * 30 * 5 steps).
r = agent.learn(timesteps * 5)

In [None]:
# Learning curve: the values returned by agent.learn.
plt.plot(r)

In [None]:
from controllers.esb import Controller as FeedbackController

# Setpoint limits, used below as (lower, upper).
bounds = (58, 75)
# NOTE(review): the meaning of the positional arguments 1, 7, 0.5 is defined in controllers.esb — confirm there.
fc = FeedbackController((bounds,), 1, 7, 0.5, 'temperature')

class RLController:
    """Adapter that turns an RL agent's action into a setpoint within ``bounds``.

    ``state_vars`` names the entries of the input that form the agent's state.
    ``bounds`` is a sequence of ``(low, high)`` pairs; only the first pair is used.
    """

    def __init__(self, agent, state_vars, bounds):
        self.agent = agent
        self.state_vars = state_vars
        self.bounds = np.asarray(bounds)

    def predict(self, X: pd.DataFrame):
        """Return a 1-tuple with the setpoint for the observation ``X``.

        The agent's action is mapped linearly around the midpoint of the first
        bounds pair, so an action of -1 gives the lower bound and +1 the upper bound.
        """
        observation = X[self.state_vars].to_numpy(np.float32).reshape(1, -1)
        action, _ = self.agent.predict(observation)
        limits = self.bounds[0]
        centre = limits.mean()
        width = np.diff(limits)[0]
        setpoint = centre + action * 0.5 * width
        return (setpoint[0],)
# RL controller over the ESB state variables (cfg1.colsx without the trailing 'Setpoint').
rc = RLController(agent, cfg1.colsx[:-1], (bounds,))

# Replay the feedback controller over measured tower 1 data, one row at a time.
factions = []
ractions = []  # stays empty: the RL replay below is commented out
indices = []
feedbacks = []
for idx, row in ch1.loc['2021-09-20':'2021-10-10'].iterrows():
    # Skip rows with a missing model input.
    if pd.isnull(row[cfg1.colsx]).any(): continue
    factions.append(fc.predict(row)[0][0])
    # Reads the controller's private feedback history for plotting.
    feedbacks.append(fc._feedbacks[-1])
#     ractions.append(rc.predict(row)[0][0])
    indices.append(idx)

%matplotlib notebook
plt.figure(figsize=(15,8))
plt.plot(indices, factions, label='F.Setpoint', ls='-')
plt.plot(indices, ch1.Setpoint.loc[indices], label='Actual', c='r', ls=':')
# plt.plot(indices, ractions, label='RL', ls=':')
plt.plot(indices, ch1.TempWetBulb.loc[indices], label='TempWetBulb')
plt.plot(indices, ch1.TempCondIn.loc[indices], label='TempCondIn')
plt.ylim(55, 85)
plt.legend()
plt.ylabel('temp')
# NOTE(review): the first positional argument of plt.grid is the on/off flag, so
# 'both' only switches the grid on; `which='both'` was probably intended.
plt.grid('both')
# Second y-axis for the feedback signal.
plt.twinx()
plt.plot(indices, feedbacks, ls='-.', c='k', lw='0.5')
plt.ylabel('feedback')
plt.title('Re-run on measured data')

In [None]:
%matplotlib notebook
from controllers.esb import Controller as BaseFBController

class FBController(BaseFBController):
    """Feedback controller variant driven by the gap between state entries 0 and 2.

    NOTE(review): assumes the state follows cfg1.colsx order, i.e. entry 0 is
    TempWetBulb and entry 2 is TempCondIn — confirm against the environment.
    """

    def feedback(self, X):
        """Return ``-(X[2] - X[0])``; raise ValueError if the result is NaN."""
        cond_in, wet_bulb = X[2], X[0]
        signal = -(cond_in - wet_bulb)
        if np.isnan(signal):
            raise ValueError('TempCondIn is NaN. Could not calculate feedback.')
        return signal

    def starting_action(self, X):
        """Return an initial action: ``X[0]`` plus a random offset drawn from [4, 6)."""
        base = X[0]
        offset = self.random.uniform(low=4, high=6)
        return np.asarray([base + offset])

    def clip_action(self, u, X):
        """Clip ``u`` to the first bounds pair, but never below ``X[0] + 3``."""
        limits = self.bounds[0]
        floor = max(X[0]+3., limits[0])
        return np.clip(u, a_min=floor, a_max=limits[1])

fc = FBController((bounds,), 1, 3, 0.25, 'temperature')


# all_tickers = pd.concat(ticker, axis=0)
# One ticker covering the whole period. dropna() is chained so it acts on a new
# frame; inplace=True on a .loc selection triggers SettingWithCopyWarning.
new_ticker = ch1.loc['2021-09-20':'2021-10-10', cfg1.ticker_vars].dropna(axis=0)
# del all_tickers
env = get_env(model, scaler, [new_ticker], seed=0)

# Closed-loop run of the feedback controller on the simulated environment.
rewards = []
factions = []
indices = []
states = []
feedbacks = []
for ticker_idx in trange(1, leave=False):
    state = env.reset(ticker_idx)
    done = False
    while not done:
        action = fc.predict(state)[0]
        # Map the setpoint from [bounds[0], bounds[1]] to [-1, 1] for the environment.
        action_scaled = 2 * (action - np.mean(bounds)) / (bounds[1] - bounds[0])
        state, reward, done, _ = env.step(action_scaled)
        states.append(state)
        rewards.append(reward)
        factions.append(action[0])
        # Reads the controller's private feedback history for plotting.
        feedbacks.append(fc._feedbacks[-1])
    # NOTE(review): assumes one step per ticker row except the last — confirm in CoolingTowerEnv.
    indices.extend(env.ticker.index[:-1])

states = np.asarray(states)

plt.figure(figsize=(15,8))
# The first positional argument of plt.grid is the on/off flag, so pass the
# grid selection by keyword.
plt.grid(True, which='both')
plt.plot(indices, factions, label='F.Setpoint')
plt.plot(indices, ch1.loc[indices].Setpoint, label='Actual', c='r', ls=':')
plt.plot(indices, ch1.loc[indices].TempWetBulb, label='TempWetBulb')
# plt.plot(indices, ch1.loc[indices].TempCondIn, label='Old-TempCondIn', c='g', ls=':')
plt.plot(indices, states[:, 2], label='TempCondIn', lw=4)
plt.legend()
plt.ylabel('temp')
# Second y-axis for the feedback signal.
plt.twinx()
plt.plot(indices, feedbacks, ls='-.', c='k', lw=0.5)
plt.ylabel('feedback')
plt.title('Re-run on simulated data')

In [None]:
# NOTE(review): `df` is whatever an earlier cell last bound to that name (the
# Kissam download cell sets it; the transfer loops further down rebind it).
# Confirm which frame is intended before running.
scatterdf = pd.DataFrame({
    # NOTE(review): 800 is presumably the chiller's rated capacity in tons — confirm.
    'Chiller load /%': df['Tonnage'] * 100/800,
    # Division by a zero tonnage yields inf/NaN for those rows.
    'kw/Ton': df['PowChi'] / df['Tonnage'],
    'Returning Water Temp /F': df['TempCondIn']
})
# Append all original columns next to the derived ones.
scatterdf = pd.concat((scatterdf, df), axis=1)

In [None]:
%matplotlib notebook
from matplotlib.animation import FuncAnimation

frames = [day_data for date, day_data in scatterdf.groupby(df.index.date)]
frames = pd.concat(frames)
window = 288
stride = 72
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(16,8))
fig.colorbar(matplotlib.cm.ScalarMappable(norm=matplotlib.colors.Normalize(50,90),
                                          cmap=matplotlib.cm.coolwarm),
             ax=ax0)
ax0.set_xlim(0, 100)
ax0.set_ylim(0, 1)

def func(fnum):
    frame = frames[stride*fnum:stride*fnum+window]
    ax0.clear()
    ax0.grid('both')
    ax0.set_xlim(0, 100)
    ax0.set_ylim(0, 1)
    ax0.set_xlabel('Chiller load /%')
    ax0.set_ylabel('kW / ton')
    ax0.scatter(frame['Chiller load /%'].values, frame['kw/Ton'].values, c=frame['Returning Water Temp /F'].values,
             cmap='coolwarm', vmin=50, vmax=90)
    fig.suptitle('%s to %s' % (frame.index[0], frame.index[-1]))
    
    ax1.clear()
    ax1.grid('both')
    ax1.set_xlim(frame.index[0], frame.index[-1])
    ax1.set_ylim(50, 90)
    ax1.plot(frame.index, frame.TempWetBulb, label='WetBulb')
    ax1.plot(frame.index, frame.TempAmbient, label='Ambient')
    ax1.plot(frame.index, frame.Setpoint, label='Setpoint')
    ax1.plot(frame.index, frame.TempCondIn, label='TempCondIn')
    plt.legend()
    ax1.set_xlabel('Time')
    ax1.set_ylabel('Temperature')
    ax2 = ax1.twinx()
    ax2.set_ylim(0, 105)
    ax2.plot(frame.index, frame.PerFreqFanA, label='Fan Speed', c='r', ls=':')
    plt.legend()

# func(114)
anim = FuncAnimation(fig, func, frames=(len(frames) - window) // stride, repeat=True, interval=100)
plt.show()
# anim.save('movie.mp4')

### Transfer Across Towers

Effect of the training-set size on transfer to a different environment.

In [None]:
# Test environments
dfs = {}  # name -> (training dataframe, test gym environment, dataconfig)
# name_df_cfg holds three parallel tuples, so it must be unpacked with `*` to
# iterate (name, df, cfg) triples, as the "Transfer Inside Tower" cell does.
# NOTE(review): a dangling `if name=='K':` with no body (a syntax error) was
# removed here; restore any intended Kissam-specific handling explicitly.
for name, df, cfg in zip(*name_df_cfg):
    df_train, df_test = df_train_test_split(df, train_split)
    # The test environment is modelled on all rows of the held-out portion.
    x, _, y, _, ticker, scaler = get_env_data(df_test, cfg.colsx, cfg.colsy, cfg.ticker_vars, cfg.lag, train_split=1)
    model = train_model(x, y, verbose=False)
    env_test = get_env(model, scaler, ticker, seed=0)
    dfs[name] = (df_train, env_test, cfg)

In [None]:
# This cell repeated the body of agg_transfer_experiments() line for line; call
# the shared helper so both transfer studies run exactly the same procedure.
# R: name -> {training set fraction -> (training reward, testing rewards)}
R = agg_transfer_experiments(dfs, agent_params)

In [None]:
# One subplot per tower: training curve (dotted) followed by the test curve in
# the same colour, for each training fraction.
plt.figure(figsize=(6,12))
for i, (name, res) in enumerate(R.items()):
    plt.subplot(3,1,i+1)
    # Plot fractions in ascending order (0.0, the no-training baseline, comes first).
    results = sorted(res.items(), key=lambda x: x[0])
    for fraction, arrs in results:
        tr, te = res[fraction]
        # Smooth with a window of 4 and shift so the baseline reward maps to 0.
        tr = stats.rolling_mean(tr, 4) - reward_baseline
        te = stats.rolling_mean(te, 4) - reward_baseline
        l, *_ = plt.plot(tr, ls=':')
        # The test curve's x-range starts at the last x position of the training curve.
        plt.plot(np.arange(len(tr)-1, len(tr)+len(te)-1), te, c=l.get_color(), label='%.2f' % fraction)
    plt.title(name)
    plt.ylim(0, 350)
# Called once after the loop, so the legend is drawn on the last subplot only.
plt.legend()

### Transfer Inside Tower

In [None]:
similarity_idx = ['TempWetBulb', 'TempAmbient', 'Tonnage'] # variables for judging env similarity in ticker
dfs_c = {}  # name -> (training dataframe, test gym environment, dataconfig)
for (name, df, cfg) in zip(*name_df_cfg):
    # Only the per-day tickers are needed from this call.
    _, _, _, _, ticker, _ = get_env_data(df, cfg.colsx, cfg.colsy, cfg.ticker_vars, cfg.lag, train_split=1)
    # Pairwise day similarity, used as a precomputed affinity to split the days into two clusters.
    similarity_matrix = stats.timeseries.similarity_matrix(ticker, similarity_idx)
    clusterer = cluster.SpectralClustering(n_clusters=2, affinity='precomputed')
    cluster_labels = clusterer.fit_predict(similarity_matrix)
    # Embedding of the days; `coords` is not used further in this cell.
    projecter = manifold.SpectralEmbedding(affinity='precomputed')
    coords = projecter.fit_transform(similarity_matrix)
    # get training df and env
    tickera = [t for label, t in zip(cluster_labels, ticker) if label==0]
    cha = df.loc[pd.concat(tickera).index]
    # get test df and env
    tickerb = [t for label, t in zip(cluster_labels, ticker) if label==1]
    chb = df.loc[pd.concat(tickerb).index]
    # The test-cluster model is fitted on the default 90% training split of that cluster's rows.
    x_trainb, x_valb, y_trainb, y_valb, _, scalerb = get_env_data(chb, cfg.colsx, cfg.colsy, cfg.ticker_vars)
    modelb = train_model(x_trainb, y_trainb)
    envb = get_env(modelb, scalerb, tickerb)
    
    dfs_c[name] = (cha, envb, cfg)

In [None]:
# Within-tower transfer: train on cluster 0 days, test on the cluster 1 environment.
R_c = agg_transfer_experiments(dfs_c, agent_params)

In [None]:
# Same plot as for the across-tower results, drawn for the within-tower results R_c.
plt.figure(figsize=(6,12))
for i, (name, res) in enumerate(R_c.items()):
    plt.subplot(3,1,i+1)
    # Plot fractions in ascending order (0.0, the no-training baseline, comes first).
    results = sorted(res.items(), key=lambda x: x[0])
    for fraction, arrs in results:
        tr, te = res[fraction]
        # Smooth with a window of 4 and shift so the baseline reward maps to 0.
        tr = stats.rolling_mean(tr, 4) - reward_baseline
        te = stats.rolling_mean(te, 4) - reward_baseline
        l, *_ = plt.plot(tr, ls=':')
        # The test curve's x-range starts at the last x position of the training curve.
        plt.plot(np.arange(len(tr)-1, len(tr)+len(te)-1), te, c=l.get_color(), label='%.2f' % fraction)
    plt.title(name)
    plt.ylim(0, 350)
# Called once after the loop, so the legend is drawn on the last subplot only.
plt.legend()

In [None]:
drop(R)

In [None]:
cumulative_rewards(R)

In [None]:
def drop(R, window=4):
    for name, res in R.items():
        print(name)
        for fraction, (tr, te) in res.items():
            before = stats.rolling_mean(tr)[-1] - reward_baseline
            after = stats.rolling_mean(te)[-1] - reward_baseline
            diff = after - before
            per = diff * 100 / before
            print('\t%.2f\t%5.2f\t%5.2f' % (fraction, diff, per))
drop(R_c)

In [None]:
def cumulative_rewards(R):
    for name, res in R.items():
        print(name)
        for fraction, (tr, te) in res.items():
            # -288 is the worst possible reward for cooling tower
            tot_te = sum(np.asarray(te) - reward_baseline)
            bench = sum(np.asarray(res[0.][1]) - reward_baseline)
            per = tot_te * 100 / bench
            print('\t%.2f\t%5.2f\t%5.2f' % (fraction, tot_te, per))

cumulative_rewards(R_c)