In [None]:
%matplotlib notebook
%reload_ext autoreload
%autoreload 2

import datetime
from os import path, environ
import pickle
import warnings

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
import torch
import torch.nn as nn

from utils import contiguous_sequences

# Output locations: `bin_path` receives the pickled model written further
# down; `plot_path` points at the documentation image folder.
bin_path = './bin/'
plot_path = path.join('..', 'docs', 'img')

# Chiller measurements (CSV) located under the directory named by the
# DATADIR environment variable. Field names: see docs/5-dataset.md.
chiller_file = path.join(
    environ['DATADIR'], 'EngineeringScienceBuilding', 'Chillers.csv'
)

In [None]:
# Data selection: 'all', 'chiller_on' or 'fan_on'
MODE = 'chiller_on'

# Read pre-processed data.
# PyTorch uses float32 as the default type for weights etc., so the input
# data points are read in the same type.
df = pd.read_csv(chiller_file, index_col='Time',
                 parse_dates=['Time'], dtype=np.float32)
# Discard every record that has at least one missing field.
df = df.dropna()

if MODE == 'chiller_on':
    # Keep only records where the chiller draws power.
    df = df[df['PowChi'] != 0.]
elif MODE == 'fan_on':
    # Keep records where at least one fan is running. Each comparison needs
    # its own parentheses: `|` binds tighter than `!=`, so without them the
    # expression would OR a boolean mask with the raw frequency column.
    df = df[(df['PerFreqFanA'] != 0.) | (df['PerFreqFanB'] != 0.)]
elif MODE != 'all':
    # A mistyped mode would otherwise silently select the whole data set.
    raise ValueError(
        "MODE must be 'all', 'chiller_on' or 'fan_on', got {!r}".format(MODE))

print(len(df), 'Records')

## Environment model

In [None]:
# Column names of the model's input features and prediction targets.
inputs = ['PerFreqFans', 'PowConP', 'TempCondOut', 'TempAmbient', 'TempWetBulb']
outputs = ['PowChi', 'PowFans', 'TempCondOut']

# Targets: chiller power, combined power of both fans, condenser outlet temp.
df_out = pd.DataFrame(index=df.index, columns=outputs)
df_out['TempCondOut'] = df['TempCondOut']
df_out['PowFans'] = df['PowFanA'] + df['PowFanB']
df_out['PowChi'] = df['PowChi']

# Inputs: mean frequency of the two fans; the remaining columns are copied
# unchanged from the source frame.
df_in = pd.DataFrame(index=df.index, columns=inputs)
df_in[inputs[1:]] = df[inputs[1:]]
df_in['PerFreqFans'] = (df['PerFreqFanA'] + df['PerFreqFanB']) / 2

# Split the time index into sequences using a 5-minute interval
# (exact semantics are defined by utils.contiguous_sequences).
idx_list = contiguous_sequences(df.index, pd.Timedelta(5, unit='min'))

# Within each sequence, pair the inputs at every step but the last with the
# outputs at every step but the first, i.e. inputs at position k are aligned
# with outputs at position k+1.
dfs_in = [df_in.loc[idx[:-1]] for idx in idx_list]
dfs_out = [df_out.loc[idx[1:]] for idx in idx_list]

df_in = pd.concat(dfs_in, sort=False)
df_out = pd.concat(dfs_out, sort=False)

print('{:6d} time series'.format(len(dfs_in)))
print('{:6d} total rows'.format(len(df_in)))

### Fully-connected network

In [None]:
# Fixed seed so that weight initialisation, mini-batch sampling and the
# validation split used for early stopping are repeatable across
# Restart & Run All.
RANDOM_STATE = 0

# Separate scalers: `std_in` lives inside the pipeline and standardises the
# inputs; `std_out` standardises the targets and is needed later to map
# predictions back to their original scale.
std_in, std_out = StandardScaler(), StandardScaler()
net = MLPRegressor(hidden_layer_sizes=(32, 32, 32),
                   activation='relu',
                   solver='adam',
                   verbose=True,
                   early_stopping=True,
                   random_state=RANDOM_STATE)
est = Pipeline([('std', std_in), ('net', net)])

# FutureWarnings raised during fitting are silenced to keep the training
# log readable.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=FutureWarning)
    est.fit(df_in, std_out.fit_transform(df_out))



In [None]:
# Bundle everything needed to reuse the model: the fitted pipeline, the
# scaler that undoes the target normalisation, the final training loss and
# the column names the model was trained on.
save = {
    'loss': est['net'].loss_,
    'estimator': est,
    'output_norm': std_out,
    'inputs': inputs,
    'outputs': outputs
}

# Serialise the bundle into the binary output directory.
model_file = path.join(bin_path, 'chiller_model_nn')
with open(model_file, 'wb') as model_fh:
    pickle.dump(save, model_fh)

In [None]:
# Pick the third contiguous sequence for a visual check.
test_in = dfs_in[2]
test_out = dfs_out[2]

# The network predicts standardised targets; undo the scaling so that the
# predictions are comparable with the recorded values.
pred_scaled = est.predict(test_in)
pred = pd.DataFrame(std_out.inverse_transform(pred_scaled),
                    columns=test_out.columns,
                    index=test_out.index)

# Recorded outputs first, then the model's predictions.
test_out.plot(grid=True)
pred.plot(grid=True)

In [None]:
# Targets of the first contiguous sequence.
# Rows have to be selected with .loc: plain [] with a list-like key looks up
# columns and raises KeyError for timestamps. The first timestamp of the
# sequence is skipped because df_out only holds idx[1:] of every sequence.
df_out.loc[idx_list[0][1:]]

In [None]:
# Preview the first rows of the target frame.
df_out.head()