In [None]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

import datetime
from os import path, environ

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.cluster import DBSCAN
from sklearn.metrics import mutual_info_score
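# import ipyvolume as ipv  # NOTE: the 3-D cluster cells below call ipv.*; re-enable this import (requires ipyvolume) before running them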

from thermo import CONSTANTS
from preprocessing.cleanup import POW_FIELDS
from utils.stats import mutual_information, temporal_correlations
# Input CSV, located under the directory named by the DATADIR environment
# variable (which must be set). See docs/5-dataset.md for info on field names.
chiller_file = path.join(
    environ['DATADIR'],
    'EngineeringScienceBuilding',
    'Chillers.csv',
)
# Directory that the figures generated by this notebook are saved into.
plot_path = path.join('..', 'docs', 'img')

In [None]:
# Load the pre-processed data, indexed by the parsed 'Time' column.
# PyTorch uses float32 as the default type for weights etc., so the input
# data points are read in the same type. Rows with missing values are dropped.
df = (
    pd.read_csv(
        chiller_file,
        index_col='Time',
        parse_dates=['Time'],
        dtype=np.float32,
    )
    .dropna()
)

# Correlations

## Cross-correlation

Pearson product-moment correlation coefficients between each pair of variables. The coefficient measures the strength of the linear relationship between two variables.

$$
r_{X,Y} = \frac{\textrm{cov}(X,Y)}{\sigma_X \sigma_Y}
$$

In [None]:
# Pearson correlation matrix with the columns of df as the variables.
corr = np.corrcoef(df, rowvar=False)

# Draw the matrix as a heat map on the fixed [-1, 1] scale and label both
# axes with the column names.
column_names = df.columns
tick_positions = np.arange(len(column_names))
fig, ax = plt.subplots(figsize=(10, 10))
axImg = ax.matshow(corr, vmin=-1, vmax=1)
fig.colorbar(axImg)
ax.set_xticks(tick_positions)
ax.set_xticklabels(column_names, rotation='vertical')
ax.set_yticks(tick_positions)
ax.set_yticklabels(column_names)
fig.savefig(path.join(plot_path, '7-pearson-correlation.png'));

## Mutual information

Mutual information is a measure of how much information about the distribution of variable $X$ is contained in the distribution of variable $Y$.

$$
\mathrm{MI}(X,Y) = \sum_{i}^{\mid X \mid} \sum_{j}^{\mid Y \mid} \frac{\mid X_i \cap Y_j \mid}{N} \log{\frac{N \; \mid X_i \cap Y_j \mid}{\mid X_i \mid \; \mid Y_j \mid}}
$$

Where $N$ is the total number of samples, and $i$ and $j$ are class labels for samples (in this case, histogram bins). $X_i$ is the set of samples of $X$ with label $i$, so $\mid X_i \mid$ is the number of such samples (and likewise for $Y_j$). The MI score is normalized to lie between 0 and 1.

In [None]:
# Mutual information computed by utils.stats.mutual_information with 32 bins.
# NOTE(review): presumably a columns x columns matrix, since both axes are
# labelled with df.columns below - confirm against the helper.
mi = mutual_information(df, bins=32)

# Draw the matrix as a heat map and label both axes with the column names.
column_names = df.columns
tick_positions = np.arange(len(column_names))
fig, ax = plt.subplots(figsize=(10, 10))
axImg = ax.matshow(mi)
fig.colorbar(axImg)
ax.set_xticks(tick_positions)
ax.set_xticklabels(column_names, rotation='vertical')
ax.set_yticks(tick_positions)
ax.set_yticklabels(column_names)
fig.savefig(path.join(plot_path, '7-mutual-information.png'));

## Temporal correlation

Measure the relationship between variables after introducing a lag.

In [None]:
# Lags (in time steps) at which correlations are evaluated.
lags = (0, 1, 2, 5)
corrs = temporal_correlations(df, lags=lags)

# Along axis 0, pick the position of the largest squared correlation and
# translate that position back into the corresponding lag value.
# NOTE(review): assumes axis 0 of `corrs` is ordered like `lags` - confirm
# against utils.stats.temporal_correlations.
maxcorridx = np.argmax(np.square(corrs), axis=0)
maxcorrs = np.asarray(lags)[maxcorridx.flatten()]

# Histogram of the winning lags.
lag_ax = plt.gca()
lag_ax.hist(maxcorrs)
lag_ax.set_xticks(lags)
lag_ax.set_title('Lags with highest cross-correlation')
lag_ax.set_xlabel('Lag / time steps')
lag_ax.set_ylabel('Feature pairs');

# Clusters

## Temperature

In [None]:
# Feature matrix for clustering: two temperature columns plus the difference
# between the TempCondOut and TempCondIn columns.
temperature_columns = ('TempAmbient', 'TempWetBulb')
X = df.loc[:, temperature_columns].assign(
    DeltaTemp=df['TempCondOut'] - df['TempCondIn']
)
# Cluster with DBSCAN's default parameters; one label per row of X.
labels = DBSCAN().fit(X).labels_

In [None]:
# Interactive 3-D scatter of the temperature features, coloured by cluster label.
# NOTE(review): `ipv` is ipyvolume, whose import is commented out in the first
# cell; it has to be re-enabled for this cell to run on a fresh kernel.
ipv.clear()
# Look up one Accent colormap colour per integer cluster label.
# NOTE(review): labels outside the colormap's range (presumably including any
# negative label) will not get a distinct colour - verify.
cmap = plt.cm.Accent(labels)
f = ipv.scatter(X['TempAmbient'], X['DeltaTemp'], X['TempWetBulb'], color=cmap,
            size=10, marker='point_2d')
# Set all three axes to 280-310 first, then override the y axis (DeltaTemp,
# the second scatter argument) with 0-10.
ipv.xyzlim(280, 310)
ipv.ylim(0, 10)
ipv.xyzlabel('TempAmbient', 'DeltaTemp', 'TempWetBulb')
ipv.show()

In [None]:
def view(fig, n, frac):
    """Per-frame callback handed to ``ipv.movie``.

    Sets the camera view to ``360 * frac``. ``fig`` and ``n`` are accepted
    but not used.
    """
    angle = 360*frac
    ipv.view(angle)


# Render the animation of the temperature clusters into the image directory.
temp_movie_file = path.join(plot_path, '7-ct-temp-clusters.gif')
ipv.movie(temp_movie_file, view, fps=12, frames=36)

![](../docs/img/7-ct-temp-clusters.gif)

## Power

In [None]:
# Feature matrix for clustering: the three power columns listed below.
power_columns = ('PowConP', 'PowFanA', 'PowFanB')
X = df.loc[:, power_columns]
# Cluster with DBSCAN's default parameters; one label per row of X.
labels = DBSCAN().fit(X).labels_

In [None]:
# Interactive 3-D scatter of the power features, coloured by cluster label.
# NOTE(review): `ipv` is ipyvolume, whose import is commented out in the first
# cell; it has to be re-enabled for this cell to run on a fresh kernel.
ipv.clear()
# Look up one Accent colormap colour per integer cluster label.
# NOTE(review): labels outside the colormap's range (presumably including any
# negative label) will not get a distinct colour - verify.
cmap = plt.cm.Accent(labels)
f = ipv.scatter(X['PowConP'], X['PowFanA'], X['PowFanB'], color=cmap,
            size=10, marker='point_2d')
# Axis limits are left at whatever ipyvolume chooses; the fixed range below is
# disabled.
# ipv.xyzlim(280, 310)
ipv.xyzlabel('PowConP', 'PowFanA', 'PowFanB')
ipv.show()

In [None]:
def view(fig, n, frac):
    """Per-frame callback handed to ``ipv.movie``.

    Sets the camera view to ``360 * frac``. ``fig`` and ``n`` are accepted
    but not used.
    """
    ipv.view(360*frac)

# plot_path already points at ../docs/img, so only the file name is joined
# here. Joining '../docs/img/7-ct-power-clusters.gif' onto it resolved to
# ../docs/docs/img/, which is not the image directory.
ipv.movie(path.join(plot_path, '7-ct-power-clusters.gif'), view, fps=12, frames=36)