In [333]:
import numpy as np
import pandas as pd
from datetime import datetime
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import time
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM,Dense,Dropout,Masking,Embedding,Bidirectional,TimeDistributed,Reshape
from sklearn.metrics import mean_squared_error
#%tensorflow_version 2.x

In [334]:
class WindowGenerator():
    """Describe and plot fixed-size (inputs, labels) windows over time-series frames.

    A window spans ``input_width + shift`` consecutive rows: the first
    ``input_width`` rows are the inputs and the last ``label_width`` rows are
    the labels.
    """

    def __init__(self, input_width, label_width, shift,
                 train_df=None, val_df=None, test_df=None,
                 label_columns=None, mean=0, std=1):
        """Store the data splits and pre-compute the window index layout.

        Args:
            input_width: number of leading window rows used as inputs.
            label_width: number of trailing window rows used as labels.
            shift: number of rows the window extends past the inputs.
            train_df: training frame; its ``columns`` define the feature
                order, so it must be provided despite the ``None`` default.
            val_df: validation frame (stored as-is).
            test_df: test frame (stored as-is).
            label_columns: optional list of column names kept as labels;
                ``None`` keeps every column.
            mean: value(s) added back when plots undo the normalisation;
                a scalar or one value per column in ``train_df`` order.
            std: value(s) the data is multiplied by when plots undo the
                normalisation; same layout as ``mean``.
        """
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        self.mean = mean
        self.std = std

        # Work out the label column indices. The mapping is always defined
        # (empty when no label columns are given) so later lookups never hit
        # a missing attribute.
        self.label_columns = label_columns
        if label_columns is None:
            self.label_columns_indices = {}
        else:
            self.label_columns_indices = {name: i for i, name in
                                          enumerate(label_columns)}
        self.column_indices = {name: i for i, name in
                               enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels are the last `label_width` positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a multi-line summary of the window layout."""
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label slice: {self.labels_slice}',
            f'Input Slice: {self.input_slice}',
            f'Label column name(s): {self.label_columns}'])

    def _column_stats(self, col_index):
        """Return ``(mean, std)`` as floats for the column at ``col_index``.

        Works for scalar statistics (the constructor defaults) as well as
        per-column sequences such as a pandas Series; sequences are read by
        position, which matches the order of ``column_indices``.
        """
        mean = np.asarray(self.mean, dtype=float)
        std = np.asarray(self.std, dtype=float)
        col_mean = mean if mean.ndim == 0 else mean[col_index]
        col_std = std if std.ndim == 0 else std[col_index]
        return float(col_mean), float(col_std)

    def _plot_windows(self, batch, plot_col, model, max_subplots,
                      marker=None, prediction_marker=None,
                      xlabel='Time [h]', verbose=False):
        """Draw inputs, labels and optional predictions for one batch.

        Shared implementation of `plot` and `plot2`. Values are converted
        back to original units with this instance's own ``mean``/``std``.
        """
        inputs, labels = batch

        plot_col_index = self.column_indices[plot_col]
        print("col index", plot_col_index)
        max_n = min(max_subplots, len(inputs))
        plt.figure(figsize=(12, 3*max_n))

        col_mean, col_std = self._column_stats(plot_col_index)
        inputs_denorm = np.asarray(inputs)[:, :, plot_col_index] * col_std + col_mean
        labels_denorm = np.asarray(labels) * col_std + col_mean

        # The label column position does not depend on the subplot.
        if self.label_columns:
            label_col_index = self.label_columns_indices.get(plot_col, None)
        else:
            label_col_index = plot_col_index

        # Run the model once for the whole batch instead of once per subplot.
        predictions_denorm = None
        if model is not None and label_col_index is not None and max_n > 0:
            if verbose:
                print(inputs.shape)
            predictions = model(inputs)
            if verbose:
                print("model(inputs)", inputs)
            print("prediction shape", predictions.shape)
            if verbose:
                print("prediction itself", predictions)
            predictions_denorm = np.asarray(predictions) * col_std + col_mean

        for n in range(max_n):
            # Use as many rows as subplots are drawn, matching the figure height.
            plt.subplot(max_n, 1, n+1)
            plt.plot(self.input_indices, inputs_denorm[n],
                     label='Inputs', c='#007bff', marker=marker)

            if label_col_index is None:
                # plot_col is not among the label columns: inputs only.
                continue

            plt.plot(self.label_indices, labels_denorm[n, :, label_col_index],
                     label='Labels', c='#ffc107', marker=marker)

            if predictions_denorm is not None:
                plt.plot(self.label_indices,
                         predictions_denorm[n, :, label_col_index],
                         label='Predictions',
                         c='#dc3545', marker=prediction_marker)

            if n == 0:
                plt.legend()

        plt.suptitle("Prediction of "+plot_col)
        plt.xlabel(xlabel)

    def plot(self, plot_col, model=None, max_subplots=3):
        """Plot up to ``max_subplots`` windows of the cached example batch.

        Args:
            plot_col: name of the column to draw (a key of ``column_indices``).
            model: optional callable applied to the input batch; its output
                is drawn as predictions.
            max_subplots: upper bound on the number of windows drawn.
        """
        self._plot_windows(self.example, plot_col, model, max_subplots,
                           xlabel='Time [h]', verbose=True)

    def plot2(self, plot_col, model=None, max_subplots=3):
        """Like `plot`, but uses ``self.example2`` and draws point markers.

        Args:
            plot_col: name of the column to draw (a key of ``column_indices``).
            model: optional callable applied to the input batch; its output
                is drawn as predictions.
            max_subplots: upper bound on the number of windows drawn.
        """
        self._plot_windows(self.example2, plot_col, model, max_subplots,
                           marker='*', prediction_marker='o',
                           xlabel='TimeSteps')

    def get_train(self):
        """Return ``self.train`` (kept for callers that use the method form)."""
        return self.train

    def split_window(self, features):
        """Split a batch of full windows into ``(inputs, labels)``.

        Args:
            features: batch indexed as [batch, time, feature]; must support
                ``set_shape`` on its slices (presumably a TensorFlow tensor).

        Returns:
            Tuple ``(inputs, labels)``; when ``label_columns`` is set, labels
            keep only those columns, stacked on the last axis.
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack(
                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis=-1)

        # Slicing loses the static time dimension; restore it explicitly.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels

@property
def train(self):
    """Windowed dataset built from ``self.train_df`` (rebuilt on each access)."""
    frame = self.train_df
    return self.make_dataset(frame)

@property
def val(self):
    """Windowed dataset built from ``self.val_df`` (rebuilt on each access)."""
    frame = self.val_df
    return self.make_dataset(frame)

@property
def test(self):
    """Windowed dataset built from ``self.test_df`` (rebuilt on each access)."""
    frame = self.test_df
    return self.make_dataset(frame)

@property
def example(self):
    """Return one `inputs, labels` batch for plotting, cached on the instance.

    The first access takes the first batch of ``self.test``; later accesses
    reuse the batch stored in ``self._example``.
    """
    cached = getattr(self, '_example', None)
    batch = next(iter(self.test)) if cached is None else cached
    self._example = batch
    return batch

@property
def example2(self):
    """Return the first `inputs, labels` batch of ``self.test`` without caching."""
    batches = iter(self.test)
    return next(batches)

def make_dataset(self, data):
    """Turn a table of rows into a dataset of `(inputs, labels)` batches.

    The rows are cast to float32, cut into consecutive windows of
    ``self.total_window_size`` rows (stride 1, unshuffled, batches of 32)
    and each batch is split with ``self.split_window``.
    """
    windows = tf.keras.preprocessing.timeseries_dataset_from_array(
        data=np.array(data, dtype=np.float32),
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=False,
        batch_size=32)
    return windows.map(self.split_window)

# Attach the helpers defined above to the class as a method and properties.
setattr(WindowGenerator, "make_dataset", make_dataset)
setattr(WindowGenerator, "train", train)
setattr(WindowGenerator, "val", val)
setattr(WindowGenerator, "test", test)
setattr(WindowGenerator, "example", example)
setattr(WindowGenerator, "example2", example2)

In [335]:
# Download the three eplusout.csv exports (anno1..anno3); later cells label
# them as the years 2017, 2018 and 2019.
df1, df2, df3 = map(pd.read_csv, (
    'https://raw.githubusercontent.com/bre3760/ictbuilding/dev/data/anno1/eplusout/eplusout.csv',
    'https://raw.githubusercontent.com/bre3760/ictbuilding/dev/data/anno2/eplusout/eplusout.csv',
    'https://raw.githubusercontent.com/bre3760/ictbuilding/dev/data/anno3/eplusout/eplusout.csv',
))

# Recorded output of the cell above: URLError: <urlopen error [Errno 11001] getaddrinfo failed>

In [None]:
def changetime(df,year):
    """Add a "TimeStep" column of epoch seconds parsed from "Date/Time".

    Each "Date/Time" value is expected to look like
    ``<month>/<day>  <HH:MM:SS>`` (date and clock separated by two spaces).
    The column carries no year, so ``year`` is inserted. A clock of
    ``24:MM:SS`` is read as ``00:MM:SS`` plus 86400 seconds, because
    ``strptime`` does not accept hour 24.

    Args:
        df: frame with a "Date/Time" column; modified in place.
        year: year to insert, e.g. ``"2017"``.

    Returns:
        None. ``df["TimeStep"]`` holds ``time.mktime`` values (local time).
    """
    datetimeform = []
    # Iterate the values themselves so the result does not depend on the
    # frame having a default 0..n-1 index.
    for raw in df["Date/Time"]:
        days, hours = raw.split('  ')
        date_fields = days.split("/")
        month, day = date_fields[0], date_fields[1]

        clock = hours.replace(' ', '')
        # Only the hour field can mean "end of day"; minutes or seconds equal
        # to 24 must be left untouched.
        rolls_over = clock.startswith('24:')
        if rolls_over:
            clock = '00:' + clock[3:]

        tt = f'{day}/{month}/{year}{clock}'.replace(' ', '')
        timestamp = time.mktime(datetime.strptime(tt, "%d/%m/%Y%H:%M:%S").timetuple())
        if rolls_over:
            timestamp += 86400
        datetimeform.append(timestamp)

    df["TimeStep"]=datetimeform
    
# Add the "TimeStep" column to each yearly table, supplying the year that the
# "Date/Time" column itself does not carry.
changetime(df1,"2017")
changetime(df2,"2018")
changetime(df3,"2019")
# Preview the first rows to check the new column.
df1.head()

In [None]:
# Extract mean temp time series
# mean value of the 3 indoor zones
def meanDF(df):
    """Build a compact table of timestamps, temperatures and energy columns.

    "Indoor_Mean" is the row-wise mean of the bedroom, bathroom and kitchen
    zone temperatures, computed only for rows where all three are present
    (other rows get NaN). "Delta_T" is indoor mean minus outdoor temperature.

    Args:
        df: frame holding the zone temperature, outdoor temperature,
            district cooling, electricity and "TimeStep" columns.

    Returns:
        New DataFrame with columns DateTime, Outdoor_Mean, Indoor_Mean,
        Cooling, Power and Delta_T, indexed like ``df``.
    """
    zone_columns = [
        "BLOCK1:BEDROOM:Zone Mean Air Temperature [C](TimeStep:ON)",
        "BLOCK1:BATHROOM:Zone Mean Air Temperature [C](TimeStep:ON)",
        "BLOCK1:KITCHEN:Zone Mean Air Temperature [C](TimeStep:ON)",
    ]
    # Rows with a missing zone reading are excluded before averaging.
    indoor_mean = df[zone_columns].dropna().mean(axis=1)

    summary = pd.DataFrame({"DateTime": df["TimeStep"]})
    summary["Outdoor_Mean"] = df["Environment:Site Outdoor Air Drybulb Temperature [C](TimeStep)"]
    summary["Indoor_Mean"] = indoor_mean
    summary["Cooling"] = df["DistrictCooling:Facility [J](TimeStep)"]
    summary["Power"] = df["Electricity:Facility [J](TimeStep)"]
    summary["Delta_T"] = summary["Indoor_Mean"] - summary["Outdoor_Mean"]
    return summary

# Summarise each yearly table with meanDF, then preview the first one.
df1_final, df2_final, df3_final = map(meanDF, (df1, df2, df3))
df1_final.head()

In [None]:
# Select only the values the RNN is trained on.
meas = ["TimeStep",
        'Electricity:Facility [J](TimeStep)',
        'DistrictCooling:Facility [J](TimeStep)',
        "Environment:Site Outdoor Air Drybulb Temperature [C](TimeStep)",
        "Environment:Site Direct Solar Radiation Rate per Area [W/m2](TimeStep)",
        "Environment:Site Wind Speed [m/s](TimeStep)",
        "Environment:Site Outdoor Air Barometric Pressure [Pa](TimeStep)",
        "Environment:Site Wind Direction [deg](TimeStep)",
        "Environment:Site Diffuse Solar Radiation Rate per Area [W/m2](TimeStep)"]

# Stack the three years one after another, starting with the indoor mean.
x = pd.DataFrame()
x["Indoor_Mean"] = pd.concat(
    [frame["Indoor_Mean"] for frame in (df1_final, df2_final, df3_final)])
print(x[78000:])
for m in meas:
    x[m] = pd.concat([frame[m] for frame in (df1, df2, df3)])
# Each year kept its own 0..n-1 index; replace it with one continuous index.
x.reset_index(inplace=True, drop=True)

In [None]:
# Map each column name of x to its position, then show summary statistics.
column_indices = dict(zip(x.columns, range(len(x.columns))))
x.describe().T

In [None]:
# Split the dataset into train, validation and test sets and normalise each
# with the training-set statistics.

ntot = len(x)

# Row boundaries: the first two years are for training, the third year is
# split in half between validation and test.
_train_end = len(df1) + len(df2)
_val_end = _train_end + int(0.5*len(df3))

x_train = x[:_train_end]
mean = x_train.mean()
std = x_train.std()
x_train_norm = (x_train - mean)/std
print("Training set dimension: ",len(x_train))

x_val = x[_train_end:_val_end]
x_val_norm = (x_val - mean)/std
print("Validation set dimension: ",len(x_val))

x_test = x[_val_end:]
x_test_norm = (x_test - mean)/std
print("Test set dimension: ",len(x_test))

In [None]:
# Plot the whole district-cooling series (all three years) from a copy of x.
powr2 = x.copy()
#powr2=powr[0:1000]
powr2['DistrictCooling:Facility [J](TimeStep)'].plot(figsize=(20,20))
#powr2['Electricity:Facility [J](TimeStep)'].plot(figsize=(20,20))

#powr2.head(100)

In [None]:
# Indoor-temperature window: 144 input steps followed by 144 label steps
# (total window 288; labels are window positions 144..287).
column_to_predict_indoor = 'Indoor_Mean'
w_indoor = WindowGenerator(input_width=6*24, label_width=6*24, shift=6*24,
                     train_df=x_train_norm, val_df=x_val_norm, test_df=x_test_norm,
                     label_columns=[column_to_predict_indoor],
                     mean = mean, std = std)

In [None]:
# Outdoor-temperature window: 1008 input steps (7 * 144) followed by 144
# label steps (total window 1152; labels are window positions 1008..1151).
column_to_predict_outdoor = 'Environment:Site Outdoor Air Drybulb Temperature [C](TimeStep)'
w_outdoor = WindowGenerator(input_width=6*24*7, label_width=6*24, shift=6*24,
                     train_df=x_train_norm, val_df=x_val_norm, test_df=x_test_norm,
                     label_columns=[column_to_predict_outdoor],
                     mean = mean, std = std)

In [None]:
# Electricity window: 12 input steps, total window 13, labels are the last
# 2 positions (11 and 12).
# NOTE(review): with label_width=2 and shift=1 the first label position (11)
# is also the last input position, so that target value is visible to the
# model as an input feature - confirm this overlap is intended.
column_to_predict_power = 'Electricity:Facility [J](TimeStep)'
w_power = WindowGenerator(input_width=6*2, label_width=2, shift=1,
                     train_df=x_train_norm, val_df=x_val_norm, test_df=x_test_norm,
                     label_columns=[column_to_predict_power],
                     mean = mean, std = std)

In [None]:
# District-cooling window: in_steps inputs followed by out_steps labels
# (total window in_steps + out_steps). in_steps and out_steps are reused by
# the model definitions and the evaluation cells below.
in_steps = 6
out_steps = 6
column_to_predict_cooling = 'DistrictCooling:Facility [J](TimeStep)'
w_cooling = WindowGenerator(input_width=in_steps, label_width=out_steps, shift=out_steps,
                     train_df=x_train_norm, val_df=x_val_norm, test_df=x_test_norm,
                     label_columns=[column_to_predict_cooling],
                     mean = mean, std = std)
# Display the window layout (WindowGenerator.__repr__).
w_cooling

In [None]:
MAX_EPOCHS = 20

def compile_and_fit(model, window, patience=3, max_epochs=None):
    """Compile ``model`` and train it on ``window.train`` with early stopping.

    The model is compiled with mean-squared-error loss, the Adam optimizer
    and mean absolute percentage error as the reported metric. Training stops
    once ``val_loss`` has not improved for ``patience`` epochs.

    Args:
        model: Keras model to compile and fit.
        window: object exposing ``train`` and ``val`` datasets.
        patience: number of non-improving epochs tolerated by early stopping.
        max_epochs: upper bound on training epochs; ``None`` uses the
            module-level ``MAX_EPOCHS``.

    Returns:
        The history object returned by ``model.fit``.
    """
    if max_epochs is None:
        max_epochs = MAX_EPOCHS

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience,
                                                      mode='min')
    #different choices
    model.compile(loss=tf.losses.MeanSquaredError(),
                  optimizer=tf.optimizers.Adam(),
                  metrics=[tf.metrics.MeanAbsolutePercentageError()])
                  # metrics=[tf.metrics.Accuracy()])
                  # metrics=[tf.metrics.MeanAbsoluteError()])

    # window.train / window.val are already batched, unshuffled datasets, so
    # `batch_size` and `shuffle` are not passed: Keras documents that
    # batch_size must not be specified, and shuffle is ignored, for dataset
    # inputs.
    history = model.fit(window.train, epochs=max_epochs,
                        validation_data=window.val,
                        callbacks=[early_stopping])
    return history

# Two stacked LSTM layers followed by a dense head reshaped to
# [batch, 2, 1]. The output size is hard-coded to 2 steps x 1 feature (the
# label width of w_power, not out_steps).
# NOTE(review): a later cell rebinds multi_lstm_model before any training
# call, so this definition appears to be unused - confirm.
multi_lstm_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    # (return_sequences=True keeps the time axis for the next LSTM).
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(12, return_sequences=True),
    tf.keras.layers.Dropout(0.2),
    # Shape => [batch, lstm_units]
    tf.keras.layers.LSTM(12, return_sequences=False),
    # Shape => [batch, out_steps*features]
    tf.keras.layers.Dense(2*1,
                          kernel_initializer=tf.initializers.zeros),
    # Shape => [batch, out_steps, features]
    tf.keras.layers.Reshape([2, 1])
])

In [None]:
# Multivariate, multi-step forecasting model: every input column is fed in,
# and out_steps values of num_features label column(s) are predicted at once.
# All layers come from tf.keras, like the other models in this file, so that
# tf.keras.Sequential is never mixed with layers from the standalone `keras`
# package.
num_features = 1
neurons = out_steps
multi_lstm_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, time, neurons]
    tf.keras.layers.LSTM(neurons, return_sequences=True),
    tf.keras.layers.Dropout(0.2),
    # Shape => [batch, neurons]
    tf.keras.layers.LSTM(neurons, return_sequences=False),
    tf.keras.layers.Dropout(0.2),
    # Shape => [batch, out_steps*num_features]
    tf.keras.layers.Dense(out_steps*num_features,
                          kernel_initializer=tf.initializers.zeros),
    # Shape => [batch, out_steps, num_features]
    tf.keras.layers.Reshape([out_steps, num_features]),
])
# Dense baseline: predicts all out_steps values from the last input step only.
num_features = 1
multi_dense_model = tf.keras.Sequential()
# Keep only the last time step: [batch, time, features] => [batch, 1, features]
multi_dense_model.add(tf.keras.layers.Lambda(lambda t: t[:, -1:, :]))
# => [batch, 1, dense_units]
multi_dense_model.add(tf.keras.layers.Dense(512, activation='relu'))
# => [batch, 1, out_steps*features]
multi_dense_model.add(tf.keras.layers.Dense(
    out_steps*num_features, kernel_initializer=tf.initializers.zeros))
# => [batch, out_steps, features]
multi_dense_model.add(tf.keras.layers.Reshape([out_steps, num_features]))

# CNN model

# Convolutional model: predicts all out_steps values from the last CONV_WIDTH
# input steps. The output size uses `out_steps`, the name the notebook
# actually defines (the previous `OUT_STEPS` was never assigned).
CONV_WIDTH = 3
multi_conv_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, CONV_WIDTH, features]
    tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]),
    # Shape => [batch, 1, conv_units]
    tf.keras.layers.Conv1D(256, activation='relu', kernel_size=(CONV_WIDTH)),
    # Shape => [batch, 1,  out_steps*features]
    tf.keras.layers.Dense(out_steps*num_features,
                          kernel_initializer=tf.initializers.zeros),
    # Shape => [batch, out_steps, features]
    tf.keras.layers.Reshape([out_steps, num_features])
])

In [None]:
# Train the LSTM on the district-cooling windows, then print its layer summary.
history = compile_and_fit(multi_lstm_model, w_cooling)
multi_lstm_model.summary()

In [None]:
# Clear the training log, then record the evaluation results of the LSTM on
# the validation and test windows.
IPython.display.clear_output()
val_performance = {'LSTM': multi_lstm_model.evaluate(w_cooling.val)}
performance = {'LSTM': multi_lstm_model.evaluate(w_cooling.test, verbose=0)}

In [None]:
# Draw inputs, labels and LSTM predictions for two windows of the first test batch.
w_cooling.plot2(plot_col=column_to_predict_cooling, model=multi_lstm_model, max_subplots=2)

In [None]:
#predictions = multi_lstm_model.predict(w_cooling.test)[:,:,0]
# The input dataframe has 13176 rows while predictions has 13163... Why?
# Presumably because make_dataset yields one window per start row that still
# has total_window_size rows available, so the last rows of the frame start
# no window - TODO confirm against the window size used in that run.
print(len(w_cooling.test_df)) # dataframe in input ha dimensione 13176 mentre predictions 13163... Perchè? 
# Predict every test window and keep the single label feature:
# result is indexed [window, forecast step].
predictions = multi_lstm_model.predict(w_cooling.test)[:,:,0]
#print(len(predictions))
#print(predictions)

In [None]:
#test=predictions[:,:,0]
#print(test)

In [None]:
# For every window keep only the first two forecast steps and undo the
# normalisation. `std` and `mean` are the per-column statistics from the
# split cell, so each entry is a full Series with one value per column of x
# rather than a single number.
pred_denorm = [[t[0]*std+mean,t[1]*std+mean] for t in predictions]
# print(type(pred_denorm)) # <class 'list'>

In [None]:
# De-normalise one forecast step per window, in original units of the cooling
# column. The LAST forecast step is used: it is the step that lines up
# row-for-row with `plot_t` (test rows starting at in_steps-1+out_steps) for
# any out_steps; the previous fixed step index 1 only lined up when
# out_steps == 2. The statistics are looked up by column name instead of the
# positional index 3, and the whole column is computed in one vectorised
# expression (float64, like the pandas arithmetic it replaces).
tmp = (predictions[:, -1].astype(np.float64)
       * std[column_to_predict_cooling]
       + mean[column_to_predict_cooling])

pred_df = pd.DataFrame(tmp)

In [None]:
# Compare the measured cooling series of the test split with the predictions.
plt.figure(figsize=(20,20))
# Drop the first in_steps-1 test rows and renumber from 0.
plot_test = x_test['DistrictCooling:Facility [J](TimeStep)'][in_steps-1:].copy()
plot_test.reset_index(inplace=True,drop=True)
# NOTE(review): plot_test and pred_df are overlaid here before the further
# out_steps offset applied below, so the two curves are presumably shifted
# against each other in this figure - confirm.
plt.plot(plot_test)
plt.plot(pred_df)
plt.legend(['x_test','predicted'])
plt.title("DistrictCooling Facility Prediction")
#print(plot_test[4:])
#print(pred_df[0:8])
# Drop a further out_steps rows: row q of plot_t is test row
# in_steps-1+out_steps+q, i.e. the last label position of window q for
# w_cooling (assuming row q of pred_df comes from window q).
plot_t = plot_test[out_steps:]
plot_t.reset_index(inplace=True,drop=True)
# The two shapes should report the same number of rows.
print(plot_t.shape)
print(pred_df.shape)

In [None]:
# Measured cooling values aligned with the predictions, on their own figure.
plt.figure(figsize=(10,10))

#plt.plot(x_test['DistrictCooling:Facility [J](TimeStep)'][11:])
plt.plot(plot_t)

In [None]:
# Predicted cooling values on their own figure.
plt.figure(figsize=(10,10))

plt.plot(pred_df)

In [None]:
# Root-mean-square error between measured and predicted cooling values,
# recorded together with the number of LSTM units used for this run.
rmse = np.sqrt(mean_squared_error(plot_t, pred_df))
rmse_neurons = {'neurons': neurons, 'rmse': rmse}
print(rmse_neurons)

In [None]:
## neurons = 12

# Total window size: 8 -> in_steps = 6, out_steps = 2
# loss: 0.0151 - mean_absolute_percentage_error: 36.4191 - rmse: 89737.6441121866

# Total window size: 12 -> in_steps = 6, out_steps = 6
# loss: 0.0378 - mean_absolute_percentage_error: 75.3893 - rmse: 165518.73420347186

# Total window size: 16 -> in_steps = 10, out_steps = 6
# loss: 0.0358 - mean_absolute_percentage_error: 58.4344 - rmse: 168050.95017360165

# Total window size: 16 -> in_steps = 12, out_steps = 4
# loss: 0.0278 - mean_absolute_percentage_error: 71.6392 - rmse: 140321.72971796765

# Total window size: 18 -> in_steps = 12, out_steps = 6
# loss: 0.0356 - mean_absolute_percentage_error: 84.7530 - rmse: 176186.70476650228

## neurons = 144
# in_steps = 6*24, out_steps = 6*24 - Total params: 276,624
# loss: 0.4198 - mean_absolute_percentage_error: 267.9347 - rmse: 315118.540658671

## neurons = 6
# Total window size: 12 -> in_steps = 6, out_steps = 6
# 

## BIDIRECTIONAL
# Bidirectional(LSTM(neurons, return_sequences=False))) - Dropout(0.2) - Dense(out_steps*num_features,kernel_initializer=tf.initializers.zeros) - Reshape([out_steps, num_features]) 

# neurons = 4
# in_steps = 12, out_steps = 4 - Total params: 516
# loss: 0.0417 - mean_absolute_percentage_error: 68.9218 - rmse: 126882.33414725878

# neurons = 6
# in_steps = 6*3, out_steps = 6 - Total params: 894
# loss: 0.0453 - mean_absolute_percentage_error: 62.9365 - rmse: 160592.68083785265

# neurons = 6
# in_steps = 10, out_steps = 6 - Total params: 894
# loss: 0.0326 - mean_absolute_percentage_error: 79.8132 - rmse: 161000.8467548974

# neurons = 12
# in_steps = 10, out_steps = 6 - Total params: 2,358
# loss: 0.0604 - mean_absolute_percentage_error: 80.2221 - rmse: 158812.95248815964

# neurons = 12
# in_steps = 6*6, out_steps = 6*2 - Total params: 2,508
# loss: 0.0871 - mean_absolute_percentage_error: 93.1802 - rmse: 282366.7647047895

# neurons = 72
# Total window size: 216 (144 inputs = 24 hours, 72 outputs = 12 hours) - Total params: 70,920
# loss: 0.3777 - mean_absolute_percentage_error: 197.3874 - rmse: 749157.4615594941

