In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras

In [49]:
df = pd.read_csv('Dataset_raw_20230414-20230630.csv')

In [50]:
df.head()

Unnamed: 0,time,Building Power/Total Active Power,HVAC Power/Total Active Power,Breakroom VAV1_02/Current Cooling Setpoint,Breakroom VAV1_02/Current Heating Setpoint,Collab Lounge VAV1_01/Current Cooling Setpoint,Collab Lounge VAV1_01/Current Heating Setpoint,Conference VAV1_04/Current Cooling Setpoint,Conference VAV1_04/Current Heating Setpoint,GE Additive Manufacturing VAV2_01/Current Cooling Setpoint,...,Offices 108_110 VAV1_06/Current Heating Setpoint,Open Office VAV1_05/Current Cooling Setpoint,Open Office VAV1_05/Current Heating Setpoint,Training Room Nine VAV1_08/Current Cooling Setpoint,Training Room Nine VAV1_08/Current Heating Setpoint,Training Room Twelve VAV1_09/Current Cooling Setpoint,Training Room Twelve VAV1_09/Current Heating Setpoint,Weather Station/Outside Humidity,Weather Station/Outside Temp,Weather Station/Solar Radiation
0,2023-04-14 00:00:18.671,6410.618,,,,,,,,,...,,,,,,,,,,
1,2023-04-14 00:00:53.673,6533.5146,,,,,,,,,...,,,,,,,,,,
2,2023-04-14 00:01:29.676,6425.558,,,,,,,,,...,,,,,,,,,,
3,2023-04-14 00:01:52.684,6545.1597,,,,,,,,,...,,,,,,,,,,
4,2023-04-14 00:03:16.693,7480.0654,,,,,,,,,...,,,,,,,,,,


In [51]:
df = df.drop('Building Power/Total Active Power', axis=1)
# 'ffill' means "forward fill." Tte missing values will be filled with the most recent non-null value in the same column.
df.fillna(method='ffill', inplace=True)
# any row containing at least one NaN value will be dropped. 
# axis=0 indicates that the operation will be applied along the rows
df.dropna(how='any', axis=0, inplace=True)

# convert the dates to datetime to be able to acess it
timestamp = pd.to_datetime(df['time'])
# The day of the week with Monday=0, Sunday=6.
df['DayofWeek'] = timestamp.dt.dayofweek
# The ordinal day of the year
df['DayofYear'] = timestamp.dt.dayofyear

In [52]:
df.shape

(2779034, 37)

In [53]:
df.head()

Unnamed: 0,time,HVAC Power/Total Active Power,Breakroom VAV1_02/Current Cooling Setpoint,Breakroom VAV1_02/Current Heating Setpoint,Collab Lounge VAV1_01/Current Cooling Setpoint,Collab Lounge VAV1_01/Current Heating Setpoint,Conference VAV1_04/Current Cooling Setpoint,Conference VAV1_04/Current Heating Setpoint,GE Additive Manufacturing VAV2_01/Current Cooling Setpoint,GE Additive Manufacturing VAV2_01/Current Heating Setpoint,...,Open Office VAV1_05/Current Heating Setpoint,Training Room Nine VAV1_08/Current Cooling Setpoint,Training Room Nine VAV1_08/Current Heating Setpoint,Training Room Twelve VAV1_09/Current Cooling Setpoint,Training Room Twelve VAV1_09/Current Heating Setpoint,Weather Station/Outside Humidity,Weather Station/Outside Temp,Weather Station/Solar Radiation,DayofWeek,DayofYear
8382,2023-04-14 08:35:44.056,15497.989,74.894394,72.894394,74.212616,72.212616,73.106316,71.106316,77.0,75.0,...,72.73064,72.43602,70.43602,72.47923,70.47923,55.0,62.1,91.0,4,104
8383,2023-04-14 08:35:44.224,15342.122,74.894394,72.894394,74.212616,72.212616,73.106316,71.106316,77.0,75.0,...,72.73064,72.43602,70.43602,72.47923,70.47923,55.0,62.1,91.0,4,104
8384,2023-04-14 08:35:45.223,14988.585,74.894394,72.894394,74.212616,72.212616,73.106316,71.106316,77.0,75.0,...,72.73064,72.43602,70.43602,72.47923,70.47923,55.0,62.1,91.0,4,104
8385,2023-04-14 08:35:45.956,14988.585,74.894394,72.894394,74.212616,72.212616,73.106316,71.106316,77.0,75.0,...,72.73064,72.43602,70.43602,72.47923,70.47923,55.0,62.1,91.0,4,104
8386,2023-04-14 08:35:46.224,15189.883,74.894394,72.894394,74.212616,72.212616,73.106316,71.106316,77.0,75.0,...,72.73064,72.43602,70.43602,72.47923,70.47923,55.0,62.1,91.0,4,104


In [54]:
# randomly get 10% of the data for the model
dataset = df.sample(frac=0.1)

In [55]:
dataset.shape

(277903, 37)

In [56]:
dataset

Unnamed: 0,time,HVAC Power/Total Active Power,Breakroom VAV1_02/Current Cooling Setpoint,Breakroom VAV1_02/Current Heating Setpoint,Collab Lounge VAV1_01/Current Cooling Setpoint,Collab Lounge VAV1_01/Current Heating Setpoint,Conference VAV1_04/Current Cooling Setpoint,Conference VAV1_04/Current Heating Setpoint,GE Additive Manufacturing VAV2_01/Current Cooling Setpoint,GE Additive Manufacturing VAV2_01/Current Heating Setpoint,...,Open Office VAV1_05/Current Heating Setpoint,Training Room Nine VAV1_08/Current Cooling Setpoint,Training Room Nine VAV1_08/Current Heating Setpoint,Training Room Twelve VAV1_09/Current Cooling Setpoint,Training Room Twelve VAV1_09/Current Heating Setpoint,Weather Station/Outside Humidity,Weather Station/Outside Temp,Weather Station/Solar Radiation,DayofWeek,DayofYear
181991,2023-04-18 17:22:18.813,12450.646,74.893290,72.893290,74.217200,72.217200,73.106316,71.106316,77.0,75.0,...,72.725540,72.436020,70.436020,72.475845,70.475845,31.0,61.7,492.0,1,108
1868216,2023-06-07 07:35:34.529,60986.984,74.902160,72.902160,74.397440,72.397440,71.786990,69.786990,80.0,60.0,...,72.738770,74.833840,72.833840,72.474720,70.474720,75.0,64.4,37.0,2,158
2779103,2023-06-30 15:51:19.293,103738.484,74.906586,72.906586,74.403114,72.403114,71.790330,69.790330,77.0,75.0,...,70.687850,74.843960,72.843960,73.782234,71.782234,54.0,89.5,237.0,4,181
2721323,2023-06-29 12:22:15.911,59113.780,74.908806,72.908806,74.403114,72.403114,71.792560,69.792560,77.0,75.0,...,70.690094,74.844970,72.844970,73.783295,71.783295,62.0,82.0,824.0,3,180
1597252,2023-05-31 07:31:41.397,96642.640,74.903270,72.903270,74.399710,72.399710,71.786990,69.786990,80.0,60.0,...,72.742840,74.838900,72.838900,72.481500,70.481500,70.0,71.4,132.0,2,151
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903027,2023-05-08 10:22:03.072,14383.379,74.897720,72.897720,74.220634,72.220634,71.783640,69.783640,77.0,75.0,...,72.736740,73.552620,71.552620,72.474720,70.474720,84.0,64.0,79.0,0,128
868245,2023-05-05 15:53:38.064,56133.047,74.894394,72.894394,74.219490,72.219490,71.778060,69.778060,77.0,75.0,...,72.730640,73.548320,71.548320,72.471330,70.471330,29.0,75.3,737.0,4,125
1635857,2023-05-31 14:58:29.898,105106.330,74.903270,72.903270,74.399710,72.399710,71.786990,69.786990,77.0,75.0,...,72.742840,74.849014,72.849014,72.481500,70.481500,39.0,88.5,190.0,2,151
2098218,2023-06-13 09:53:32.530,14682.004,74.898834,72.898834,74.401980,72.401980,71.781400,69.781400,77.0,75.0,...,70.677770,74.830800,72.830800,72.470200,70.470200,58.0,63.1,338.0,1,164


In [57]:
# get rid of the time colume and convert the rest of the columes to float
dataset.index = pd.to_datetime(dataset['time'], format='%Y-%m-%d %H:%M:%S.%f')
dataset = dataset.drop('time', axis=1)

#dataset = dataset.astype(float)
dataset.iloc[:-2] = dataset.iloc[:-2].astype(float)
dataset.head()

Unnamed: 0_level_0,HVAC Power/Total Active Power,Breakroom VAV1_02/Current Cooling Setpoint,Breakroom VAV1_02/Current Heating Setpoint,Collab Lounge VAV1_01/Current Cooling Setpoint,Collab Lounge VAV1_01/Current Heating Setpoint,Conference VAV1_04/Current Cooling Setpoint,Conference VAV1_04/Current Heating Setpoint,GE Additive Manufacturing VAV2_01/Current Cooling Setpoint,GE Additive Manufacturing VAV2_01/Current Heating Setpoint,GE Additive Manufacturing VAV2_02/Current Cooling Setpoint,...,Open Office VAV1_05/Current Heating Setpoint,Training Room Nine VAV1_08/Current Cooling Setpoint,Training Room Nine VAV1_08/Current Heating Setpoint,Training Room Twelve VAV1_09/Current Cooling Setpoint,Training Room Twelve VAV1_09/Current Heating Setpoint,Weather Station/Outside Humidity,Weather Station/Outside Temp,Weather Station/Solar Radiation,DayofWeek,DayofYear
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-04-18 17:22:18.813,12450.646,74.89329,72.89329,74.2172,72.2172,73.106316,71.106316,77.0,75.0,77.0,...,72.72554,72.43602,70.43602,72.475845,70.475845,31.0,61.7,492.0,1,108
2023-06-07 07:35:34.529,60986.984,74.90216,72.90216,74.39744,72.39744,71.78699,69.78699,80.0,60.0,80.0,...,72.73877,74.83384,72.83384,72.47472,70.47472,75.0,64.4,37.0,2,158
2023-06-30 15:51:19.293,103738.484,74.906586,72.906586,74.403114,72.403114,71.79033,69.79033,77.0,75.0,77.0,...,70.68785,74.84396,72.84396,73.782234,71.782234,54.0,89.5,237.0,4,181
2023-06-29 12:22:15.911,59113.78,74.908806,72.908806,74.403114,72.403114,71.79256,69.79256,77.0,75.0,77.0,...,70.690094,74.84497,72.84497,73.783295,71.783295,62.0,82.0,824.0,3,180
2023-05-31 07:31:41.397,96642.64,74.90327,72.90327,74.39971,72.39971,71.78699,69.78699,80.0,60.0,80.0,...,72.74284,74.8389,72.8389,72.4815,70.4815,70.0,71.4,132.0,2,151


In [34]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [35]:
#  standardizing features in a dataset
scaler = preprocessing.StandardScaler()
scaler.fit(dataset)
scaled_dataset = scaler.fit_transform(dataset)

In [36]:
X = [] # test data
y = [] # prediction data

n_predict = 1 # amount of data that is predicted
n_past = 100 # amount of data used for the prediction

In [37]:
for i in range(n_past, len(dataset) - n_predict + 1):
    X.append(scaled_dataset[i - n_past:i, 0:dataset.shape[1]])
    y.append(scaled_dataset[i + n_predict - 1:i + n_predict, 0])

In [38]:
# turn X and y into arrays
X, y = np.array(X), np.array(y)

In [39]:
len(X), len(y)

(277803, 277803)

In [40]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.optimizers.schedules import InverseTimeDecay
from keras.optimizers import Adam

In [41]:
# gradient clipping
initial_learning_rate = 0.001
decay_steps = 1000
decay_rate = 0.95
learning_rate_fn = InverseTimeDecay(initial_learning_rate, decay_steps, decay_rate)

# create optimizer
optimizer = Adam(learning_rate = learning_rate_fn, clipvalue = 0.5)

In [42]:
# define the model
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1]))
model.compile(optimizer=optimizer, loss='mse')

In [43]:
X.shape

(277803, 100, 36)

In [44]:
y.shape

(277803, 1)

In [45]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 64)           25856     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 38305 (149.63 KB)
Trainable params: 38305 (149.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
#fit model
#history = model.fit(X, y, epochs=10, batch_size=10, validation_split=0.1, verbose=1)
model.fit(X, y, epochs=50, batch_size=10, validation_split=0.1, verbose=1)

Epoch 1/50
 1579/25003 [>.............................] - ETA: 20:40 - loss: 1.0119

 1634/25003 [>.............................] - ETA: 20:51 - loss: 1.0141

In [None]:
# plot how the training went
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')