In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit

import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense

%load_ext tensorboard
from tensorboard.plugins.hparams import api as hp

## Data

In [2]:
df = pd.read_csv('../data/energy_data_cleaned.csv')
df = df.drop(['timestamp', 'year_built'], axis=1)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048346 entries, 0 to 1048345
Data columns (total 22 columns):
 #   Column                                     Non-Null Count    Dtype  
---  ------                                     --------------    -----  
 0   square_feet                                1048346 non-null  float64
 1   floor_count                                1048346 non-null  float64
 2   air_temperature                            1048346 non-null  float64
 3   m2_per_floor                               1048346 non-null  float64
 4   target_lag_1                               1048346 non-null  float64
 5   target_lag_6                               1048346 non-null  float64
 6   target_lag_12                              1048346 non-null  float64
 7   target_lag_72                              1048346 non-null  float64
 8   target_lag_120                             1048346 non-null  float64
 9   target_lag_168                             1048346 non-null  float64

## Preprocessing

In [3]:
# 3. prepare features dataframe (X) and target (y) as data series
X = df.drop('target', axis=1)
y = df['target']

In [4]:
# 85% training, 15% test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

In [5]:
# taking 15% of the training data for validation
num_validation_samples = int(0.15 * X_train.shape[0])  # Convert to Python int
X_val = X_train.iloc[:num_validation_samples]
y_val = y_train.iloc[:num_validation_samples]

X_train = X_train.iloc[num_validation_samples:]
y_train = y_train.iloc[num_validation_samples:]

In [6]:
print('70% training:', X_train.shape)
print('15% validation: ', X_val.shape)
print('15% test: ',X_test.shape)

70% training: (757430, 21)
15% validation:  (133664, 21)
15% test:  (157252, 21)


In [7]:
# Standardizing the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

y_train_scaled = scaler.fit_transform(y_train.values.reshape(-1, 1))
y_val_scaled = scaler.transform(y_val.values.reshape(-1, 1))
y_test_scaled = scaler.transform(y_test.values.reshape(-1, 1))

## Model

### Defining hyperparameters

In [8]:
HP_NUM_UNITS = hp.HParam('units', hp.Discrete([32, 64, 128]))
HP_REC_DROPOUT = hp.HParam('recurrent_dropout', hp.Discrete([0.0, 0.2]))

METRIC_MAE = 'MAE'
METRIC_MSE = 'MSE'
METRIC_RMSE = 'RMSE'
METRIC_R2 = 'R2'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
  hp.hparams_config(
    hparams=[HP_NUM_UNITS, HP_REC_DROPOUT],
    metrics=[hp.Metric(METRIC_MAE, display_name='MAE'),
             hp.Metric(METRIC_MSE, display_name='MSE'), 
             hp.Metric(METRIC_RMSE, display_name='RMSE'),
             hp.Metric(METRIC_R2, display_name='R2')]
  )

In [9]:
def r2_metric(y_true, y_pred):
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
    ss_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))
    r2 = 1 - ss_res / (ss_tot + tf.keras.backend.epsilon())
    return r2

In [10]:
def train_test_model(log_dir, hparams, file_writer_cm=None):
  model = tf.keras.models.Sequential([
    Input(shape=((X_train_scaled.shape[1], 1))),
    LSTM(units=hparams[HP_NUM_UNITS], return_sequences=False, recurrent_dropout=hparams[HP_REC_DROPOUT]),
    Dense(units=1)
  ])
  
  model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse', r2_metric])
  
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
  model.fit(X_train_scaled, 
            y_train_scaled, 
            epochs=20, 
            batch_size=256, 
            validation_data=(X_val_scaled, y_val_scaled),
            callbacks=[tensorboard_callback],
            verbose=2
  )

  # Make predictions
  y_pred_scaled = model.predict(X_test_scaled)
  y_pred = scaler.inverse_transform(y_pred_scaled)
  y_test_orig = scaler.inverse_transform(y_test_scaled)

  # Test performance metrics
  mae = mean_absolute_error(y_test_orig, y_pred)
  mse = mean_squared_error(y_test_orig, y_pred)
  rmse = np.sqrt(mse)
  r2 = r2_score(y_test_orig, y_pred)

  print(f'\nModel with hyperparameters:\nNum units: {hparams[HP_NUM_UNITS]}, rec. dropout: {hparams[HP_REC_DROPOUT]}\n   Test performance metrics:\n')
  print(f"    MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}\n")
  return mae, mse, rmse, r2


## Training the model with the different hyperparameters

In [11]:
# Creating a function to log the resuls
def run(log_dir, hparams):
    with tf.summary.create_file_writer(log_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        #file_writer_cm = tf.summary.create_file_writer(log_dir + '/cm')
        mae, mse, rmse, r2 = train_test_model(log_dir, hparams)
        # register test performance metrics
        tf.summary.scalar(METRIC_MAE, mae, step=1)
        tf.summary.scalar(METRIC_MSE, mse, step=1)
        tf.summary.scalar(METRIC_RMSE, rmse, step=1)
        tf.summary.scalar(METRIC_R2, r2, step=1)

In [12]:
# Performing a grid search on the hyperparameters we need to test
session_num = 0

for units in HP_NUM_UNITS.domain.values:
    for dropout in HP_REC_DROPOUT.domain.values:
        hparams = {
            HP_NUM_UNITS: units,
            HP_REC_DROPOUT: dropout
        }
        run_name = "run-%d" % session_num
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})
        
        run('logs/hparam_tuning/' + run_name, hparams)

        session_num += 1

--- Starting trial: run-0
{'units': 32, 'recurrent_dropout': 0.0}
Epoch 1/20


2025-04-19 18:44:05.628655: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-04-19 18:44:05.629859: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-04-19 18:44:05.630584: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

2959/2959 - 37s - loss: 0.1494 - mae: 0.2632 - mse: 0.1494 - r2_metric: 0.8497 - val_loss: 0.1043 - val_mae: 0.2205 - val_mse: 0.1043 - val_r2_metric: 0.8945 - 37s/epoch - 12ms/step
Epoch 2/20
2959/2959 - 35s - loss: 0.0987 - mae: 0.2136 - mse: 0.0987 - r2_metric: 0.9006 - val_loss: 0.1000 - val_mae: 0.2128 - val_mse: 0.1000 - val_r2_metric: 0.8988 - 35s/epoch - 12ms/step
Epoch 3/20
2959/2959 - 34s - loss: 0.0928 - mae: 0.2062 - mse: 0.0928 - r2_metric: 0.9065 - val_loss: 0.0901 - val_mae: 0.2019 - val_mse: 0.0901 - val_r2_metric: 0.9088 - 34s/epoch - 12ms/step
Epoch 4/20
2959/2959 - 34s - loss: 0.0870 - mae: 0.2005 - mse: 0.0870 - r2_metric: 0.9124 - val_loss: 0.0871 - val_mae: 0.2029 - val_mse: 0.0871 - val_r2_metric: 0.9118 - 34s/epoch - 12ms/step
Epoch 5/20
2959/2959 - 36s - loss: 0.0746 - mae: 0.1847 - mse: 0.0746 - r2_metric: 0.9248 - val_loss: 0.0659 - val_mae: 0.1716 - val_mse: 0.0659 - val_r2_metric: 0.9333 - 36s/epoch - 12ms/step
Epoch 6/20
2959/2959 - 35s - loss: 0.0563 - ma

2025-04-19 18:55:52.635555: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-04-19 18:55:52.636389: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-04-19 18:55:52.637256: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus


Model with hyperparameters:
Num units: 32, rec. dropout: 0.0
   Test performance metrics:

    MAE: 0.1109, MSE: 0.0413, RMSE: 0.2032, R2: 0.9844

--- Starting trial: run-1
{'units': 32, 'recurrent_dropout': 0.2}
Epoch 1/20
2959/2959 - 65s - loss: 0.1886 - mae: 0.3068 - mse: 0.1886 - r2_metric: 0.8102 - val_loss: 0.1114 - val_mae: 0.2317 - val_mse: 0.1114 - val_r2_metric: 0.8872 - 65s/epoch - 22ms/step
Epoch 2/20
2959/2959 - 63s - loss: 0.1060 - mae: 0.2285 - mse: 0.1060 - r2_metric: 0.8933 - val_loss: 0.0789 - val_mae: 0.1899 - val_mse: 0.0789 - val_r2_metric: 0.9202 - 63s/epoch - 21ms/step
Epoch 3/20
2959/2959 - 63s - loss: 0.0738 - mae: 0.1898 - mse: 0.0738 - r2_metric: 0.9257 - val_loss: 0.0388 - val_mae: 0.1337 - val_mse: 0.0388 - val_r2_metric: 0.9607 - 63s/epoch - 21ms/step
Epoch 4/20
2959/2959 - 65s - loss: 0.0462 - mae: 0.1492 - mse: 0.0462 - r2_metric: 0.9535 - val_loss: 0.0251 - val_mae: 0.1014 - val_mse: 0.0251 - val_r2_metric: 0.9747 - 65s/epoch - 22ms/step
Epoch 5/20
295

2025-04-19 19:11:07.076493: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-04-19 19:11:07.076956: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-04-19 19:11:07.077473: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

2959/2959 - 48s - loss: 0.1348 - mae: 0.2470 - mse: 0.1348 - r2_metric: 0.8644 - val_loss: 0.1031 - val_mae: 0.2138 - val_mse: 0.1031 - val_r2_metric: 0.8957 - 48s/epoch - 16ms/step
Epoch 2/20
2959/2959 - 46s - loss: 0.0966 - mae: 0.2074 - mse: 0.0966 - r2_metric: 0.9027 - val_loss: 0.0994 - val_mae: 0.2164 - val_mse: 0.0994 - val_r2_metric: 0.8994 - 46s/epoch - 16ms/step
Epoch 3/20
2959/2959 - 47s - loss: 0.0896 - mae: 0.2002 - mse: 0.0896 - r2_metric: 0.9097 - val_loss: 0.0851 - val_mae: 0.1955 - val_mse: 0.0851 - val_r2_metric: 0.9138 - 47s/epoch - 16ms/step
Epoch 4/20
2959/2959 - 47s - loss: 0.0688 - mae: 0.1740 - mse: 0.0688 - r2_metric: 0.9308 - val_loss: 0.0502 - val_mae: 0.1481 - val_mse: 0.0502 - val_r2_metric: 0.9493 - 47s/epoch - 16ms/step
Epoch 5/20
2959/2959 - 52s - loss: 0.0352 - mae: 0.1194 - mse: 0.0352 - r2_metric: 0.9646 - val_loss: 0.0259 - val_mae: 0.0988 - val_mse: 0.0259 - val_r2_metric: 0.9738 - 52s/epoch - 17ms/step
Epoch 6/20
2959/2959 - 50s - loss: 0.0240 - ma

2025-04-19 19:26:49.089437: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-04-19 19:26:49.089946: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-04-19 19:26:49.090568: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus


Model with hyperparameters:
Num units: 64, rec. dropout: 0.0
   Test performance metrics:

    MAE: 0.1048, MSE: 0.0366, RMSE: 0.1913, R2: 0.9862

--- Starting trial: run-3
{'units': 64, 'recurrent_dropout': 0.2}
Epoch 1/20
2959/2959 - 88s - loss: 0.1585 - mae: 0.2778 - mse: 0.1585 - r2_metric: 0.8405 - val_loss: 0.1037 - val_mae: 0.2206 - val_mse: 0.1037 - val_r2_metric: 0.8949 - 88s/epoch - 30ms/step
Epoch 2/20
2959/2959 - 91s - loss: 0.0893 - mae: 0.2079 - mse: 0.0893 - r2_metric: 0.9100 - val_loss: 0.0558 - val_mae: 0.1591 - val_mse: 0.0558 - val_r2_metric: 0.9436 - 91s/epoch - 31ms/step
Epoch 3/20
2959/2959 - 92s - loss: 0.0463 - mae: 0.1448 - mse: 0.0463 - r2_metric: 0.9534 - val_loss: 0.0231 - val_mae: 0.0917 - val_mse: 0.0231 - val_r2_metric: 0.9766 - 92s/epoch - 31ms/step
Epoch 4/20
2959/2959 - 87s - loss: 0.0261 - mae: 0.1027 - mse: 0.0261 - r2_metric: 0.9737 - val_loss: 0.0185 - val_mae: 0.0802 - val_mse: 0.0185 - val_r2_metric: 0.9813 - 87s/epoch - 30ms/step
Epoch 5/20
295

2025-04-19 19:57:27.700761: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-04-19 19:57:27.701267: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-04-19 19:57:27.701738: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

2959/2959 - 98s - loss: 0.1303 - mae: 0.2469 - mse: 0.1303 - r2_metric: 0.8689 - val_loss: 0.0948 - val_mae: 0.2111 - val_mse: 0.0948 - val_r2_metric: 0.9040 - 98s/epoch - 33ms/step
Epoch 2/20
2959/2959 - 97s - loss: 0.0711 - mae: 0.1813 - mse: 0.0711 - r2_metric: 0.9283 - val_loss: 0.0443 - val_mae: 0.1413 - val_mse: 0.0443 - val_r2_metric: 0.9551 - 97s/epoch - 33ms/step
Epoch 3/20
2959/2959 - 98s - loss: 0.0356 - mae: 0.1207 - mse: 0.0356 - r2_metric: 0.9641 - val_loss: 0.0391 - val_mae: 0.1346 - val_mse: 0.0391 - val_r2_metric: 0.9606 - 98s/epoch - 33ms/step
Epoch 4/20
2959/2959 - 97s - loss: 0.0260 - mae: 0.0986 - mse: 0.0260 - r2_metric: 0.9738 - val_loss: 0.0265 - val_mae: 0.0994 - val_mse: 0.0265 - val_r2_metric: 0.9732 - 97s/epoch - 33ms/step
Epoch 5/20
2959/2959 - 97s - loss: 0.0226 - mae: 0.0895 - mse: 0.0226 - r2_metric: 0.9772 - val_loss: 0.0263 - val_mae: 0.1055 - val_mse: 0.0263 - val_r2_metric: 0.9734 - 97s/epoch - 33ms/step
Epoch 6/20
2959/2959 - 108s - loss: 0.0205 - m

2025-04-19 20:34:01.913513: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-04-19 20:34:01.914045: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-04-19 20:34:01.914845: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus


Model with hyperparameters:
Num units: 128, rec. dropout: 0.0
   Test performance metrics:

    MAE: 0.0958, MSE: 0.0321, RMSE: 0.1791, R2: 0.9879

--- Starting trial: run-5
{'units': 128, 'recurrent_dropout': 0.2}
Epoch 1/20
2959/2959 - 198s - loss: 0.1488 - mae: 0.2695 - mse: 0.1488 - r2_metric: 0.8501 - val_loss: 0.0955 - val_mae: 0.2118 - val_mse: 0.0955 - val_r2_metric: 0.9033 - 198s/epoch - 67ms/step
Epoch 2/20
2959/2959 - 196s - loss: 0.0809 - mae: 0.1969 - mse: 0.0809 - r2_metric: 0.9185 - val_loss: 0.0336 - val_mae: 0.1183 - val_mse: 0.0336 - val_r2_metric: 0.9661 - 196s/epoch - 66ms/step
Epoch 3/20
2959/2959 - 201s - loss: 0.0327 - mae: 0.1175 - mse: 0.0327 - r2_metric: 0.9671 - val_loss: 0.0215 - val_mae: 0.0895 - val_mse: 0.0215 - val_r2_metric: 0.9783 - 201s/epoch - 68ms/step
Epoch 4/20
2959/2959 - 197s - loss: 0.0232 - mae: 0.0945 - mse: 0.0232 - r2_metric: 0.9767 - val_loss: 0.0176 - val_mae: 0.0755 - val_mse: 0.0176 - val_r2_metric: 0.9822 - 197s/epoch - 67ms/step
Epoc

## Visualizing the hyperparameter results with Tensorboard

In [13]:
# Loading the Tensorboard extension
%load_ext tensorboard
%tensorboard --logdir "./logs/hparam_tuning/"

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
