In [88]:
import os
os.chdir('C:/Users/WulfN/Python Projects/time_series_model_comparison')

# Functions
# %run data_prep_fns/general_scale.py

# Multiple Outputs per cell
%config interactive_shell.ast_node_interactivity='all'

In [21]:
# Read in test datasets
import pickle

with open('datasets/energy_dt.pickle', 'rb') as file:
    energy_dt = pickle.load(file)  

with open('datasets/glob_pop.pickle', 'rb') as file_2:
    glob_pop_dt = pickle.load(file_2)

In [14]:
energy_dt.columns

Index(['T1', 'RH_1', 'T2', 'RH_2', 'T3', 'RH_3', 'T4', 'RH_4', 'T5', 'RH_5',
       'T6', 'RH_6', 'T7', 'RH_7', 'T8', 'RH_8', 'T9', 'RH_9', 'T_out',
       'Press_mm_hg', 'RH_out', 'Windspeed', 'Visibility', 'Tdewpoint', 'rv1',
       'rv2', 'total_Wh'],
      dtype='object')

### Testing / Validation Split

In [81]:

# from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Split datasets into testing and validation

# Energy data
x_energy = energy_dt.drop(columns = ['total_Wh'])
x_energy_sc = MinMaxScaler().fit_transform(x_energy)
y_energy = energy_dt['total_Wh']
# y_energy_sc = MinMaxScaler().transform(y_energy) # transform so as not to introduce bias from training set from fit() function the testing set
### Test how scaling or NOT scaling y affects the model ###

x_train_1, x_test_1, y_train_1, y_test_1 = train_test_split(x_energy_sc, y_energy, test_size = .2, shuffle = False)

# Global Population data
x_glob_pop = glob_pop_dt.drop(columns = ['Population'])
x_glob_pop_sc = MinMaxScaler().fit_transform(x_glob_pop)
y_glob_pop = glob_pop_dt['Population']
# y_glob_pop_sc = MinMaxScaler().transform(y_glob_pop)

x_train_2, x_test_2, y_train_2, y_test_2 = train_test_split(x_glob_pop_sc, y_glob_pop, test_size = .2, shuffle = False)


In [137]:
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
window_size = 2 # If hour context, then 6, if day, then 144 (perhaps try 72 has half day, or 24 as 1/6th of day)
# (2, 6, 24, 72, 144)
### Try lengths of different size: see what DFs of different lengths look like
ts_train_windows = timeseries_dataset_from_array(x_train_1, x_train_1, sequence_length=window_size)  # length = 2, 3, 4
# ts_train_windows_1 = timeseries_dataset_from_array(x_train_1, y_train_1, sequence_length=window_size)

ts_test_windows = timeseries_dataset_from_array(x_test_1, x_test_1, sequence_length=window_size)

In [141]:
### Building familiarity with keras and LSTM params

import tensorflow as tf
import keras
from tensorflow.keras.layers import LSTM, Dense

tf.random.set_seed(4)

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, 
                                            min_delta = .001,
                                            restore_best_weights=True,
                                            start_from_epoch=5) 

lstm_1 = tf.keras.Sequential() 
lstm_1.add(tf.keras.layers.LSTM(16, activation='relu', 
                                input_shape=(window_size, x_train_1.shape[1]))) 


lstm_1.add(tf.keras.layers.Dense(1)) 
lstm_1.compile(loss='mse', 
               optimizer='adam') 

lstm_1.fit(x=ts_train_windows, 
           shuffle=False,
           epochs=50,
           callbacks = [callback])


Epoch 1/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - loss: 0.1806
Epoch 2/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 0.0504
Epoch 3/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.0485
Epoch 4/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 0.0479
Epoch 5/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 0.0476
Epoch 6/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 0.0474
Epoch 7/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 0.0472
Epoch 8/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.0471
Epoch 9/50
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 0.0470


<keras.src.callbacks.history.History at 0x22509c7dc40>

In [None]:
# Try 1 with generateTrain set

import tensorflow as tf
import keras
import keras_tuner as kt
from tensorflow.keras.layers import LSTM, Dense

tf.random.set_seed(4)

# https://keras.io/guides/keras_tuner/getting_started/

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, 
                                            min_delta = .001,
                                            restore_best_weights=True,
                                            start_from_epoch=5) 
def lstm_model_build(hp):

    model_type = hp.Choice("model_type", ["base", "base_dropout"]) # "stacked", "stacked_dropout"

    # Perhaps include data window size variations too

    if model_type == 'base':
        lstm_1 = tf.keras.Sequential() 
        lstm_1.add(tf.keras.layers.LSTM(
            hp.Choice('units', [16, 26]), # , 32, 52, 64, 128
            hp.Choice('activation', values=['relu', 'sigmoid']),
            input_shape=(window_size, x_train_1.shape[1]))) 
        lstm_1.add(tf.keras.layers.Dense(1))

    elif model_type == 'base_dropout':
        lstm_1 = tf.keras.Sequential() 
        lstm_1.add(tf.keras.layers.LSTM(
            hp.Choice('units', [16, 26]), # , 32, 52, 64, 128
            hp.Choice('activation', values=['relu', 'sigmoid']), 
            input_shape=(window_size, x_train_1.shape[1]))) 
        lstm_1.add(tf.keras.layers.Dropout(.2)) 
        lstm_1.add(tf.keras.layers.Dense(1))

        lstm_1.compile(loss='mse',
                    optimizer='adam') # Another param: metrics=[keras.metrics.RootMeanSquaredError()]
        # Learning rate? 

    # Currently, only 1 epoch is run, need callback within this function in order to ensure early stopping occurs    
    return lstm_1

tuner = kt.GridSearch( # what is Hyperband
    hypermodel=lstm_model_build,
    objective=kt.Objective('loss', direction = 'min'),
    # max_trials=3, 
    seed=89,
    directory='keras_tuner_dir', 
    project_name='lstm_1',
    overwrite=True
)

# Perform hyperparameter search
tuner.search(
    x=ts_train_windows, 
    # should require a y set as well, right? 
    callbacks=[keras.callbacks.EarlyStopping(patience=3)]
)

# return lstm_1


In [199]:
dir()

AttributeError: module 'keras_tuner' has no attribute 'Gridsearch'

In [198]:
tuner.results_summary()


# score model, record loss
# models = []

# for model in best_model:
#     loss = model.evaluate(ts_test_windows)
#     print("Loss:", loss)
#best_model.

Results summary
Results in keras_tuner_dir\lstm_1
Showing 10 best trials
Objective(name="loss", direction="min")

Trial 0002 summary
Hyperparameters:
model_type: base_dropout
units: 16
Score: 0.06586642563343048

Trial 0003 summary
Hyperparameters:
model_type: base_dropout
units: 26
Score: 0.08675344288349152

Trial 0000 summary
Hyperparameters:
model_type: base
units: 16
Traceback (most recent call last):
  File "c:\Users\WulfN\Python Projects\virtual_env\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "c:\Users\WulfN\Python Projects\virtual_env\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\WulfN\Python Projects\virtual_env\Lib\site-packages\keras_tuner\src\engine\tuner.py", line 314, in

In [177]:
grid_num = 4

best_hp = tuner.get_best_hyperparameters(grid_num)

for i in range(0,grid_num):
    print(best_hp[i].values)

{'model_type': 'base_dropout', 'units': 26}
{'model_type': 'base_dropout', 'units': 16}
{'model_type': 'base', 'units': 16}
{'model_type': 'base', 'units': 26}


In [None]:
# compare predictions to actuals
import matplotlib.pyplot as plt

plt.plot(lstm_1.predict(generateTest), label = 'Predictions') # these need to be rescaled
plt.plot(y_test_1, label = 'Actuals')   
plt.legend()
plt.show()


In [None]:
# compare predictions to actuals
import matplotlib.pyplot as plt

plt.plot(lstm_1.predict(generateTest), label = 'Predictions') # these need to be rescaled
plt.plot(y_test_1, label = 'Actuals')   
plt.legend()
plt.show()
