In [1]:
import itertools
import pandas as pd
import numpy as np

from utils.loadData import loadData
from lstm.preprocessing import getDatasets, normalizeAtOnce
from lstm.model import getModel
from lstm.visualization import show_data_simple

np.random.seed(5)

In [2]:
df = loadData()

In [3]:
features = np.array(df["Close"]).reshape(-1, 1)
normalized_data, scaler = normalizeAtOnce(features)

In [4]:
def getModelBasicRegression(past, future, neurons):
  config = {
      "past": past,  
      "future": future,  
      "neurons": neurons,
      "batch_size": 128,  
      "epochs": 20,
      "features": 1
  }

  model = getModel(config)

  return model, config

In [5]:
def gridSearch(name):
  results = np.empty((0, 9))

  # define search space
  space = {
    "past": [10, 50, 100],
    "future": [10, 50],
    "neurons": [10, 50, 100]
  }

  pastFutureCombinations = list(itertools.product(space["past"], space["future"], space["neurons"])) # index 0 - past, 1 - future

  # loop through combinations of past and future
  for combination in pastFutureCombinations:
    tempResults = np.empty((0,3))
    # build each model 5 times and evaluate mean and std rmse
    for i in range(5):
      model, config = getModelBasicRegression(combination[0], combination[1], combination[2])
      
      dataset_train, dataset_val, _, _ = getDatasets(config, normalized_data)

      history = model.fit(
        dataset_train,
        epochs=config["epochs"],
        validation_data=dataset_val,
        verbose=0
      )

      rmse = min(history.history["val_rmse"])
      mae = min(history.history["val_mae"])
      mape = min(history.history["val_mape"])
      tempResults = np.vstack((tempResults, [rmse, mae, mape]))

    # record the mean and std of the 5 iterations above for this combination of parameters. Record the combination parameters as well
    mean_std_stamp = np.array(
      [np.mean(tempResults[:,0]), np.std(tempResults[:,0]), np.mean(tempResults[:,1]), np.std(tempResults[:,1]), np.mean(tempResults[:,2]), np.std(tempResults[:,2]), combination[0], combination[1], combination[2]]
    ).reshape(-1)
    results = np.vstack((results, mean_std_stamp))

  # choose the best one
  indexBest = np.argmin(results[:, 0])
  print(f"Best parameters are: past - {results[indexBest][2]}, future - {results[indexBest][3]}, neurons - {results[indexBest][4]}")

  # Record results in a csv file
  df = pd.DataFrame(results, columns=["RMSE mean", "RMSE std", "MAE mean", "MAE std", "MAPE mean", "MAPE std", "past", "future", "neurons"])
  df.to_csv(f"resultsGridSearch/{name}.csv")
  print("Done")

gridSearch("test")

2022-04-22 14:27:00.138909: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Best parameters are: past - 0.1746838092803955, future - 0.019317508135541856, neurons - 27.718617630004882
Done
