In [1]:
import glob
import warnings
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.layers import (
    LSTM,
    BatchNormalization,
    Dense,
    Dropout,
    Input,
    RepeatVector,
    TimeDistributed,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

warnings.filterwarnings("ignore")

In [2]:
# Default location of the A1Benchmark CSV files; kept so existing calls such as
# data("real_65.csv") keep working. Pass `base_dir` to read from elsewhere.
_A1_BENCHMARK_DIR = (
    "/Users/hn/OneDrive/Doctorado/Tesis/Proyecto Tesis/Codigos/Yahoo/Dataset/A1Benchmark/"
)


def _make_windows(series, window):
    """Cut a (n_samples, n_features) array into sliding windows and next-step targets.

    Window ``k`` holds rows ``k .. k + window - 1`` and its target is row
    ``k + window``. When the series has ``window`` rows or fewer, empty arrays
    with the regular trailing dimensions are returned.
    """
    inputs = np.array([series[i - window : i] for i in range(window, len(series))])
    # No-op for non-empty input; gives an empty result the shape (0, window, n_features).
    inputs = inputs.reshape(-1, window, series.shape[1])
    targets = series[window:].copy()
    return inputs, targets


def data(base, base_dir=_A1_BENCHMARK_DIR, window=8):
    """Load one benchmark CSV and build scaled sliding-window train/test arrays.

    The second CSV column is used as the signal. The first half of the rows is
    the training split and the second half the test split. The scaler is fitted
    on the training split only and then applied to the test split, so both
    splits share one scale and no test statistics leak into preprocessing
    (test values may therefore fall outside [0, 1]).

    Parameters
    ----------
    base : str
        CSV file name, resolved relative to ``base_dir``.
    base_dir : str, optional
        Directory containing the CSV file.
    window : int, optional
        Number of consecutive time steps per input sample (default 8).

    Returns
    -------
    x_train_p, y_train_p, x_test_p, y_test_p : numpy.ndarray
        Inputs have shape (n_windows, window, 1) and targets (n_windows, 1).

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1 or the file has fewer than two rows.
    """
    import os  # function-scoped so the notebook's import cell need not change

    if window < 1:
        raise ValueError("window must be a positive integer")

    df = pd.read_csv(os.path.join(base_dir, base))
    values = df.iloc[:, 1:2]  # keep 2-D (n_samples, 1), as the scaler expects

    split = int(len(df) * 0.5)
    if split == 0:
        raise ValueError("need at least two rows to build a train/test split")
    x_train = values[:split]
    x_test = values[split:]

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_train = scaler.fit_transform(x_train)
    # transform only: re-fitting here would rescale the test half independently.
    scaled_test = scaler.transform(x_test)

    x_train_p, y_train_p = _make_windows(scaled_train, window)
    x_test_p, y_test_p = _make_windows(scaled_test, window)

    return x_train_p, y_train_p, x_test_p, y_test_p

In [3]:
def create_model(neurons, dropout, rec_drop, learning_rate=0.01):
    """Build and compile the stacked-LSTM regressor used by the grid search.

    The network is three LSTM layers of width ``4 * neurons``, ``2 * neurons``
    and ``neurons`` (ReLU after the first two, dropout after the last two),
    followed by a single-unit dense layer. It expects inputs of shape (8, 1)
    and is compiled with Adam and mean-squared-error loss.

    Parameters
    ----------
    neurons : int
        Base width; the first two LSTM layers use multiples of it.
    dropout : float
        Rate of the two ``Dropout`` layers.
    rec_drop : float
        ``recurrent_dropout`` of the second and third LSTM layers.
    learning_rate : float, optional
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    The compiled ``tf.keras.Sequential`` model.
    """
    nn = tf.keras.layers

    # NOTE(review): the last LSTM keeps return_sequences=True, so the dense head
    # emits one value per time step rather than one per window - confirm this is
    # intended given that the targets hold a single next-step value.
    stack = [
        nn.LSTM(neurons * 4, input_shape=(8, 1), return_sequences=True),
        nn.ReLU(),
        nn.LSTM(neurons * 2, recurrent_dropout=rec_drop, return_sequences=True),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.LSTM(neurons, recurrent_dropout=rec_drop, return_sequences=True),
        nn.Dropout(dropout),
        nn.Dense(1),
    ]

    network = tf.keras.Sequential()
    for layer in stack:
        network.add(layer)

    adam = tf.optimizers.Adam(learning_rate=learning_rate)
    mse = tf.losses.MeanSquaredError()
    network.compile(optimizer=adam, loss=mse)
    return network

In [4]:
# Build the windowed train/test arrays from the "real_65.csv" series.
x_train_p, y_train_p, x_test_p, y_test_p = data("real_65.csv")

In [None]:
# Stop a fit once the validation loss has gone 20 epochs without improving,
# and roll back to the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True, patience=20, monitor="val_loss"
)

# scikit-learn style wrapper around create_model; each fit runs at most 150 epochs.
model = KerasRegressor(build_fn=create_model, epochs=150)

# Candidate values: 2 options for each of 5 hyper-parameters -> 32 combinations,
# each evaluated with 4-fold cross-validation.
neurons = [16, 32]
dropout = [0.15, 0.2]
rec_drop = [0.15, 0.2]
learning_rate = [0.01, 0.001]
batch_size = [80, 100]
param_grid = {
    "neurons": neurons,
    "dropout": dropout,
    "rec_drop": rec_drop,
    "learning_rate": learning_rate,
    "batch_size": batch_size,
}

# NOTE(review): cv=4 uses the default (unshuffled) K-fold on overlapping
# time-series windows, and n_jobs=-1 runs Keras fits in parallel workers -
# confirm both are intended.
grid = GridSearchCV(model, param_grid, cv=4, n_jobs=-1)

# Extra keyword arguments are forwarded to each underlying fit call.
grid_result = grid.fit(
    x_train_p,
    y_train_p,
    callbacks=[callback],
    validation_split=0.2,
)

In [6]:
# Score of the best parameter combination found by the grid search
# (negative here; presumably the negated loss reported by the Keras wrapper - confirm).
grid_result.best_score_

-0.023526855278760195

In [7]:
# Hyper-parameter combination that achieved `best_score_`.
grid_result.best_params_

{'batch_size': 100,
 'dropout': 0.15,
 'learning_rate': 0.01,
 'neurons': 16,
 'rec_drop': 0.15}

In [8]:
# Collect the grid search's `cv_results_` into a DataFrame for inspection/export.
resul = pd.DataFrame(grid_result.cv_results_)

In [9]:
#resul.to_excel("/Users/hn/OneDrive/Doctorado/Tesis/Proyecto Tesis/Codigos/Yahoo/Dataset/lstm_opt_65.xlsx")