In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

pd.options.mode.chained_assignment = None
import warnings
from collections import Counter

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import sklearn
from keras.wrappers.scikit_learn import KerasRegressor
from matplotlib.pylab import rcParams
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import (
    LSTM,
    Bidirectional,
    Dense,
    Dropout,
    RepeatVector,
    TimeDistributed,
)
from tensorflow.keras.models import Sequential

%matplotlib inline

# Silence every warning category so library warnings do not clutter cell output.
warnings.filterwarnings("ignore")


# Default look for all figures: seaborn "whitegrid" style, "muted" palette,
# and a 12x6 default matplotlib figure size.
sns.set(style="whitegrid", palette="muted")
rcParams["figure.figsize"] = 12, 6
# Seed NumPy's and TensorFlow's global random generators.
np.random.seed(1)
tf.random.set_seed(1)

In [2]:
def data(
    base,
    seq_size,
    data_dir="/Users/hn/OneDrive/Doctorado/Tesis/Proyecto Tesis/Codigos/Yahoo/Dataset/A1Benchmark/",
):
    """Load one benchmark CSV and turn it into scaled train / test windows.

    The second column of the CSV (``df.iloc[:, 1:2]``) is used as the series.
    The first half of the rows is the training part, the second half the test
    part. Both parts are scaled with a ``MinMaxScaler`` fitted on the training
    part only.

    Parameters
    ----------
    base : str
        File name of the CSV inside ``data_dir`` (e.g. ``"real_65.csv"``).
    seq_size : int
        Number of time steps per window; must be >= 1.
    data_dir : str, optional
        Directory that contains ``base``. Defaults to the original
        hard-coded dataset location so existing calls keep working.

    Returns
    -------
    x_train_p : numpy.ndarray
        Overlapping (stride 1) training windows, shape
        ``(len(train) - seq_size, seq_size, 1)``.
    x_test_p : numpy.ndarray
        Non-overlapping test windows, shape
        ``(len(test) // seq_size, seq_size, 1)``. A trailing partial window
        is dropped so every window has exactly ``seq_size`` steps.

    Raises
    ------
    ValueError
        If ``seq_size`` is smaller than 1.
    """
    import os

    if seq_size < 1:
        raise ValueError("seq_size must be a positive integer")

    df = pd.read_csv(os.path.join(data_dir, base))
    # Keep a 2-D (n_rows, 1) frame so the scaler and the windows carry a
    # trailing feature axis.
    values = df.iloc[:, 1:2]

    split = int(len(df) * 0.5)
    x_train = values[:split]
    x_test = values[split:]

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_train = scaler.fit_transform(x_train)
    # Re-use the training min/max: re-fitting on the test half would put the
    # two halves on different scales and leak test statistics.
    scaled_test = scaler.transform(x_test)

    x_train_p = np.array(
        [scaled_train[i : i + seq_size] for i in range(len(scaled_train) - seq_size)]
    )

    # Floor division guarantees that only complete windows are produced;
    # rounding up would append a shorter last window and make the array ragged.
    n_test_windows = len(scaled_test) // seq_size
    x_test_p = np.array(
        [scaled_test[i * seq_size : (i + 1) * seq_size] for i in range(n_test_windows)]
    )

    return x_train_p, x_test_p

In [3]:
def create_model(neurons, dropout, rec_drop, learning_rate=0.01):
    """Build and compile the stacked-LSTM reconstruction network.

    The window length and feature count are read from the notebook-level
    array ``x_train_p`` (``shape[1]`` and ``shape[2]``) at call time, so that
    variable must exist before this function is invoked.

    Parameters
    ----------
    neurons : int
        Base LSTM width; the stack uses 4x, 2x, 1x, 1x, 2x, 4x this value.
    dropout : float
        Rate for the ``Dropout`` layers.
    rec_drop : float
        ``recurrent_dropout`` for the four inner LSTM layers (the first and
        last LSTM layers do not use it).
    learning_rate : float, optional
        Learning rate of the Adam optimizer.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with Adam and mean-squared-error loss.
    """
    layers = tf.keras.layers
    timesteps = x_train_p.shape[1]
    n_features = x_train_p.shape[2]

    network = tf.keras.Sequential()
    add = network.add

    # Narrowing half: 4x -> 2x -> 1x units, last layer emits a single vector.
    add(
        layers.LSTM(
            neurons * 4,
            input_shape=(timesteps, n_features),
            return_sequences=True,
        )
    )
    add(layers.ReLU())

    add(layers.LSTM(neurons * 2, recurrent_dropout=rec_drop, return_sequences=True))
    add(layers.ReLU())
    add(layers.Dropout(dropout))

    add(layers.LSTM(neurons, recurrent_dropout=rec_drop, return_sequences=False))
    add(layers.ReLU())
    add(layers.Dropout(dropout))

    # Repeat the single vector once per time step to feed the widening half.
    add(layers.RepeatVector(timesteps))

    # Widening half: 1x -> 2x -> 4x units, all returning full sequences.
    add(layers.LSTM(neurons, recurrent_dropout=rec_drop, return_sequences=True))
    add(layers.ReLU())
    add(layers.Dropout(dropout))

    add(layers.LSTM(neurons * 2, recurrent_dropout=rec_drop, return_sequences=True))
    add(layers.ReLU())
    add(layers.Dropout(dropout))

    add(layers.LSTM(neurons * 4, return_sequences=True))
    add(layers.ReLU())

    # One dense projection per time step back to the input feature count.
    add(TimeDistributed(Dense(n_features)))

    network.compile(
        optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.losses.MeanSquaredError(),
    )
    return network

In [4]:
# Build the train / test windows for series "real_65.csv" with 8 steps per window.
x_train_p, x_test_p = data("real_65.csv", 8)

In [5]:
# Stop a fit once the validation loss has not improved by at least 1e-4
# for 5 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.0001,
    patience=5,
)

# scikit-learn compatible wrapper around create_model, capped at 150 epochs.
model = KerasRegressor(build_fn=create_model, epochs=150)

# Candidate values per hyper-parameter: 2 x 2 x 2 x 2 x 2 = 32 combinations.
batch_size = [80, 100]
dropout = [0.15, 0.2]
rec_drop = [0.15, 0.2]
neurons = [16, 32]
learning_rate = [0.01, 0.001]
param_grid = {
    "neurons": neurons,
    "dropout": dropout,
    "rec_drop": rec_drop,
    "learning_rate": learning_rate,
    "batch_size": batch_size,
}

# Exhaustive search with 4-fold cross-validation on all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=4,
    return_train_score=True,
)

# The windows are passed as both input and target; the extra keyword
# arguments are given to GridSearchCV.fit as fit parameters.
grid_result = grid.fit(
    x_train_p,
    x_train_p,
    validation_split=0.2,
    callbacks=[callback],
)

Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 2/150
Epoch 2/150
Epoch 2/150
Epoch 2/150
Epoch 2/150
Epoch 2/150
Epoch 3/150
Epoch 3/150
Epoch 3/150
Epoch 3/150
Epoch 3/150
Epoch 3/150
Epoch 2/150
Epoch 2/150
Epoch 4/150
Epoch 4/150
Epoch 4/150
Epoch 4/150
Epoch 4/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 3/150
Epoch 5/150
1/6 [====>.........................] - ETA: 0s - loss: 0.0505Epoch 4/150
Epoch 5/150
Epoch 5/150
Epoch 5/150
Epoch 6/150
Epoch 6/150
Epoch 4/150
Epoch 6/150
Epoch 5/150
Epoch 6/150
Epoch 6/150
Epoch 6/150
Epoch 7/150
Epoch 7/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 7/150
1/6 [====>.........................] - ETA: 0s - loss: 0.0518Epoch 7/150
Epoch 7/150
Epoch 8/150
Epoch 8/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 8/150
Epoch 8/150
Epoch 8/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 9/150
Epoch 9/150
Epoch 10/150
Epoch 10/150
Epoch 8/150
Epoch 10/150
Epoch 10/150
Epoch 11/150
Epoch 11/150
Epoch 9/

In [6]:
# Mean cross-validated score of the best hyper-parameter combination.
grid_result.best_score_

-0.003187329799402505

In [7]:
# Hyper-parameter combination that obtained the best mean cross-validated score.
grid_result.best_params_

{'batch_size': 80,
 'dropout': 0.2,
 'learning_rate': 0.01,
 'neurons': 32,
 'rec_drop': 0.15}

In [8]:
# Collect the cross-validation results of the grid search in a DataFrame.
resul = pd.DataFrame(grid_result.cv_results_)

In [9]:
# Write the grid-search results table to an Excel workbook.
# NOTE(review): the target is an absolute, machine-specific path — consider a
# configurable output directory so the notebook runs on other machines.
resul.to_excel(
    "/Users/hn/OneDrive/Doctorado/Tesis/Proyecto Tesis/Codigos/Yahoo/Dataset/aut_opt_65.xlsx"
)