In [1]:
import pandas as pd
import joblib
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import keras_tuner as kt
from keras_tuner import Objective
import time

In [2]:
def createdf(number):
    nombres_columnas = ['unit', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3']
    nombres_columnas += [f'sensor_{i}' for i in range(1, 24)]
    df = pd.read_csv(rf"./CMAPSSData/train_FD00{number}.txt",sep = " ", header = None, index_col = None)
    df.columns = nombres_columnas
    df = df.iloc[:, :-2]
    return df

In [3]:
df = createdf(4)

In [4]:
max_cycle_by_unit = df.groupby('unit')['cycle'].transform('max')
df['RUL'] = max_cycle_by_unit - df['cycle']

In [5]:
X_total = df.drop(['unit', 'cycle', 'RUL'], axis = 1)
y = df.RUL

In [6]:
X_final = pd.read_csv('X_final.csv')
X_scaled = pd.read_csv('X_scaled.csv')

In [7]:
def LSTM_secuencia(df, df_valores ,ventana=30):

    max_cycle_by_unit = df.groupby('unit')['cycle'].transform('max')
    df['RUL'] = max_cycle_by_unit - df['cycle']

    sequencias = []
    etiquetas = []

    df_no_escaladas = df[['unit','cycle', 'RUL']]
    df_combinado = pd.concat([df_no_escaladas, df_valores], axis= 1)

    for motor, grupo in df_combinado.groupby('unit'):

        grupo = grupo.sort_values('cycle')
        valores = grupo[df_valores.columns].values
        rul_valores = grupo.RUL.values
        for i in range(len(grupo) - ventana + 1):
            X_ventana = valores[i:i+ventana]
            y_ventana = rul_valores[i+ventana - 1]
            sequencias.append(X_ventana)
            etiquetas.append(y_ventana)

    return np.array(sequencias), np.array(etiquetas)

In [8]:
def model_const(hp):
    model = Sequential()
    
    model.add(LSTM(
        units=hp.Int('units1', min_value=80, max_value=140, step=20),
        return_sequences=True,
        input_shape=(ventana, n_columnas)
    ))
    model.add(Dropout(hp.Float('dropout1', 0.2, 0.3, step=0.1)))

    model.add(LSTM(
        units=hp.Int('units2', min_value=20, max_value=80, step=20)
    ))

    model.add(Dense(1))

    model.compile(
        optimizer='adam',
        loss='mse',
        metrics = [RootMeanSquaredError(name='rmse')]
    )

    return model

def model_train(df, df_valores, ventana=30, max_trials=20, epochs=50, label = 'final'):
    start = time.time()
    
    X_seq, y_seq = LSTM_secuencia(df, df_valores, ventana=ventana)

    global n_columnas
    n_columnas = X_seq.shape[2]
    X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

    tuner = kt.GridSearch(
        model_const,
        objective=Objective("val_rmse", direction="min"),
        max_trials=max_trials,
        overwrite=True,
        directory="lstm_tuning",
        project_name="CMAPSS_RUL",
    )

    early_stop = EarlyStopping(monitor='val_rmse', patience=2, restore_best_weights=True, mode='min')

    tuner.search(X_train, y_train,
                 validation_split=0.2,
                 epochs=epochs,
                 batch_size=64,
                 callbacks=[early_stop],
                 verbose=1,
)

    best_model = tuner.get_best_models(num_models=1)[0]
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

    y_pred = best_model.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    best_model.save(f"best_lstm_model_{label}_kt.h5")
    end = time.time()
    total_time = end - start
    return rmse, mae, r2, total_time, best_model, tuner, best_hps

In [9]:
global ventana
ventana = 40

In [10]:
rmse, mae, r2, total_time, best_model, tuner, best_hps = model_train(df, X_final, ventana = ventana)

Trial 16 Complete [00h 02m 04s]
val_rmse: 81.97135162353516

Best val_rmse So Far: 81.9712142944336
Total elapsed time: 00h 37m 58s
[1m  1/323[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m42s[0m 134ms/step

  saveable.load_own_variables(weights_store.get(inner_path))


[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step




In [11]:
print(rmse, mae, r2, total_time, best_model, tuner, best_hps)

6565.6220703125 65.2789306640625 -0.00022530555725097656 2280.748761177063 <Sequential name=sequential, built=True> <keras_tuner.src.tuners.gridsearch.GridSearch object at 0x000002A85D0AF210> <keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters object at 0x000002A85F91E150>


In [12]:
rmse_scaled, mae_scaled, r2_scaled,  total_time_scaled, best_model_scaled, tuner_scaled, best_hps_scaled = model_train(df, X_scaled, ventana = ventana, label = 'scaled')

Trial 16 Complete [00h 02m 55s]
val_rmse: 81.97142791748047

Best val_rmse So Far: 81.97122955322266
Total elapsed time: 00h 47m 10s


  saveable.load_own_variables(weights_store.get(inner_path))


[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step




In [13]:
best_model.summary()

In [14]:
for param in best_hps.values:
    print(f"{param}: {best_hps.get(param)}")

units1: 80
dropout1: 0.2
units2: 80


In [15]:
best_model_scaled.summary()

In [16]:
for param in best_hps_scaled.values:
    print(f"{param}: {best_hps_scaled.get(param)}")

units1: 100
dropout1: 0.2
units2: 40


In [17]:
rmse_scaled

6565.732421875

In [18]:
rmse

6565.6220703125