In [None]:
import multiprocessing
import os
import time
from datetime import datetime
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanAbsoluteError, RootMeanSquaredError
from keras.backend import clear_session
from keras.callbacks import EarlyStopping
import optuna
from optuna.integration import TFKerasPruningCallback
from google.cloud import storage

from helpers import DataPreparator


In [2]:
# Database credentials are read from the environment so that no secret is stored in the notebook.
# VITALDB_DB_PASSWORD is required (KeyError if unset); the other settings fall back to the
# local development defaults.
db_user = os.environ.get("VITALDB_DB_USER", "root")
db_password = os.environ["VITALDB_DB_PASSWORD"]
db_host = os.environ.get("VITALDB_DB_HOST", "127.0.0.1")
db_name = os.environ.get("VITALDB_DB_NAME", "vitaldb_anesthesiaml")

# quote_plus escapes characters such as '@' or '/' that would otherwise corrupt the connection URL.
engine = create_engine(
    f"mysql://{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}/{db_name}"
)

# Load the full table of preprocessed records into memory.
df = pd.read_sql('SELECT * FROM vitaldb_preprocessed', con=engine)

In [3]:
# Clinical information table, fetched from the database once.
df_clinical_info = pd.read_sql('SELECT * FROM vitaldb_clinical_info', con=engine)

# Independent copy of the same table under its short alias; copying the frame avoids
# running the identical SELECT a second time.
df_ci = df_clinical_info.copy()

In [4]:
# Hand both tables to the project helper that produces the sequences and the train/test split.
data_prep = DataPreparator(
    df,
    df_clinical_info,
    time_window_before=10,
    static_features=['age', 'sex', 'asa'],
)

In [5]:
# Notebook-level settings; the values mirror the arguments given to DataPreparator.
target_col = 'insp_sevo'
static_features = ['age', 'sex', 'asa']
time_window_before = 10

# Separate copy of the loaded table, so later edits to it leave `df` untouched.
df_time_series = df.copy()

# The first ten distinct case ids found in the table.
caseids_less = df['caseid'].unique()[:10]

In [6]:
# Get the data in the format required by the LSTM.
# NOTE(review): presumably X is (samples, timesteps, features), y holds the matching targets
# and caseids holds the case id of each sample -- confirm against DataPreparator.generate_sequences.
X, y, caseids = data_prep.generate_sequences()

In [8]:

# Split the data into training and testing sets.
# Besides the feature/target arrays, the helper returns masks and case-id lists for both subsets.
# NOTE(review): later cells index `caseids` with `test_mask` and assume the result lines up
# row-for-row with X_test / y_test -- confirm against DataPreparator.split_train_test.
X_train, X_test, y_train, y_test, train_mask, test_mask, train_ids, test_ids = data_prep.split_train_test()


In [None]:
def _build_lstm_model(input_shape, lstm_units, dropout_rate, learning_rate, num_lstm_layers):
    """Build and compile the stacked-LSTM regressor for one set of hyperparameters.

    Args:
        input_shape: (timesteps, features) tuple describing a single input sequence.
        lstm_units: Number of units in every LSTM layer.
        dropout_rate: Dropout fraction applied after every LSTM layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        num_lstm_layers: Number of stacked LSTM layers (at least 1).

    Returns:
        A compiled Sequential model with a single-unit Dense output, MSE loss and
        MAE / RMSE / MAPE metrics.
    """
    model = Sequential()
    for layer_idx in range(num_lstm_layers):
        # Every LSTM except the last must emit the whole sequence, because the LSTM
        # stacked on top of it expects 3-D input.
        layer_kwargs = {'return_sequences': layer_idx < num_lstm_layers - 1}
        if layer_idx == 0:
            layer_kwargs['input_shape'] = input_shape
        model.add(LSTM(lstm_units, **layer_kwargs))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1))  # Output layer

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae', RootMeanSquaredError(), 'mape'],
    )
    return model


def create_model(trial):
    """Train one LSTM configuration suggested by `trial` and return its validation loss.

    The function samples the hyperparameters from `trial`, trains on the notebook-level
    `X_train` / `y_train` arrays with a 20% validation split, and reports `val_loss`
    to the pruning callback after every epoch.

    Args:
        trial: Trial object providing `suggest_int`, `suggest_float` and
            `suggest_categorical`; it is also handed to TFKerasPruningCallback.

    Returns:
        The lowest validation MSE reached during training. Early stopping restores the
        weights of that best epoch, so this is the loss of the model that is kept; the
        last recorded epoch can be several epochs past the best one.
    """
    # Clear the session to avoid memory issues when many trials run in one kernel
    clear_session()

    # Hyperparameters to be optimized (names and ranges define the search space)
    batchsize = trial.suggest_int("batchsize", 64, 128, step=32)
    lstm_units = trial.suggest_int("lstm_units", 32, 64)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.3)
    learning_rate = trial.suggest_categorical("learning_rate", [1e-4, 1e-3, 1e-2])
    num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 2)

    # The input shape is (timesteps, features): sequence length and number of features
    model = _build_lstm_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        lstm_units=lstm_units,
        dropout_rate=dropout_rate,
        learning_rate=learning_rate,
        num_lstm_layers=num_lstm_layers,
    )

    # Callbacks: stop when val_loss stalls, and let the trial be pruned early
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    pruning_callback = TFKerasPruningCallback(trial, monitor='val_loss')

    # Train the model
    history = model.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=30,
        batch_size=batchsize,
        callbacks=[early_stopping, pruning_callback],
        verbose=1
    )

    # Report the best epoch rather than the last one, to match the restored weights
    return min(history.history['val_loss'])

In [None]:
# Model evaluation: predict the target for every test sequence.
# NOTE(review): `model` is not defined in any visible cell -- create_model() keeps its model
# local -- so this line relies on state created elsewhere; confirm it survives Restart & Run All.
y_pred = model.predict(X_test)


In [None]:
# Regression metrics on the held-out test set (sklearn.metrics is imported in the import cell).

# Mean absolute error, in the units of the target
mae_value = mean_absolute_error(y_test, y_pred)
print(f"Test MAE: {mae_value:.4f}")

# sklearn reports MAPE as a fraction (0.25 means 25%), not as a percentage.
# NOTE(review): MAPE becomes extremely large when the true target is zero or close to zero --
# check whether y_test contains such values before relying on this number.
mape_value = mean_absolute_percentage_error(y_test, y_pred)
print(f"Test MAPE (sklearn): {mape_value:.4f}")

# Coefficient of determination
r2_value = r2_score(y_test, y_pred)
print(f"Test R-squared: {r2_value:.4f}")

In [None]:
# Plot the real target against the LSTM prediction for the first few test cases.
N_CASES_TO_PLOT = 5

# Case id of every test sample; computed once because it is the same for every iteration.
# NOTE(review): assumes caseids[test_mask] lines up row-for-row with y_test / y_pred --
# confirm against DataPreparator.
test_caseids = caseids[test_mask]

for caseid in test_ids[:N_CASES_TO_PLOT]:
    case_mask = (test_caseids == caseid)
    case_len = np.sum(case_mask)
    if case_len == 0:
        # No test samples belong to this case, so there is nothing to score or plot
        continue

    case_r2 = round(r2_score(y_test[case_mask], y_pred[case_mask]), 3)
    print('CaseID {}, LSTM R^2={}'.format(caseid, case_r2))

    t = np.arange(0, case_len)
    fig, ax = plt.subplots(figsize=(20, 5))
    ax.plot(t, y_test[case_mask], label='Real value of inspired sevo')  # Ground truth
    ax.plot(t, y_pred[case_mask], label='Predicted value of inspired sevo')  # LSTM model
    ax.set_title('CaseID {}: real vs. predicted inspired sevo'.format(caseid))
    ax.set_xlabel('Sample index within case')
    ax.set_ylabel('insp_sevo')
    ax.legend()
    ax.set_xlim([0, case_len])
    plt.show()
    # Release the figure so that figures do not accumulate in memory across iterations
    plt.close(fig)