# Recurrent Neural Network

## Library imports

In [None]:
import numpy as np

from tensorflow.keras.utils import normalize, plot_model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras import models, callbacks

import matplotlib.pyplot as plt
import time
from sklearn.metrics import confusion_matrix, roc_curve, auc
import seaborn as sns

## Data import and display

In [None]:
# Load the ECG200 train/test splits (tab-separated). The code below treats
# column 0 of each row as the class label and the remaining columns as the
# samples of one time series.
train_data = np.loadtxt('data/ECG200_TRAIN.tsv', delimiter='\t')
test_data = np.loadtxt('data/ECG200_TEST.tsv', delimiter='\t')

# Peek at the first training record: its label, then its samples.
first_record = train_data[0]
print("health:", first_record[0])
print(first_record[1:])

# Report the shape of both splits.
print("Dimensions des données d'entrainnement:", train_data.shape)
print("Dimensions des données de test:", test_data.shape)

# Plot the first few training series, one subplot per series.
nb_series_a_afficher = 5
plt.figure(figsize=(27, 7))
for i in range(nb_series_a_afficher):
    etat_sante, serie_temporelle = train_data[i, 0], train_data[i, 1:]
    plage_temps = range(len(serie_temporelle))

    # The grid is declared with two rows; only the first row gets filled.
    ax = plt.subplot(2, nb_series_a_afficher, i + 1)
    ax.plot(plage_temps, serie_temporelle, label='Série temporelle')
    ax.set_xlabel('Temps')
    ax.set_ylabel('Valeurs')
    # Label -1 is shown as "Bon"; any other label is shown as "Mauvais".
    ax.set_title(f'Série temporelle {i+1} (Santé: {"Bon" if etat_sante == -1 else "Mauvais"})')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

## Data processing

In [None]:
# Recode label -1 as 0, in place, in the label column of both splits.
for dataset in (train_data, test_data):
    dataset[dataset[:, 0] == -1, 0] = 0

# Split each array into labels (column 0) and series (remaining columns).
y_train, X_train = train_data[:, 0], train_data[:, 1:]
y_test, X_test = test_data[:, 0], test_data[:, 1:]

# Rescale every series independently of the others (axis=1 = per row).
# NOTE: keras `normalize` divides by the L2 norm by default, so each series
# ends up with unit Euclidean norm; values are NOT mapped into [0, 1].
X_train = normalize(X_train, axis=1)
X_test = normalize(X_test, axis=1)

# Show the first processed record and the resulting shapes.
print("health:", y_train[0])
print(X_train[0])
print("Dimensions des données d'entrainnement:", (X_train.shape, y_train.shape))

## Choice of Hyperparameters

In [None]:
# --- Architecture ---------------------------------------------------------
# NOTE(review): `timesteps` is not referenced by any visible code — confirm
# whether it is still needed.
timesteps = 1
nb_neurons, nb_classes = 8, 1  # LSTM units, output units
final_activation = 'sigmoid'

# --- Compilation ----------------------------------------------------------
optimizer_algo, cost_function = 'adam', 'binary_crossentropy'

# --- Training run ---------------------------------------------------------
nb_epochs, mini_batch_size = 500, 32
# Fraction of the training arrays handed to fit() as `validation_split`.
percentage_of_train_as_validation = 0.2



## Model construction

In [None]:
# Input: sequences of 96 steps with one feature per step.
input_shape = (96, 1)
input_layer = Input(shape=input_shape)

# Instantiate the layers first, then wire them together.
recurrent_block = LSTM(nb_neurons)
classifier_head = Dense(nb_classes, activation=final_activation)

# Hidden block: a single LSTM layer applied to the input sequence.
lstm_layer = recurrent_block(input_layer)

# Output: dense layer on top of the LSTM output.
output_layer = classifier_head(lstm_layer)

# Functional-API model from input tensor to output tensor.
model_rnn = models.Model(inputs=input_layer, outputs=output_layer)

## Model compilation

In [None]:
# Compile with the loss and optimizer selected in the hyperparameter cell.
# Using `optimizer_algo` (rather than a literal) keeps that setting effective:
# changing it there now actually changes the optimizer used here.
model_rnn.compile(
    loss=cost_function,
    optimizer=optimizer_algo,
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the compiled model.
model_rnn.summary()

## Model training

In [None]:
# Checkpoint callback: monitors the validation loss and, with
# save_best_only=True, only overwrites 'best_model_RNN.keras' when that
# loss improves, so the file ends up holding the best model seen so far.
model_checkpoint = callbacks.ModelCheckpoint(
    filepath='best_model_RNN.keras',
    monitor='val_loss',
    save_best_only=True,
)

# Options forwarded to fit(); gathered here so the call itself stays short.
fit_options = dict(
    batch_size=mini_batch_size,
    epochs=nb_epochs,
    validation_split=percentage_of_train_as_validation,
    verbose=False,
    callbacks=[model_checkpoint],
)

# Train, bracketing the call with wall-clock timestamps so the training
# duration can be reported afterwards.
start_training = time.time()
history = model_rnn.fit(X_train, y_train, **fit_options)
end_training = time.time()

## Model evaluation

In [None]:
# Reload the checkpoint written during training and evaluate it on both splits.
best_model_rnn = models.load_model('best_model_RNN.keras')

train_loss, train_accuracy = best_model_rnn.evaluate(X_train, y_train)

# Only the test-set evaluation is timed.
start_evaluate = time.time()
test_loss, test_accuracy = best_model_rnn.evaluate(X_test, y_test)
end_evaluate = time.time()

# Derived metrics. Predictions are taken from the checkpointed model so that
# the confusion matrix and ROC curve describe the same weights as the
# loss/accuracy reported above (model_rnn holds the last-epoch weights, which
# can differ from the checkpoint).
y_pred = best_model_rnn.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int).flatten()

# Pin the label order so the matrix is always 2x2 (rows/columns: 0 then 1),
# even if one class happens to be absent from the predictions.
conf_matrix = confusion_matrix(y_test, y_pred_classes, labels=[0, 1])
# Tick labels follow the same order: index 0 is class 0, index 1 is class 1.
# Class 1 is the positive class for both the 0.5 threshold and roc_curve.
class_names = ['Négatif', 'Positif']

# roc_curve expects 1-D scores; predict() returns one column per output unit.
fpr, tpr, thresholds = roc_curve(y_test, y_pred.ravel())
roc_auc = auc(fpr, tpr)

## Plot best result

In [None]:
# Durations measured around fit() and around the test-set evaluate().
training_time_seconds = end_training - start_training
evaluate_time_seconds = end_evaluate - start_evaluate

# Textual summary of the checkpointed model's size, timings and scores.
summary_lines = [
    f"\nNombre total de paramètres : {best_model_rnn.count_params()}",
    f"\nTemps d'entraînement : {training_time_seconds:.3f} secondes.",
    f"Temps d'évaluation : {evaluate_time_seconds:.3f} secondes.",
    f'\nMoyenne de train_accuracy_vals: {np.mean(train_accuracy) * 100:.2f}%',
    f'Moyenne de train_loss_vals: {np.mean(train_loss) * 100:.2f}%',
    f'\nMoyenne de test_accuracy_vals: {np.mean(test_accuracy) * 100:.2f}%',
    f'Moyenne de test_loss_vals: {np.mean(test_loss) * 100:.2f}%',
    f'\nAUC-ROC : {roc_auc * 100:.2f}%',
]
for summary_line in summary_lines:
    print(summary_line)

# 2x2 figure: learning curves on the top row, confusion matrix and ROC below.
plt.figure(figsize=(16, 12))

# Top row: per-epoch training/validation curves read from the fit() history.
# Each entry: (subplot index, y-axis label, title, ((history key, legend), ...)).
history_panels = (
    (1, 'Accuracy', 'Training and Validation Accuracy',
     (('accuracy', 'Training Accuracy'),
      ('val_accuracy', 'Validation Accuracy'))),
    (2, 'Loss', 'Training and Validation Loss',
     (('loss', 'Training Loss'),
      ('val_loss', 'Validation Loss'))),
)
for panel_index, y_label, panel_title, curves in history_panels:
    plt.subplot(2, 2, panel_index)
    for history_key, curve_label in curves:
        plt.plot(history.history[history_key], label=curve_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(panel_title)
    plt.legend()
    plt.ylim(0, 1)  # both panels share a fixed 0..1 vertical range

# Bottom-left: confusion matrix rendered as an annotated heat map.
plt.subplot(2, 2, 3)
sns.heatmap(
    conf_matrix,
    annot=True,
    cmap="Blues",
    fmt='g',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')

# Bottom-right: ROC curve with the chance diagonal as a reference.
plt.subplot(2, 2, 4)
plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (AUC = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

## Cross-validation

In [None]:
# Reload both splits from disk so this cell starts from the raw files.
train_data = np.loadtxt('data/ECG200_TRAIN.tsv', delimiter='\t')
test_data = np.loadtxt('data/ECG200_TEST.tsv', delimiter='\t')

# Test split: recode label -1 as 0, separate labels from series, then
# normalize each series independently (axis=1).
test_data[test_data[:, 0] == -1, 0] = 0
y_test, X_test = test_data[:, 0], test_data[:, 1:]
X_test = normalize(X_test, axis=1)

# Hyperparameters (same values as in the single-run cells).
# NOTE(review): `timesteps` is not referenced by any visible code.
timesteps = 1
nb_neurons, nb_classes = 8, 1
final_activation = 'sigmoid'
optimizer_algo, cost_function = 'adam', 'binary_crossentropy'
nb_epochs, mini_batch_size = 500, 32
percentage_of_train_as_validation = 0.2

# Number of repetitions, and one accumulator per metric (one value per run).
num_splits = 5
test_accuracy_vals, test_loss_vals = [], []
train_accuracy_vals, train_loss_vals = [], []

# Recode label -1 as 0 once, before the repetitions: the remap is identical
# for every run, so it does not belong inside the loop.
train_data[train_data[:, 0] == -1, 0] = 0

# Repeated random hold-out: every repetition reshuffles the training rows,
# trains a fresh model, and scores the best checkpoint on train and test.
for i in range(num_splits):
    # np.random.permutation returns a shuffled COPY of the rows, so
    # `train_data` itself keeps its order. fit() takes its validation split
    # from the END of the arrays it receives, so reshuffling changes which
    # rows are held out in each repetition.
    train_data_suffled = np.random.permutation(train_data)
    X_train, y_train = train_data_suffled[:, 1:], train_data_suffled[:, 0]
    # Apply the same per-series normalization as X_test received; the result
    # must be bound to X_train so that the model is trained and evaluated on
    # the same representation.
    X_train = normalize(X_train, axis=1)

    # Build and compile a fresh model for this repetition. The sequence length
    # is read from the data instead of being hard-coded.
    input_shape = (X_train.shape[1], 1)
    input_layer = Input(input_shape)
    lstm_layer = LSTM(units=nb_neurons)(input_layer)
    output_layer = Dense(units=nb_classes, activation=final_activation)(lstm_layer)
    model_rnn = models.Model(inputs=input_layer, outputs=output_layer)
    model_rnn.compile(loss=cost_function, optimizer=optimizer_algo, metrics=['accuracy'])

    # A new callback per repetition: the best-so-far tracking starts over and
    # the checkpoint file is overwritten by this run's best model.
    model_checkpoint = callbacks.ModelCheckpoint('best_model_RNN.keras', monitor='val_loss', save_best_only=True)

    history = model_rnn.fit(X_train, y_train,
                            batch_size=mini_batch_size,
                            epochs=nb_epochs,
                            validation_split=percentage_of_train_as_validation,
                            verbose=False,
                            callbacks=[model_checkpoint])

    # Evaluate this repetition's best checkpoint on both splits.
    best_model_rnn = models.load_model('best_model_RNN.keras')
    train_loss, train_accuracy = best_model_rnn.evaluate(X_train, y_train)
    test_loss, test_accuracy = best_model_rnn.evaluate(X_test, y_test)

    # Record one value per metric for the summary statistics.
    test_accuracy_vals.append(test_accuracy)
    test_loss_vals.append(test_loss)
    train_accuracy_vals.append(train_accuracy)
    train_loss_vals.append(train_loss)

# Summary over all repetitions: number of runs, mean of each metric, and the
# variance of the test accuracy and test loss.
print("\nNombre d'essais :", num_splits)

report_lines = (
    f'\nMoyenne de train_accuracy_vals: {np.mean(train_accuracy_vals) * 100:.2f}%',
    f'Moyenne de train_loss_vals: {np.mean(train_loss_vals) * 100:.2f}%',
    f'\nMoyenne de test_accuracy_vals: {np.mean(test_accuracy_vals) * 100:.2f}%',
    f'Moyenne de test_loss_vals: {np.mean(test_loss_vals) * 100:.2f}%',
    f'\tLa variance associée de l\'accuracy: {np.var(test_accuracy_vals):.6f}',
    f'\tLa variance associée de la loss: {np.var(test_loss_vals):.6f}',
)
# One line per entry, exactly as separate print() calls would emit them.
print(*report_lines, sep='\n')