# Import Data

In [None]:
import pickle
def _read_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Encoded targets for the train / validation / test splits.
y_train_encoded = _read_pickle('y_train_encoded_binary')
y_val_encoded = _read_pickle('y_val_encoded_binary')
y_test_encoded = _read_pickle('y_test_encoded_binary')

# Inputs for each split: the "single" array plus its companion "feat" array.
X_train = _read_pickle('X_train_single')
X_train_feat = _read_pickle('X_train_feat')

X_test = _read_pickle('X_test_single')
X_test_feat = _read_pickle('X_test_feat')

X_val = _read_pickle('X_val_single')
X_val_feat = _read_pickle('X_val_feat')

## Build network input

In [None]:
import numpy as np

# Join every split's base array with its companion feature array along axis 1.
X_train, X_val, X_test = (
    np.concatenate(pair, axis=1)
    for pair in (
        (X_train, X_train_feat),
        (X_val, X_val_feat),
        (X_test, X_test_feat),
    )
)

# Modeling

## Compute Class Weights

In [None]:
from sklearn.utils import class_weight
import numpy as np
# The targets are consumed elsewhere in this file as 2-D rows with one column
# per class (softmax output + categorical cross-entropy, and
# ``np.argmax(y, axis=1)`` in the evaluation helpers). Flattening such an array
# yields as many 0s as 1s whatever the real class balance is, so the
# "balanced" weights would always come out as 1.0. Recover one integer class
# index per sample before counting.
y_train_labels = np.asarray(y_train_encoded)
if y_train_labels.ndim > 1 and y_train_labels.shape[-1] > 1:
    y_train_labels = np.argmax(y_train_labels, axis=-1)
y_train_labels = y_train_labels.ravel()

present_classes = np.unique(y_train_labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=y_train_labels,
)
# Key each weight by its class index (not by its position in the array) so the
# mapping stays correct even if a class is missing from the training split.
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}

print(class_weights_dict)

## Utility Functions

In [None]:
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot as plt
import seaborn as sns

def plot_learning_curves(model_history):
    """
    Plot the loss, learning-rate and custom-metric curves of a training run.

    Up to three figures are drawn, each with a dashed vertical line at the
    epoch with the lowest validation loss.

    Parameters:
        model_history (dict): Mapping from metric name to its per-epoch values,
            i.e. the ``.history`` attribute of what ``model.fit`` returns (not
            the History object itself). ``'loss'`` and ``'val_loss'`` are
            required. The learning rate is read from ``'lr'`` or, failing
            that, ``'learning_rate'``. The learning-rate figure and the
            custom-metric figure (``'custom_metric_train'`` /
            ``'custom_metric_val'``) are skipped when their keys are absent.
    """
    train_colour, val_colour = '#ff7f0e', '#5a9aa5'
    best_epoch = np.argmin(model_history['val_loss'])

    def _decorate(title, y_label):
        # Decorations shared by all figures: best-epoch marker, title, axis
        # labels, legend and grid.
        plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color=val_colour)
        plt.title(title)
        plt.xlabel('epoch')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(alpha=.3)

    # Loss curves
    plt.figure(figsize=(20, 5))
    plt.plot(model_history['loss'], label='Cross Entropy Loss [train]', alpha=.8, color=train_colour)
    plt.plot(model_history['val_loss'], label='Cross Entropy Loss [val]', alpha=.9, color=val_colour)
    _decorate('Cross Entropy Loss', 'Cross Entropy')

    # Learning-rate curve. The key name depends on the Keras version in use,
    # so accept either spelling instead of failing with a KeyError.
    lr_key = next((key for key in ('lr', 'learning_rate') if key in model_history), None)
    if lr_key is not None:
        plt.figure(figsize=(18, 3))
        plt.plot(model_history[lr_key], label='Learning Rate', alpha=.8, color=train_colour)
        _decorate('Learning Rate', 'Learning rate')

    # Custom-metric curves. Distinct legend labels keep the train and
    # validation lines distinguishable.
    if 'custom_metric_train' in model_history and 'custom_metric_val' in model_history:
        plt.figure(figsize=(20, 5))
        plt.plot(model_history['custom_metric_train'], label='custom metric [train]', alpha=.8, color=train_colour)
        plt.plot(model_history['custom_metric_val'], label='custom metric [val]', alpha=.8, color=val_colour)
        _decorate('custom metric', 'custom metric')

    plt.show()

def plot_confusion_matrix(model, X, y, normalize=True):
    """
    Draw the confusion matrix of ``model`` on ``(X, y)`` as a heatmap.

    Parameters:
        model: Object whose ``predict(X)`` output has one column per class.
        X: Inputs forwarded to ``model.predict``.
        y: Targets with one column per class; the arg-max column is taken as
            the true class.
        normalize (bool): When true, the matrix is normalised over the true
            labels and cells are printed with two decimals; otherwise raw
            integer counts are shown.
    """
    scores = model.predict(X)
    print(f"Number of predictions: {scores.shape}")

    # Collapse the per-class columns into one class index per sample.
    predicted_labels = np.argmax(scores, axis=1)
    true_labels = np.argmax(y, axis=1)

    if normalize:
        norm_mode, cell_format = 'true', '.2f'
    else:
        norm_mode, cell_format = None, 'd'
    matrix = confusion_matrix(true_labels, predicted_labels, normalize=norm_mode)

    # Render the matrix.
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, annot=True, fmt=cell_format, cmap='Blues')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.title('Confusion Matrix')
    plt.show()

from sklearn.metrics import classification_report

def compute_metrics(model, X, y):
    """
    Print a classification report for ``model`` evaluated on ``(X, y)``.

    Parameters:
        model: Object whose ``predict(X)`` output has one column per class.
        X: Inputs forwarded to ``model.predict``.
        y: Targets with one column per class; the arg-max column is taken as
            the true class.
    """
    scores = model.predict(X)
    print(f"Number of predictions: {scores.shape}")

    # Collapse the per-class columns into one class index per sample.
    predicted_labels = np.argmax(scores, axis=1)
    true_labels = np.argmax(y, axis=1)

    report = classification_report(true_labels, predicted_labels)
    print(report)

from sklearn.metrics import ConfusionMatrixDisplay

def plot_confusionMatrix(test_labels, test_predicted, clf):
    """
    Show a confusion matrix, normalised over the true labels, with the tick
    labels 'N' and 'V'.

    Parameters:
        test_labels: True labels.
        test_predicted: Predicted labels.
        clf: Object whose ``classes_`` attribute fixes the label order of the
            matrix.
    """
    normalised_matrix = confusion_matrix(
        test_labels,
        test_predicted,
        labels=clf.classes_,
        normalize='true',
    )
    ConfusionMatrixDisplay(
        confusion_matrix=normalised_matrix,
        display_labels=['N', 'V'],
    ).plot()
    plt.show()

In [None]:
from sklearn.metrics import recall_score

def custom_metric(y_true, y_pred, min_recall_n=0.9):
    """
    Score predictions by the recall of class 1, penalised when the recall of
    class 0 is too low.

    Per-class recalls come from ``recall_score(..., average=None)``. When the
    recall at index 0 reaches ``min_recall_n`` the recall at index 1 is
    returned unchanged. Otherwise it is multiplied by the index-0 recall and
    the product is rounded to two decimals.

    Parameters:
        y_true: Ground-truth targets, in a format ``recall_score`` accepts.
        y_pred: Hard (already thresholded) predictions in the same format.
        min_recall_n (float): Index-0 recall below which the penalty applies.
            Defaults to 0.9.

    Returns:
        The (possibly penalised) recall at index 1.
    """
    per_class_recall = recall_score(y_true, y_pred, average=None)
    # Index 0 is the 'N' class; index 1 is presumably 'V', going by the labels
    # used elsewhere in this file -- confirm against the label encoding.
    recall_n, recall_target = per_class_recall[0], per_class_recall[1]

    if recall_n < min_recall_n:
        # Scale the target recall down by however much class 0 is missed.
        return round(recall_target * recall_n, 2)

    return recall_target

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import Callback

class CustomCallback(Callback):
    """
    Callback that records ``custom_metric`` on fixed train/validation data.

    At the end of every epoch the current model predicts on both datasets, the
    predictions are rounded, and the resulting scores are written into the
    epoch's ``logs`` under ``"custom_metric_train"`` and
    ``"custom_metric_val"``.

    Parameters:
        x_train: Inputs scored as the training set.
        y_train: Targets matching ``x_train``.
        x_val: Inputs scored as the validation set.
        y_val: Targets matching ``x_val``.
    """

    def __init__(self, x_train, y_train, x_val, y_val):
        super().__init__()
        self.x_train = x_train
        self.y_train = y_train
        self.x_val = x_val
        self.y_val = y_val

    def _score(self, x, y_true):
        """Return ``custom_metric`` for the model's rounded predictions on ``x``."""
        y_pred = np.round(self.model.predict(x))
        return custom_metric(y_true, y_pred)

    def on_epoch_end(self, epoch, logs=None):
        """Add the train and validation custom metrics to ``logs``."""
        # ``logs`` is written to below; a literal ``{}`` default would be one
        # dict shared by every call that omits the argument.
        if logs is None:
            logs = {}

        custom_metric_train = self._score(self.x_train, self.y_train)
        custom_metric_val = self._score(self.x_val, self.y_val)

        # Expose both metrics to whatever reads the epoch logs afterwards.
        logs["custom_metric_train"] = custom_metric_train
        logs["custom_metric_val"] = custom_metric_val

In [None]:
# Callback instance scoring the custom metric on the train and validation splits.
custom_callback = CustomCallback(
    x_train=X_train,
    y_train=y_train_encoded,
    x_val=X_val,
    y_val=y_val_encoded,
)

# Models

In [None]:
from keras.models import Model

from keras.layers import Dense, Input
def build_FFNN(input_shape, classes, hidden_units=(64, 32, 16)):
    """
    Build and compile a fully connected softmax classifier.

    Parameters:
        input_shape (tuple): Shape of one input sample, without the batch axis.
        classes (int): Number of units in the softmax output layer.
        hidden_units (tuple of int): Width of each ReLU hidden layer, in order
            from input to output. Defaults to ``(64, 32, 16)``.

    Returns:
        The compiled model, named ``'FFNN_model'``. It is compiled with
        categorical cross-entropy, Adam with default settings, and recall and
        precision metrics for class ids 0 and 1.
    """
    # Define the input layer
    input_layer = Input(shape=input_shape)

    # Stack the hidden layers, each fed by the previous one
    features = input_layer
    for units in hidden_units:
        features = Dense(units, activation='relu')(features)

    # Add the output layer
    output_layer = Dense(classes, activation='softmax')(features)

    model = Model(inputs=input_layer, outputs=output_layer, name='FFNN_model')

    # Compile the model. Everything goes through the ``tf`` alias this file
    # imports. NOTE(review): no ``tfk`` alias is defined in the part of the
    # file reviewed, so ``tf.keras`` is spelled out for loss and optimizer too.
    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[
            tf.keras.metrics.Recall(class_id=0),
            tf.keras.metrics.Recall(class_id=1),
            tf.keras.metrics.Precision(class_id=0),
            tf.keras.metrics.Precision(class_id=1),
        ],
    )

    # Return the model
    return model

In [None]:
# One output unit per class.
num_classes = 2
# Take the per-sample input shape from the training data instead of
# hard-coding it, so the network keeps matching the arrays if the number of
# concatenated features changes.
model = build_FFNN(input_shape=X_train.shape[1:], classes=num_classes)
model.summary()

In [None]:
# create callbacks
# NOTE(review): no ``tfk`` alias is defined in the part of the file reviewed,
# so the callbacks are reached through the imported ``tf`` alias.

# Shrink the learning rate by 10% whenever the validation loss has not
# improved for 5 epochs, never going below 1e-6.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.9,
    patience=5,
    verbose=0,
    mode='min',
    min_delta=1e-7,
    min_lr=1e-6
)

# Stop once the validation loss has stalled for 15 epochs (not before epoch
# 30) and roll back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    mode='min',
    min_delta=1e-7,
    restore_best_weights=True,
    start_from_epoch=30
)

callbacks = [lr_scheduler, early_stopping, custom_callback]
epochs = 1000
batch_size = 1024

# Train the model; only the per-epoch history dict is kept.
model_history = model.fit(X_train, y_train_encoded,
                          epochs=epochs,
                          batch_size=batch_size,
                          validation_data=(X_val, y_val_encoded),
                          callbacks=callbacks,
                          class_weight=class_weights_dict, verbose=1
                          ).history

In [None]:
# Plot the curves recorded during training
plot_learning_curves(model_history=model_history)

In [None]:
# Confusion matrix on the test split
plot_confusion_matrix(model=model, X=X_test, y=y_test_encoded)
# Classification report on the test split
compute_metrics(model=model, X=X_test, y=y_test_encoded)