In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, GRU, Bidirectional, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
import statistics
import random
# Seed the three random number generators this notebook relies on (Python's
# `random`, NumPy's legacy global RNG and TensorFlow) with one shared value,
# in that order.
SEED = 999
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED)

In [None]:
# Extract the audio features, labels and identifiers as explained in the
# multimodal target model. Every entry of df_list is read positionally:
#   0 -> audio features
#   1 -> text (lexical) features; not loaded here, see the disabled line below
#   2 -> depression label (binary: 0 for control, 1 for depressed)
#   3 -> unique clip identifier
#   4 -> session number
#x_text_data = np.array([i[1] for i in df_list])   # Text features
x_audio_data, y_data, unique_clip_ID_No, session_number = (
    np.array([entry[field] for entry in df_list]) for field in (0, 2, 3, 4)
)

## Target model - acoustic

In [None]:
# Constants
NUM_FOLDS = 5                      # number of cross-validation folds
ACOUSTIC_INPUT_SHAPE = (499, 512)  # per-clip shape given to the Keras Input layer
                                   # (presumably (frames, feature dims) -- confirm against x_audio_data.shape[1:])
N_CONV_LAYERS = 2                  # number of Conv1D + MaxPooling1D stages
N_CONV_KERNELS = 64                # filters per Conv1D layer
CONV_KERNEL_WIDTH = 3              # Conv1D kernel size
MAX_POOLING_KERNEL_WIDTH = 2       # MaxPooling1D pool size
N_GRU_LAYERS = 2                   # number of stacked bidirectional GRU layers
GRU_LAYER_WIDTH = 64               # units per GRU direction
BATCH_SIZE = 64
EPOCHS = 200                       # upper bound; the early-stopping callback may end training sooner
FOLD_SEED = 999                    # seed for the fold shuffle; mirrors the value used to seed the global RNGs

# Data preparation
# Folds are drawn over the unique session numbers rather than over individual
# clips, so all clips that share a session number end up in the same fold.
speaker_ids = np.unique(session_number)

# An explicit random_state pins the fold assignment. Without it KFold draws
# from NumPy's global RNG, so re-running this cell (or any earlier use of
# np.random) would silently produce different folds.
k_fold = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=FOLD_SEED)

# Per-fold row indices into the clip-level arrays, filled by the split loop.
training_index, testing_index = [], []
# Per-fold test metrics, filled by the training loop.
accuracies_best, loss_fold_best, F1_best = [], [], []
# Session numbers that made up each fold's test set.
session_list=[]

# Define a custom early stopping callback to restore the best weights
class CustomEarlyStopping(tf.keras.callbacks.Callback):
    """Stop training when ``val_loss`` stops improving and keep the best weights.

    After every epoch the callback compares ``val_loss`` with the lowest value
    seen so far. An improvement snapshots the model weights and resets the
    patience counter; otherwise the counter is incremented and training is
    stopped once it reaches ``patience``.

    When training ends (whether it was stopped early or ran through all
    epochs) and ``restore_best_weights`` is true, the best snapshot is loaded
    back into the model and, if ``path`` is non-empty, the model is saved there.

    Args:
        patience: Number of consecutive epochs without improvement in
            ``val_loss`` after which training is stopped.
        restore_best_weights: Whether to load the best weights back into the
            model (and save it to ``path``) when training ends.
        path: File path passed to ``model.save``. An empty string skips saving.
    """

    def __init__(self, patience=10, restore_best_weights=True, path=''):
        super().__init__()
        self.patience = patience
        self.restore_best_weights = restore_best_weights
        self.path = path
        self._reset_state()

    def _reset_state(self):
        """Initialise the tracking state used while monitoring ``val_loss``."""
        self.best = float('inf')   # lowest val_loss seen so far
        self.wait = 0              # consecutive epochs without improvement
        self.best_weights = None   # weights snapshot taken at the best epoch
        self._warned_missing_val_loss = False

    def on_train_begin(self, logs=None):
        """Start every ``fit`` call from a clean state so an instance can be reused."""
        self._reset_state()

    def on_epoch_end(self, epoch, logs=None):
        """Update the best snapshot or the patience counter from this epoch's ``val_loss``."""
        current_loss = (logs or {}).get('val_loss')
        if current_loss is None:
            # Without validation data there is nothing to monitor; report it
            # once instead of failing on a None < float comparison.
            if not self._warned_missing_val_loss:
                print("CustomEarlyStopping: 'val_loss' is not available; early stopping is inactive")
                self._warned_missing_val_loss = True
            return

        if current_loss < self.best:
            self.best = current_loss
            self.best_weights = self.model.get_weights()
            self.wait = 0  # patience counts consecutive non-improving epochs
            return

        self.wait += 1
        if self.wait >= self.patience:
            self.model.stop_training = True

    def on_train_end(self, logs=None):
        """Restore (and optionally save) the best weights once training has finished."""
        if not self.restore_best_weights or self.best_weights is None:
            return
        self.model.set_weights(self.best_weights)
        if self.path:
            self.model.save(self.path)
        print(f'Restoring model weights from the end of the best epoch to {self.path}')

# Split data for k-fold cross-validation.
# KFold partitions the unique session numbers; each partition is then expanded
# to the positions of every clip whose session number belongs to it. The
# np.where result (a 1-tuple holding the position array) is stored as-is.
for fold_train_pos, fold_test_pos in k_fold.split(speaker_ids):
    in_train_fold = np.isin(session_number, speaker_ids[fold_train_pos])
    in_test_fold = np.isin(session_number, speaker_ids[fold_test_pos])
    training_index.append(np.where(in_train_fold))
    testing_index.append(np.where(in_test_fold))

# Model training and evaluation
# One freshly initialised model is trained and evaluated per fold; the
# per-fold test loss, accuracy and F1 are collected for the summary.
for i, (train_idx, test_idx) in enumerate(zip(training_index, testing_index)):
    print(f'Processing Fold {i + 1}')

    # Release the Keras global state left over from the previous fold's model
    # so that memory use does not keep growing as more models are built.
    tf.keras.backend.clear_session()

    # Each stored index is the 1-tuple returned by np.where; element 0 holds
    # the row positions of the fold's clips.
    x_train, y_train = x_audio_data[train_idx[0]], y_data[train_idx[0]]
    x_test, y_test = x_audio_data[test_idx[0]], y_data[test_idx[0]]
    session_no = np.unique(session_number[test_idx[0]])
    session_list.append(session_no)

    # Build and compile the model:
    # Conv1D/MaxPooling1D stages -> stacked bidirectional GRUs -> average over
    # the sequence axis -> Dense(1024, relu) -> 2-way softmax.
    acoustic_input = Input(shape=ACOUSTIC_INPUT_SHAPE, name="acoustic_input")
    x = acoustic_input
    for _ in range(N_CONV_LAYERS):
        x = Conv1D(filters=N_CONV_KERNELS, kernel_size=CONV_KERNEL_WIDTH, activation='relu')(x)
        x = MaxPooling1D(pool_size=MAX_POOLING_KERNEL_WIDTH)(x)
    for _ in range(N_GRU_LAYERS):
        # return_sequences=True keeps the sequence axis for the next GRU layer
        # and for the global average pooling below.
        x = Bidirectional(GRU(GRU_LAYER_WIDTH, return_sequences=True))(x)
    x = GlobalAveragePooling1D()(x)
    output = Dense(2, activation='softmax')(Dense(1024, activation='relu')(x))
    model = Model(inputs=acoustic_input, outputs=output)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    # The sparse loss takes integer class labels (y_data is described as 0/1).
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Fit model with early stopping
    # NOTE(review): validation_split holds out a fraction of x_train at clip
    # level, not by session, so validation clips may come from sessions that
    # are also trained on -- confirm this is acceptable for model selection.
    checkpoint_path = f'model_checkpoint_fold_{i + 1}.h5'
    early_stopping = CustomEarlyStopping(patience=5, restore_best_weights=True, path=checkpoint_path)
    model.fit(x_train, y_train, validation_split=0.2, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping])

    # Evaluate the model on the held-out sessions of this fold
    loss, accuracy = model.evaluate(x_test, y_test)
    accuracies_best.append(accuracy)
    loss_fold_best.append(loss)

    # Predictions and F1 score (argmax over the two softmax outputs gives the class id)
    y_pred = np.argmax(model.predict(x_test), axis=1)
    F1_score = f1_score(y_test, y_pred)
    F1_best.append(F1_score)
    print(f"Fold {i + 1}: Test Loss: {loss}, Test Accuracy: {accuracy}, F1 Score: {F1_score}")

# Per-fold recap: the sessions held out in each fold next to that fold's
# test loss, accuracy and F1.
print('--- Summary of Results ---')
per_fold_results = zip(session_list, loss_fold_best, accuracies_best, F1_best)
for test_set, loss, acc, f1 in per_fold_results:
    print(f'Test set: {test_set}, Loss: {loss:.4f}, Accuracy: {acc:.4f}, F1 Score: {f1:.4f}')

# Overall assessment: mean and sample standard deviation (statistics.stdev)
# of the per-fold accuracy and F1 values.
print(' ------------------------- ')
mean_accuracy = np.mean(accuracies_best)
std_accuracy = statistics.stdev(accuracies_best)
print(f'Average accuracy per 10 sec chunk : {mean_accuracy:.4f}, std deviation: {std_accuracy:.4f}')
mean_f1 = np.mean(F1_best)
std_f1 = statistics.stdev(F1_best)
print(f'Average F1 per 10 sec chunk : {mean_f1:.4f}, std deviation: {std_f1:.4f}')