In [1]:
import os
import sys
import pickle
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

# Put the notebook's parent directory on the import path so that the
# `src.*` imports below can be resolved.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path += [module_path]

# Import the aggregation function from utils script
from src.utils import aggregate_interview_sequences

from src.data_loader import load_androids_corpus
from src.foundation_model_extractor import extract_wav2vec2_sequences

In [2]:
# Extract and Prepare All Sequence Data for DL Models
#
# Builds three participant-keyed dictionaries of wav2vec2 embedding sequences
# (reading, interview, combined) and collects them in `sequence_sets`, which the
# training cells further down iterate over.

# Load metadata and extract clip-level sequences.
# The corpus location can be overridden through the ANDROIDS_CORPUS_PATH
# environment variable; without it the original default below is used.
BASE_DATA_PATH = os.environ.get('ANDROIDS_CORPUS_PATH', 'E:/Dissertation_Data/Androids-Corpus') # Verify this path
print("Loading corpus metadata...")
reading_df, interview_df = load_androids_corpus(BASE_DATA_PATH, verbose=False)
# One row per participant, taken from the reading-task table only.
# NOTE(review): the recorded output reports 112 reading vs 116 interview fold
# assignments, so interview-only participants may have no row here — confirm
# how the training functions treat sequences without a metadata row.
participant_metadata = reading_df[['unique_participant_id', 'label', 'fold']].drop_duplicates().reset_index(drop=True)

SEQUENCES_READING_PATH = '../data/Processed_Features/features_wav2vec2_sequences_reading_task.pkl'
SEQUENCES_INTERVIEW_CLIPS_PATH = '../data/Processed_Features/features_wav2vec2_sequences_interview_clips.pkl'

# Run extraction for the reading task if the file doesn't already exist.
# Without this step the load below fails on a fresh run, because the only other
# place that creates this file is a later cell.
if not os.path.exists(SEQUENCES_READING_PATH):
    print("Reading task sequences not found. Running extraction...")
    reading_sequences = extract_wav2vec2_sequences(reading_df)
    if not reading_sequences:
        # Fail here with a clear reason instead of a FileNotFoundError at load time.
        raise RuntimeError("Reading task extraction returned no sequences; nothing to save or load.")
    os.makedirs(os.path.dirname(SEQUENCES_READING_PATH), exist_ok=True)
    print(f"Saving new reading task sequences to: {SEQUENCES_READING_PATH}")
    with open(SEQUENCES_READING_PATH, 'wb') as f:
        pickle.dump(reading_sequences, f)

# Run extraction for interview clips if the file doesn't already exist
if not os.path.exists(SEQUENCES_INTERVIEW_CLIPS_PATH):
    print("Extracting sequential embeddings for all interview clips...")
    interview_clip_sequences = extract_wav2vec2_sequences(interview_df)
    if not interview_clip_sequences:
        # Previously an empty result skipped the save and the load below then
        # crashed with an unexplained FileNotFoundError.
        raise RuntimeError("Interview clip extraction returned no sequences; nothing to save or load.")
    os.makedirs(os.path.dirname(SEQUENCES_INTERVIEW_CLIPS_PATH), exist_ok=True)
    print(f"Saving interview clip sequences to: {SEQUENCES_INTERVIEW_CLIPS_PATH}")
    with open(SEQUENCES_INTERVIEW_CLIPS_PATH, 'wb') as f:
        pickle.dump(interview_clip_sequences, f)
else:
    print("Interview clip sequences already exist. Loading from file.")

# Load all necessary sequence data.
# These pickles are caches written by this notebook; pickle.load must not be
# pointed at files from an untrusted source.
with open(SEQUENCES_READING_PATH, 'rb') as f:
    reading_sequences = pickle.load(f)
with open(SEQUENCES_INTERVIEW_CLIPS_PATH, 'rb') as f:
    interview_clip_sequences = pickle.load(f)
print(f"\nLoaded {len(reading_sequences)} Reading sequences and {len(interview_clip_sequences)} Interview clip sequences.")

# Aggregate interview clips into session-level sequences using the util function
interview_session_sequences = aggregate_interview_sequences(interview_clip_sequences, interview_df)
print(f"Aggregated clips for {len(interview_session_sequences)} participants.")

# Create the final Reading, Interview, and Combined datasets
# Remap reading sequences to be keyed by participant ID for consistency;
# files that are not listed in reading_df are dropped.
reading_participant_map = reading_df.set_index('filename')['unique_participant_id']
reading_session_sequences = {
    reading_participant_map[fname]: seq
    for fname, seq in reading_sequences.items()
    if fname in reading_participant_map.index
}

# Create combined sequences by concatenating reading and interview sequences.
# Only participants that have both a reading and an interview sequence are kept.
# np.vstack presumably stacks along the time axis, i.e. both arrays are
# (frames, features) with the same feature width — TODO confirm.
combined_session_sequences = {}
for participant_id in tqdm(participant_metadata['unique_participant_id'], desc="Creating Combined Sequences"):
    reading_seq = reading_session_sequences.get(participant_id)
    interview_seq = interview_session_sequences.get(participant_id)
    if reading_seq is not None and interview_seq is not None:
        combined_session_sequences[participant_id] = np.vstack([reading_seq, interview_seq])

# Store all prepared sequence sets in a final dictionary
sequence_sets = {
    'reading': reading_session_sequences,
    'interview': interview_session_sequences,
    'combined': combined_session_sequences
}
print("\n--- All sequence datasets are now prepared and ready for training ---")

Loading corpus metadata...
Successfully loaded 112 Read task and 116 Interview task fold assignments.
Interview clip sequences already exist. Loading from file.

Loaded 111 Reading sequences and 857 Interview clip sequences.

Aggregating interview clips into single sequences per participant...


Aggregating Sequences: 0it [00:00, ?it/s]

Aggregated clips for 114 participants.


Creating Combined Sequences:   0%|          | 0/111 [00:00<?, ?it/s]


--- All sequence datasets are now prepared and ready for training ---


In [3]:
# Load or Extract Reading Task Sequences
#
# Makes sure the cached reading-task sequences exist (extracting them when the
# cache file is missing), loads them into `reading_sequences` and prints the
# shape of the first entry as a sanity check.

# Define the path to the reading task sequences file
SEQUENCES_READING_PATH = '../data/Processed_Features/features_wav2vec2_sequences_reading_task.pkl'

# Check if the file exists before running extraction
if not os.path.exists(SEQUENCES_READING_PATH):
    print("Reading task sequences not found. Running extraction...")

    # Check if the reading_df DataFrame is loaded (it is created by an earlier cell)
    if 'reading_df' in locals():
        print("\nExtracting sequential embeddings for the Reading Task...")

        # Call the extractor function
        reading_sequences = extract_wav2vec2_sequences(reading_df)

        # Save the new sequences to the pickle file (skipped when nothing was extracted)
        if reading_sequences:
            print(f"Saving new reading task sequences to: {SEQUENCES_READING_PATH}")
            # Create the output folder first so the save cannot fail on a fresh checkout.
            os.makedirs(os.path.dirname(SEQUENCES_READING_PATH), exist_ok=True)
            with open(SEQUENCES_READING_PATH, 'wb') as f:
                pickle.dump(reading_sequences, f)
            print("Extraction and saving complete.")
    else:
        print("ERROR: 'reading_df' not found. Cannot run extraction.")
else:
    print(f"Reading task sequences already exist. Loading from file: {SEQUENCES_READING_PATH}")

# Load the data regardless of whether it was just created or already existed.
# Only the file access is guarded, so problems in the verification step below
# are not mistaken for a missing file.
try:
    with open(SEQUENCES_READING_PATH, 'rb') as f:
        reading_sequences = pickle.load(f)
except FileNotFoundError:
    print(f"ERROR: Could not load reading sequences from {SEQUENCES_READING_PATH}")
else:
    print(f"\nSuccessfully loaded data for {len(reading_sequences)} reading files.")

    # Final Verification: inspect the first entry, if there is one.
    first_filename = next(iter(reading_sequences), None)
    if first_filename is None:
        # An empty cache previously raised IndexError here.
        print("WARNING: The reading sequences file contains no entries; nothing to verify.")
    else:
        first_sequence = reading_sequences[first_filename]
        print(f"Verified sequence shape for '{first_filename}': {first_sequence.shape}")

Reading task sequences already exist. Loading from file: ../data/Processed_Features/features_wav2vec2_sequences_reading_task.pkl

Successfully loaded data for 111 reading files.
Verified sequence shape for '01_CF56_1.wav': (4378, 768)


In [None]:
# Run CNN-LSTM Experiments for All Data Types
#
# For each entry of `sequence_sets`, either load the pickled results of a
# previous run or run the cross-validation and pickle its results. Everything
# ends up in `dl_results`, keyed by experiment name.

from src.dl_cv_strategies import run_pytorch_cv_with_early_stopping

# Collected results of the DL experiments
dl_results = {}

# Experiment parameters
N_EPOCHS = 50
PATIENCE = 10
BATCH_SIZE = 8
LEARNING_RATE = 0.0001
# `participant_metadata` and `sequence_sets` come from the data preparation cell.

for name, seq_dict in sequence_sets.items():
    experiment_name = 'wav2vec2_cnn_lstm_{}'.format(name)
    results_save_path = '../data/Processed_Features/results_{}.pkl'.format(experiment_name)

    # Cached results take precedence over re-running the experiment.
    if os.path.exists(results_save_path):
        print(f"\nLoading pre-computed results for {experiment_name.upper()}")
        with open(results_save_path, 'rb') as cache_file:
            dl_results[experiment_name] = pickle.load(cache_file)
        continue

    print(f"\n--- Running experiment: {experiment_name.upper()} ---")

    # The full participant_metadata frame is handed over unfiltered; matching it
    # to the sequences is left to the training function.
    training_settings = dict(
        epochs=N_EPOCHS,
        patience=PATIENCE,
        batch_size=BATCH_SIZE,
        learning_rate=LEARNING_RATE,
    )
    results_df, predictions = run_pytorch_cv_with_early_stopping(
        sequences_dict=seq_dict,
        metadata_df=participant_metadata,
        **training_settings
    )

    # Persist the results, then register them
    results_to_save = dict(results_df=results_df, predictions=predictions)
    with open(results_save_path, 'wb') as cache_file:
        pickle.dump(results_to_save, cache_file)
    print(f"Results saved to {results_save_path}")
    dl_results[experiment_name] = results_to_save

print("\n--- All Deep Learning experiments are now complete! ---")


--- Running experiment: WAV2VEC2_CNN_LSTM_READING ---


Running 5-Fold CV:   0%|          | 0/5 [00:00<?, ?it/s]

  > Early stopping triggered at epoch 34
  > Early stopping triggered at epoch 19
  > Early stopping triggered at epoch 33
  > Early stopping triggered at epoch 28
  > Early stopping triggered at epoch 38
Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_reading.pkl

--- Running experiment: WAV2VEC2_CNN_LSTM_INTERVIEW ---


Running 5-Fold CV:   0%|          | 0/5 [00:00<?, ?it/s]

  > Early stopping triggered at epoch 44
  > Early stopping triggered at epoch 37
  > Early stopping triggered at epoch 48
  > Early stopping triggered at epoch 31
Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_interview.pkl

--- Running experiment: WAV2VEC2_CNN_LSTM_COMBINED ---


Running 5-Fold CV:   0%|          | 0/5 [00:00<?, ?it/s]

  > Early stopping triggered at epoch 35
  > Early stopping triggered at epoch 44
  > Early stopping triggered at epoch 35
  > Early stopping triggered at epoch 49
Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_combined.pkl

--- All Deep Learning experiments are now complete! ---


In [None]:
# Final Analysis - Compare SVM and DL Models
#
# Merges the SVM results (if their pickle is available) with `dl_results` and
# shows the mean and standard deviation of F1-score, AUC and accuracy for each
# experiment.

# Load SVM results from notebook 03; make sure they have been saved to a pkl file
SVM_RESULTS_PATH = '../data/Processed_Features/all_svm_results.pkl'
if not os.path.exists(SVM_RESULTS_PATH):
    print("Warning: SVM results file not found. Final comparison will only show DL models.")
    all_results = {}
else:
    with open(SVM_RESULTS_PATH, 'rb') as f:
        all_results = pickle.load(f)

# Add the new DL results to the main dictionary
all_results.update(dl_results)

# Re-run the plotting and analysis cells from notebook 03 as needed,
# e.g. generate the final summary table:

final_summary_data = []
for experiment_name, data in all_results.items():
    results_df = data['results_df']
    summary_row = {'Experiment': experiment_name}
    # (column in results_df, title used in the summary table)
    for metric_key, metric_title in (('f1_score', 'F1-Score'), ('auc', 'AUC'), ('accuracy', 'Accuracy')):
        summary_row[f'Mean {metric_title}'] = results_df[metric_key].mean()
        summary_row[f'Std Dev {metric_title}'] = results_df[metric_key].std()
    final_summary_data.append(summary_row)

final_summary_df = pd.DataFrame(final_summary_data).set_index('Experiment')

# Means and standard deviations are coloured with opposite colour maps.
mean_columns = [c for c in final_summary_df.columns if 'Mean' in c]
std_columns = [c for c in final_summary_df.columns if 'Std Dev' in c]
summary_styler = final_summary_df.style.background_gradient(cmap='viridis', subset=mean_columns, axis=0)
summary_styler = summary_styler.background_gradient(cmap='viridis_r', subset=std_columns, axis=0)
display(summary_styler.format("{:.3f}"))



Unnamed: 0_level_0,Mean F1-Score,Std Dev F1-Score,Mean AUC,Std Dev AUC,Mean Accuracy,Std Dev Accuracy
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
wav2vec2_cnn_lstm_reading,0.683,0.063,0.77,0.079,0.694,0.059
wav2vec2_cnn_lstm_interview,0.739,0.075,0.815,0.076,0.744,0.074
wav2vec2_cnn_lstm_combined,0.71,0.124,0.809,0.086,0.717,0.124


In [None]:
# Run Tuned CNN-LSTM Experiments for All Data Types
#
# For each entry of `sequence_sets`, either load pickled results of a previous
# run or run the nested cross-validation with Optuna tuning and pickle its
# results. Everything ends up in `tuned_dl_results`, keyed by experiment name.

from src.dl_cv_strategies import run_pytorch_nested_cv_with_optuna

# Dictionary to store the results of the tuned DL experiments
tuned_dl_results = {}

# Define experiment parameters
N_TRIALS = 5
N_EPOCHS = 50
PATIENCE = 10
BATCH_SIZE = 8

# The cache file name does not encode these settings, so they are stored next to
# the results and compared on load; otherwise results produced with different
# settings would be reused silently.
run_config = {
    'n_trials': N_TRIALS,
    'epochs': N_EPOCHS,
    'patience': PATIENCE,
    'batch_size': BATCH_SIZE,
}

# Loop through the three prepared sequence sets
for name, seq_dict in sequence_sets.items():
    experiment_name = f'wav2vec2_cnn_lstm_tuned_{name}'
    results_save_path = f'../data/Processed_Features/results_{experiment_name}.pkl'

    if not os.path.exists(results_save_path):
        print(f"\n--- Running experiment: {experiment_name.upper()} ---")

        # Filter metadata for participants in the current dataset
        current_participants = list(seq_dict.keys())
        current_metadata_df = participant_metadata[participant_metadata['unique_participant_id'].isin(current_participants)]

        # Run the full nested CV with Optuna tuning
        tuned_results_df, tuned_predictions = run_pytorch_nested_cv_with_optuna(
            sequences_dict=seq_dict,
            metadata_df=current_metadata_df,
            n_trials=N_TRIALS,
            epochs=N_EPOCHS,
            patience=PATIENCE,
            batch_size=BATCH_SIZE
        )

        # Save results for this experiment together with the settings that produced them
        results_to_save = {
            'results_df': tuned_results_df,
            'predictions': tuned_predictions,
            'config': dict(run_config),
        }
        with open(results_save_path, 'wb') as f:
            pickle.dump(results_to_save, f)
        print(f"Results saved to {results_save_path}")
        tuned_dl_results[experiment_name] = results_to_save
    else:
        print(f"\nLoading pre-computed results for {experiment_name.upper()}")
        with open(results_save_path, 'rb') as f:
            cached_results = pickle.load(f)

        # Make a stale or unverifiable cache visible instead of silently mixing runs.
        cached_config = cached_results.get('config')
        if cached_config is None:
            print("  WARNING: cached results do not record their settings; "
                  "cannot confirm they match the current configuration.")
        elif cached_config != run_config:
            print(f"  WARNING: cached results were produced with {cached_config}, "
                  f"not the current {run_config}. Delete {results_save_path} to re-run.")
        tuned_dl_results[experiment_name] = cached_results

print("\n--- All Tuned Deep Learning experiments are now complete ---")


Loading pre-computed results for WAV2VEC2_CNN_LSTM_TUNED_READING

--- Running experiment: WAV2VEC2_CNN_LSTM_TUNED_INTERVIEW ---
--- Training on device: cuda ---


Outer CV Fold:   0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-07-29 21:29:20,477] A new study created in memory with name: no-name-72f8e1fe-7d9a-4673-b8bc-ba760d9c0bc2
[I 2025-07-29 21:30:18,246] Trial 0 finished with value: 0.6799936204293872 and parameters: {'learning_rate': 0.0002608639984018562, 'dropout_rate': 0.2968266039938426, 'cnn_out_channels': 32, 'lstm_hidden_dim': 64, 'activation_fn': 'silu'}. Best is trial 0 with value: 0.6799936204293872.
[I 2025-07-29 21:31:15,330] Trial 1 finished with value: 0.6844363806128513 and parameters: {'learning_rate': 0.0001338027277329629, 'dropout_rate': 0.46433155599956777, 'cnn_out_channels': 32, 'lstm_hidden_dim': 64, 'activation_fn': 'gelu'}. Best is trial 1 with value: 0.6844363806128513.
[I 2025-07-29 21:32:10,788] Trial 2 finished with value: 0.6751003519545719 and parameters: {'learning_rate': 0.000475889952273976, 'dropout_rate': 0.4308789946216086, 'cnn_out_channels': 64, 'lstm_hidden_dim': 32, 'activation_fn': 'silu'}. Best is trial 1 with value: 0.6844363806128513.
[I 2025-07-29 21

Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_tuned_interview.pkl

--- Running experiment: WAV2VEC2_CNN_LSTM_TUNED_COMBINED ---
--- Training on device: cuda ---


Outer CV Fold:   0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-07-29 22:08:54,610] A new study created in memory with name: no-name-d74742c1-ebc1-4ec8-8394-427ee1eb303d
[I 2025-07-29 22:10:05,447] Trial 0 finished with value: 0.6256060393093915 and parameters: {'learning_rate': 2.7339817934878648e-05, 'dropout_rate': 0.3939851811121062, 'cnn_out_channels': 64, 'lstm_hidden_dim': 64, 'activation_fn': 'gelu'}. Best is trial 0 with value: 0.6256060393093915.
[I 2025-07-29 22:11:12,213] Trial 1 finished with value: 0.5462970112970112 and parameters: {'learning_rate': 2.908534928765541e-05, 'dropout_rate': 0.38022325879168306, 'cnn_out_channels': 32, 'lstm_hidden_dim': 64, 'activation_fn': 'gelu'}. Best is trial 0 with value: 0.6256060393093915.
[I 2025-07-29 22:12:18,751] Trial 2 finished with value: 0.563961161387632 and parameters: {'learning_rate': 0.0001298988301909629, 'dropout_rate': 0.3099556711931442, 'cnn_out_channels': 64, 'lstm_hidden_dim': 32, 'activation_fn': 'silu'}. Best is trial 0 with value: 0.6256060393093915.
[I 2025-07-29 2

Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_tuned_combined.pkl

--- All Tuned Deep Learning experiments are now complete ---


In [None]:
# Train and Save the Single Best Model as a Final Artefact 
#
# Takes the hyperparameters of the best-scoring fold of the tuned 'combined'
# experiment, trains one CNNLSTM on all combined sequences and saves its
# state_dict together with those hyperparameters.
# Relies on N_EPOCHS and BATCH_SIZE as set by the most recently run experiment cell.
from src.dl_cv_strategies import SequenceDataset, collate_fn
from torch.utils.data import Dataset, DataLoader
import torch
from src.models import CNNLSTM
import torch.nn as nn

# Only the lookup of the tuned results is guarded. A KeyError raised later
# (missing hyperparameter, participant without a label, ...) now surfaces with
# its real traceback instead of being reported as "results not found".
try:
    combined_results_df = tuned_dl_results['wav2vec2_cnn_lstm_tuned_combined']['results_df']
except KeyError:
    print("Could not find tuned results for 'combined' dataset. Skipping final model training.")
else:
    # Identify the best hyperparameters from the most important experiment (Combined):
    # use the parameters of the fold with the best F1-score.
    # NOTE(review): this picks parameters by the fold's reported score; acceptable
    # for a final artefact, but it should not feed back into reported performance.
    best_fold_params = combined_results_df.loc[combined_results_df['f1_score'].idxmax()]['best_params']
    print(f"Identified best hyperparameters from 'Combined' experiment:\n{best_fold_params}")

    # Prepare the full dataset for final training
    full_combined_sequences_dict = sequence_sets['combined']
    participants_for_final_model = list(full_combined_sequences_dict.keys())
    metadata_for_final_model = participant_metadata[participant_metadata['unique_participant_id'].isin(participants_for_final_model)]

    # Align the data: sequences and labels follow the same participant order.
    # Labels are encoded as 1 for 'Patient' and 0 for every other value.
    label_map_final = metadata_for_final_model.set_index('unique_participant_id')['label'].apply(lambda x: 1 if x == 'Patient' else 0)
    final_sequences = [full_combined_sequences_dict[pid] for pid in participants_for_final_model]
    final_labels = label_map_final.loc[participants_for_final_model].values

    final_dataset = SequenceDataset(final_sequences, final_labels)
    final_train_loader = DataLoader(final_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

    # Build and train the final model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    final_model = CNNLSTM(
        input_dim=final_sequences[0].shape[1],  # feature width of one sequence
        dropout_rate=best_fold_params['dropout_rate'],
        cnn_out_channels=best_fold_params['cnn_out_channels'],
        lstm_hidden_dim=best_fold_params['lstm_hidden_dim'],
        activation_fn=best_fold_params['activation_fn']
    ).to(device)

    optimizer = torch.optim.Adam(final_model.parameters(), lr=best_fold_params['learning_rate'])
    loss_fn = nn.CrossEntropyLoss()

    # Fixed number of epochs on all data; there is no validation split here,
    # hence no early stopping.
    print(f"\nTraining final model on all combined data for {N_EPOCHS} epochs...")
    final_model.train()
    for epoch in tqdm(range(N_EPOCHS), desc="Final Model Training"):
        for sequences, labels in final_train_loader:
            sequences, labels = sequences.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = final_model(sequences)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

    print("Final training complete.")

    # Save the model's state_dict and hyperparameters
    MODEL_SAVE_PATH = "../models/final_cnn_lstm_model.pt"
    os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)

    torch.save({
        'hyperparameters': best_fold_params,
        'model_state_dict': final_model.state_dict(),
    }, MODEL_SAVE_PATH)
    print(f"Final model and hyperparameters saved to: {MODEL_SAVE_PATH}")

Identified best hyperparameters from 'Combined' experiment:
{'learning_rate': 0.0002762832450991962, 'dropout_rate': 0.46773903528314986, 'cnn_out_channels': 64, 'lstm_hidden_dim': 64, 'activation_fn': 'silu'}

Training final model on all combined data for 50 epochs...


Final Model Training:   0%|          | 0/50 [00:00<?, ?it/s]

Final training complete.
Final model and hyperparameters saved to: ../models/final_cnn_lstm_model.pt


In [None]:
# Final Analysis and Comparison of ALL Models (SVM and DL) 
#
# Merges the SVM results (if their pickle is available) with `tuned_dl_results`
# and shows the mean and standard deviation of F1-score, AUC and accuracy for
# each experiment.
# NOTE(review): only the tuned DL results are merged here; the baseline
# `dl_results` are not part of this table — confirm that this is intended.

# Load all experiment results into one dictionary, starting with the SVM
# results generated in notebook 03.
SVM_RESULTS_PATH = '../data/Processed_Features/all_svm_results.pkl' 
if not os.path.exists(SVM_RESULTS_PATH):
    print("Warning: SVM results file not found. Final comparison will only show DL models.")
    all_results = {}
else:
    with open(SVM_RESULTS_PATH, 'rb') as f:
        all_results = pickle.load(f)

# `tuned_dl_results` is expected to be in memory from the tuned experiment cell;
# its entries are added to the master dictionary.
all_results.update(tuned_dl_results)

print(f"Successfully loaded a total of {len(all_results)} experiment results for final comparison.")

# Create and Display Summary Table

# Title used in the summary table -> column of each results_df
metric_columns = {'F1-Score': 'f1_score', 'AUC': 'auc', 'Accuracy': 'accuracy'}

final_summary_data = []
for experiment_name, data in all_results.items():
    results_df = data['results_df']
    summary_row = {'Experiment': experiment_name}
    for metric_title, metric_key in metric_columns.items():
        summary_row['Mean ' + metric_title] = results_df[metric_key].mean()
        summary_row['Std Dev ' + metric_title] = results_df[metric_key].std()
    final_summary_data.append(summary_row)

final_summary_df = pd.DataFrame(final_summary_data).set_index('Experiment')

print("\n--- Final Comprehensive Results Summary ---")
# Means and standard deviations are coloured with opposite colour maps.
mean_columns = [c for c in final_summary_df.columns if 'Mean' in c]
std_columns = [c for c in final_summary_df.columns if 'Std Dev' in c]
summary_styler = final_summary_df.style.background_gradient(cmap='viridis', subset=mean_columns, axis=0)
summary_styler = summary_styler.background_gradient(cmap='viridis_r', subset=std_columns, axis=0)
display(summary_styler.format("{:.3f}"))

Successfully loaded a total of 3 experiment results for final comparison.

--- Final Comprehensive Results Summary ---


Unnamed: 0_level_0,Mean F1-Score,Std Dev F1-Score,Mean AUC,Std Dev AUC,Mean Accuracy,Std Dev Accuracy
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
wav2vec2_cnn_lstm_tuned_reading,0.682,0.054,0.747,0.066,0.685,0.056
wav2vec2_cnn_lstm_tuned_interview,0.752,0.142,0.822,0.123,0.754,0.141
wav2vec2_cnn_lstm_tuned_combined,0.721,0.136,0.788,0.201,0.726,0.131


In [None]:
# Run Tuned CNN-LSTM Experiments for All Data Types
#
# Same procedure as the earlier tuned-experiment cell, with a larger Optuna
# budget (N_TRIALS = 25). Both cells use the same result file names, so an
# existing file from a run with other settings is loaded instead of re-running;
# the settings check below makes that visible.

# Nested-CV routine with Optuna tuning
from src.dl_cv_strategies import run_pytorch_nested_cv_with_optuna

# Dictionary to store the results of the tuned DL experiments
tuned_dl_results = {}

# Define experiment parameters
N_TRIALS = 25      # Number of hyperparameter sets to try in each outer fold
N_EPOCHS = 50      # Maximum number of epochs for the final model training in each fold
PATIENCE = 10      # Patience for early stopping
BATCH_SIZE = 8

# The cache file name does not encode these settings, so they are stored next to
# the results and compared on load; otherwise results produced with different
# settings would be reused silently.
run_config = {
    'n_trials': N_TRIALS,
    'epochs': N_EPOCHS,
    'patience': PATIENCE,
    'batch_size': BATCH_SIZE,
}

# Loop through the three prepared sequence sets
for name, seq_dict in sequence_sets.items():
    experiment_name = f'wav2vec2_cnn_lstm_tuned_{name}'
    results_save_path = f'../data/Processed_Features/results_{experiment_name}.pkl'

    if not os.path.exists(results_save_path):
        print(f"\n--- Running experiment: {experiment_name.upper()} ---")

        # Filter metadata for participants in the current dataset
        current_participants = list(seq_dict.keys())
        current_metadata_df = participant_metadata[participant_metadata['unique_participant_id'].isin(current_participants)]

        # Run the full nested CV with Optuna tuning
        tuned_results_df, tuned_predictions = run_pytorch_nested_cv_with_optuna(
            sequences_dict=seq_dict,
            metadata_df=current_metadata_df,
            n_trials=N_TRIALS,
            epochs=N_EPOCHS,
            patience=PATIENCE,
            batch_size=BATCH_SIZE
        )

        # Save results for this experiment together with the settings that produced them
        results_to_save = {
            'results_df': tuned_results_df,
            'predictions': tuned_predictions,
            'config': dict(run_config),
        }
        with open(results_save_path, 'wb') as f:
            pickle.dump(results_to_save, f)
        print(f"Results saved to {results_save_path}")
        tuned_dl_results[experiment_name] = results_to_save
    else:
        print(f"\nLoading pre-computed results for {experiment_name.upper()}")
        with open(results_save_path, 'rb') as f:
            cached_results = pickle.load(f)

        # Make a stale or unverifiable cache visible instead of silently mixing runs.
        cached_config = cached_results.get('config')
        if cached_config is None:
            print("  WARNING: cached results do not record their settings; "
                  "cannot confirm they match the current configuration.")
        elif cached_config != run_config:
            print(f"  WARNING: cached results were produced with {cached_config}, "
                  f"not the current {run_config}. Delete {results_save_path} to re-run.")
        tuned_dl_results[experiment_name] = cached_results

print("\n--- All Tuned Deep Learning experiments are now complete ---")


--- Running experiment: WAV2VEC2_CNN_LSTM_TUNED_READING ---
--- Training on device: cuda ---


Outer CV Fold:   0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-07-30 08:27:26,726] A new study created in memory with name: no-name-d2704a45-09bb-4a91-b557-de250c499b40
[I 2025-07-30 08:27:59,477] Trial 0 finished with value: 0.660340661517132 and parameters: {'learning_rate': 0.00011027801676130937, 'dropout_rate': 0.34457253263760035, 'cnn_out_channels': 32, 'lstm_hidden_dim': 128, 'activation_fn': 'silu'}. Best is trial 0 with value: 0.660340661517132.
[I 2025-07-30 08:28:13,809] Trial 1 finished with value: 0.5870189995189995 and parameters: {'learning_rate': 0.00013608905943042588, 'dropout_rate': 0.26355427761828776, 'cnn_out_channels': 32, 'lstm_hidden_dim': 64, 'activation_fn': 'gelu'}. Best is trial 0 with value: 0.660340661517132.
[I 2025-07-30 08:28:45,668] Trial 2 finished with value: 0.5636604136604136 and parameters: {'learning_rate': 0.0001339387853326478, 'dropout_rate': 0.4197843105294192, 'cnn_out_channels': 32, 'lstm_hidden_dim': 128, 'activation_fn': 'gelu'}. Best is trial 0 with value: 0.660340661517132.
[I 2025-07-30 

Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_tuned_reading.pkl

--- Running experiment: WAV2VEC2_CNN_LSTM_TUNED_INTERVIEW ---
--- Training on device: cuda ---


Outer CV Fold:   0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-07-30 09:12:22,232] A new study created in memory with name: no-name-d726a3f3-95f4-4873-9443-2948e9a39b1f
[I 2025-07-30 09:14:38,433] Trial 0 finished with value: 0.6520890937019969 and parameters: {'learning_rate': 1.8202174300751366e-05, 'dropout_rate': 0.47335611540118605, 'cnn_out_channels': 128, 'lstm_hidden_dim': 128, 'activation_fn': 'silu'}. Best is trial 0 with value: 0.6520890937019969.
[I 2025-07-30 09:15:33,564] Trial 1 finished with value: 0.6085717498875393 and parameters: {'learning_rate': 1.4809456018956788e-05, 'dropout_rate': 0.3702827157698344, 'cnn_out_channels': 64, 'lstm_hidden_dim': 64, 'activation_fn': 'silu'}. Best is trial 0 with value: 0.6520890937019969.
[I 2025-07-30 09:16:31,697] Trial 2 finished with value: 0.6348562848562849 and parameters: {'learning_rate': 0.000938075551771043, 'dropout_rate': 0.37192948417619587, 'cnn_out_channels': 128, 'lstm_hidden_dim': 64, 'activation_fn': 'gelu'}. Best is trial 0 with value: 0.6520890937019969.
[I 2025-07

Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_tuned_interview.pkl

--- Running experiment: WAV2VEC2_CNN_LSTM_TUNED_COMBINED ---
--- Training on device: cuda ---


Outer CV Fold:   0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-07-30 13:01:36,691] A new study created in memory with name: no-name-b260e579-f073-4ead-9092-d19cec0503d1
[I 2025-07-30 13:02:45,969] Trial 0 finished with value: 0.671802773678169 and parameters: {'learning_rate': 1.6116431463020307e-05, 'dropout_rate': 0.29970356800582626, 'cnn_out_channels': 32, 'lstm_hidden_dim': 64, 'activation_fn': 'silu'}. Best is trial 0 with value: 0.671802773678169.
[I 2025-07-30 13:05:21,960] Trial 1 finished with value: 0.4746254993744234 and parameters: {'learning_rate': 0.0004534257902303487, 'dropout_rate': 0.4286339715039213, 'cnn_out_channels': 32, 'lstm_hidden_dim': 128, 'activation_fn': 'gelu'}. Best is trial 0 with value: 0.671802773678169.
[I 2025-07-30 13:07:57,086] Trial 2 finished with value: 0.5979414213822816 and parameters: {'learning_rate': 1.2310139625926329e-05, 'dropout_rate': 0.3067118594318168, 'cnn_out_channels': 32, 'lstm_hidden_dim': 128, 'activation_fn': 'gelu'}. Best is trial 0 with value: 0.671802773678169.
[I 2025-07-30 1

Results saved to ../data/Processed_Features/results_wav2vec2_cnn_lstm_tuned_combined.pkl

--- All Tuned Deep Learning experiments are now complete ---
