Connected to thesis (Python 3.11.4)

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import AutoModel, AdamW, get_cosine_schedule_with_warmup

def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

# Function to gradually unfreeze layers
def gradually_unfreeze(model, epoch, total_epochs, initial_unfreeze=2):
    total_layers = len(model.wav_model.encoder.layers)
    layers_to_unfreeze = initial_unfreeze + (epoch * (total_layers - initial_unfreeze) // total_epochs)
    model.unfreeze_last_n_blocks(layers_to_unfreeze)
    

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        }
    }
    

    
    # Train and data parameters
    epochs = 128
    batch_size = 128
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 15
    
    config = model_config["VRBModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    processor = AutoProcessor.from_pretrained(config["model_name"])
    
    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/128: 100%|██████████| 7/7 [00:35<00:00,  5.11s/it, train_loss=1.0063]


Epoch 1/128 - Train Loss: 1.0063, Val Loss: 1.0034, Val Pearson: -0.0086
Validation loss improved from inf to 1.0034. Saving best model...


RuntimeError: [enforce fail at inline_container.cc:603] . unexpected pos 98176 vs 98064

Restarted thesis (Python 3.11.4)

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        }
    }
    

    
    # Train and data parameters
    epochs = 128
    batch_size = 128
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 3
    
    config = model_config["VRBModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    processor = AutoProcessor.from_pretrained(config["model_name"])
    
    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/128: 100%|██████████| 7/7 [00:37<00:00,  5.30s/it, train_loss=1.0028]


Epoch 1/128 - Train Loss: 1.0028, Val Loss: 1.0063, Val Pearson: -0.0148
Validation loss improved from inf to 1.0063. Saving best model...


Epoch 2/128: 100%|██████████| 7/7 [00:34<00:00,  4.87s/it, train_loss=0.9925]


Epoch 2/128 - Train Loss: 0.9925, Val Loss: 1.0088, Val Pearson: -0.0156
Validation loss did not improve for 1 epochs.


Epoch 3/128: 100%|██████████| 7/7 [00:35<00:00,  5.02s/it, train_loss=0.9882]


Epoch 3/128 - Train Loss: 0.9882, Val Loss: 1.0104, Val Pearson: -0.0165
Validation loss did not improve for 2 epochs.


Epoch 4/128: 100%|██████████| 7/7 [00:36<00:00,  5.16s/it, train_loss=0.9850]


Epoch 4/128 - Train Loss: 0.9850, Val Loss: 1.0110, Val Pearson: -0.0160
Validation loss did not improve for 3 epochs.
Early stopping triggered at epoch 4. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 1:
  Validation Pearson Coefficient  acc: -0.0062912702560424805
  Validation Pearson Coefficient flat acc: -0.014798802708906985
  Test acc: -0.009982633590698287
  Test Pearson Coefficient acc(flattened): -0.01610076019429338
Fold 2/4
(900, 480000)
750


Epoch 1/128:  25%|██▌       | 2/8 [00:12<00:35,  5.99s/it, train_loss=1.0000]

Restarted thesis (Python 3.11.4)

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        }
    }
    

    
    # Train and data parameters
    epochs = 128
    batch_size = 128
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["VRBModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    processor = AutoProcessor.from_pretrained(config["model_name"])
    
    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/128: 100%|██████████| 7/7 [00:33<00:00,  4.85s/it, train_loss=1.0058]


Epoch 1/128 - Train Loss: 1.0058, Val Loss: 1.0051, Val Pearson: -0.0082
Validation loss improved from inf to 1.0051. Saving best model...


Epoch 2/128: 100%|██████████| 7/7 [00:30<00:00,  4.31s/it, train_loss=0.9950]


Epoch 2/128 - Train Loss: 0.9950, Val Loss: 1.0087, Val Pearson: -0.0122
Validation loss did not improve for 1 epochs.


Epoch 3/128: 100%|██████████| 7/7 [00:30<00:00,  4.42s/it, train_loss=0.9901]


Epoch 3/128 - Train Loss: 0.9901, Val Loss: 1.0102, Val Pearson: -0.0132
Validation loss did not improve for 2 epochs.


Epoch 4/128: 100%|██████████| 7/7 [00:31<00:00,  4.46s/it, train_loss=0.9857]


Epoch 4/128 - Train Loss: 0.9857, Val Loss: 1.0108, Val Pearson: -0.0135
Validation loss did not improve for 3 epochs.


Epoch 5/128: 100%|██████████| 7/7 [00:28<00:00,  4.10s/it, train_loss=0.9881]


ValueError: array must not contain infs or NaNs

Restarted thesis (Python 3.11.4)

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        }
    }
    

    
    # Train and data parameters
    epochs = 128
    batch_size = 64
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["VRBModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    processor = AutoProcessor.from_pretrained(config["model_name"])
    
    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/128: 100%|██████████| 14/14 [00:30<00:00,  2.16s/it, train_loss=1.0073]


Epoch 1/128 - Train Loss: 1.0073, Val Loss: 0.9901, Val Pearson: -0.0009
Validation loss improved from inf to 0.9901. Saving best model...


Epoch 2/128: 100%|██████████| 14/14 [00:29<00:00,  2.13s/it, train_loss=0.9927]


Restarted thesis (Python 3.11.4)

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        }
    }
    

    
    # Train and data parameters
    epochs = 128
    batch_size = 64
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["VRBModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    processor = AutoProcessor.from_pretrained(config["model_name"])
    
    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/128: 100%|██████████| 14/14 [00:29<00:00,  2.09s/it, train_loss=1.0013]


Epoch 1/128 - Train Loss: 1.0013, Val Loss: 0.9151, Val Pearson: 0.1194
Validation loss improved from inf to 0.9151. Saving best model...


Epoch 2/128: 100%|██████████| 14/14 [00:29<00:00,  2.13s/it, train_loss=0.7097]


Epoch 2/128 - Train Loss: 0.7097, Val Loss: 0.5928, Val Pearson: 0.4455
Validation loss improved from 0.9151 to 0.5928. Saving best model...


Epoch 3/128: 100%|██████████| 14/14 [00:29<00:00,  2.08s/it, train_loss=0.5055]


Epoch 3/128 - Train Loss: 0.5055, Val Loss: 0.4834, Val Pearson: 0.5210
Validation loss improved from 0.5928 to 0.4834. Saving best model...


Epoch 4/128: 100%|██████████| 14/14 [00:30<00:00,  2.16s/it, train_loss=0.4334]


Epoch 4/128 - Train Loss: 0.4334, Val Loss: 0.4171, Val Pearson: 0.5944
Validation loss improved from 0.4834 to 0.4171. Saving best model...


Epoch 5/128: 100%|██████████| 14/14 [00:29<00:00,  2.12s/it, train_loss=0.3624]


Epoch 5/128 - Train Loss: 0.3624, Val Loss: 0.2834, Val Pearson: 0.7105
Validation loss improved from 0.4171 to 0.2834. Saving best model...


Epoch 6/128: 100%|██████████| 14/14 [00:29<00:00,  2.14s/it, train_loss=0.3349]


Epoch 6/128 - Train Loss: 0.3349, Val Loss: 0.2685, Val Pearson: 0.7146
Validation loss improved from 0.2834 to 0.2685. Saving best model...


Epoch 7/128: 100%|██████████| 14/14 [00:30<00:00,  2.17s/it, train_loss=0.2979]


Epoch 7/128 - Train Loss: 0.2979, Val Loss: 0.2365, Val Pearson: 0.7526
Validation loss improved from 0.2685 to 0.2365. Saving best model...


Epoch 8/128: 100%|██████████| 14/14 [00:29<00:00,  2.11s/it, train_loss=0.3526]


Epoch 8/128 - Train Loss: 0.3526, Val Loss: 0.3089, Val Pearson: 0.7217
Validation loss did not improve for 1 epochs.


Epoch 9/128: 100%|██████████| 14/14 [00:30<00:00,  2.15s/it, train_loss=0.3324]


Epoch 9/128 - Train Loss: 0.3324, Val Loss: 0.3153, Val Pearson: 0.6836
Validation loss did not improve for 2 epochs.


Epoch 10/128: 100%|██████████| 14/14 [00:29<00:00,  2.14s/it, train_loss=0.3091]


Epoch 10/128 - Train Loss: 0.3091, Val Loss: 0.2332, Val Pearson: 0.7531
Validation loss improved from 0.2365 to 0.2332. Saving best model...


Epoch 11/128: 100%|██████████| 14/14 [00:29<00:00,  2.10s/it, train_loss=0.2709]


Epoch 11/128 - Train Loss: 0.2709, Val Loss: 0.2260, Val Pearson: 0.7607
Validation loss improved from 0.2332 to 0.2260. Saving best model...


Epoch 12/128: 100%|██████████| 14/14 [00:30<00:00,  2.16s/it, train_loss=0.2591]


Epoch 12/128 - Train Loss: 0.2591, Val Loss: 0.2187, Val Pearson: 0.7679
Validation loss improved from 0.2260 to 0.2187. Saving best model...


Epoch 13/128: 100%|██████████| 14/14 [00:29<00:00,  2.11s/it, train_loss=0.2449]


Epoch 13/128 - Train Loss: 0.2449, Val Loss: 0.2199, Val Pearson: 0.7693
Validation loss did not improve for 1 epochs.


Epoch 14/128: 100%|██████████| 14/14 [00:29<00:00,  2.11s/it, train_loss=0.2332]


Epoch 14/128 - Train Loss: 0.2332, Val Loss: 0.2211, Val Pearson: 0.7729
Validation loss did not improve for 2 epochs.


Epoch 15/128: 100%|██████████| 14/14 [00:29<00:00,  2.09s/it, train_loss=0.2242]


Epoch 15/128 - Train Loss: 0.2242, Val Loss: 0.2265, Val Pearson: 0.7667
Validation loss did not improve for 3 epochs.


Epoch 16/128: 100%|██████████| 14/14 [00:29<00:00,  2.09s/it, train_loss=0.2087]


Epoch 16/128 - Train Loss: 0.2087, Val Loss: 0.2305, Val Pearson: 0.7674
Validation loss did not improve for 4 epochs.


Epoch 17/128: 100%|██████████| 14/14 [00:30<00:00,  2.16s/it, train_loss=0.1980]


Epoch 17/128 - Train Loss: 0.1980, Val Loss: 0.2346, Val Pearson: 0.7650
Validation loss did not improve for 5 epochs.


Epoch 18/128: 100%|██████████| 14/14 [00:29<00:00,  2.09s/it, train_loss=0.1787]


Epoch 18/128 - Train Loss: 0.1787, Val Loss: 0.2376, Val Pearson: 0.7634
Validation loss did not improve for 6 epochs.


Epoch 19/128: 100%|██████████| 14/14 [00:29<00:00,  2.09s/it, train_loss=0.1893]


Epoch 19/128 - Train Loss: 0.1893, Val Loss: 0.2489, Val Pearson: 0.7555
Validation loss did not improve for 7 epochs.


Epoch 20/128: 100%|██████████| 14/14 [00:30<00:00,  2.15s/it, train_loss=0.1655]


Epoch 20/128 - Train Loss: 0.1655, Val Loss: 0.2619, Val Pearson: 0.7570
Validation loss did not improve for 8 epochs.


Epoch 21/128: 100%|██████████| 14/14 [00:30<00:00,  2.15s/it, train_loss=0.1469]


Epoch 21/128 - Train Loss: 0.1469, Val Loss: 0.2542, Val Pearson: 0.7544
Validation loss did not improve for 9 epochs.


Epoch 22/128: 100%|██████████| 14/14 [00:35<00:00,  2.51s/it, train_loss=0.1565]


Epoch 22/128 - Train Loss: 0.1565, Val Loss: 0.3305, Val Pearson: 0.6708
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 22. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 1:
  Validation Pearson Coefficient  acc: 0.7812866866588593
  Validation Pearson Coefficient flat acc: 0.7679455613693423
  Test acc: 0.7636429203881158
  Test Pearson Coefficient acc(flattened): 0.7713440330556783
Fold 2/4
(900, 480000)
750


Epoch 1/128: 100%|██████████| 15/15 [00:36<00:00,  2.44s/it, train_loss=0.9922]


Epoch 1/128 - Train Loss: 0.9922, Val Loss: 0.8500, Val Pearson: 0.2306
Validation loss improved from inf to 0.8500. Saving best model...


Epoch 2/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.7321]


Epoch 2/128 - Train Loss: 0.7321, Val Loss: 0.6291, Val Pearson: 0.4245
Validation loss improved from 0.8500 to 0.6291. Saving best model...


Epoch 3/128: 100%|██████████| 15/15 [00:35<00:00,  2.40s/it, train_loss=0.5548]


Epoch 3/128 - Train Loss: 0.5548, Val Loss: 0.6478, Val Pearson: 0.4089
Validation loss did not improve for 1 epochs.


Epoch 4/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.4988]


Epoch 4/128 - Train Loss: 0.4988, Val Loss: 0.5645, Val Pearson: 0.4823
Validation loss improved from 0.6291 to 0.5645. Saving best model...


Epoch 5/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.4019]


Epoch 5/128 - Train Loss: 0.4019, Val Loss: 0.4358, Val Pearson: 0.6042
Validation loss improved from 0.5645 to 0.4358. Saving best model...


Epoch 6/128: 100%|██████████| 15/15 [00:36<00:00,  2.43s/it, train_loss=0.3162]


Epoch 6/128 - Train Loss: 0.3162, Val Loss: 0.3799, Val Pearson: 0.6587
Validation loss improved from 0.4358 to 0.3799. Saving best model...


Epoch 7/128: 100%|██████████| 15/15 [00:35<00:00,  2.36s/it, train_loss=0.3066]


Epoch 7/128 - Train Loss: 0.3066, Val Loss: 0.3795, Val Pearson: 0.6652
Validation loss improved from 0.3799 to 0.3795. Saving best model...


Epoch 8/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.3028]


Epoch 8/128 - Train Loss: 0.3028, Val Loss: 0.3596, Val Pearson: 0.6771
Validation loss improved from 0.3795 to 0.3596. Saving best model...


Epoch 9/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.2726]


Epoch 9/128 - Train Loss: 0.2726, Val Loss: 0.3581, Val Pearson: 0.6799
Validation loss improved from 0.3596 to 0.3581. Saving best model...


Epoch 10/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.2742]


Epoch 10/128 - Train Loss: 0.2742, Val Loss: 0.3493, Val Pearson: 0.6840
Validation loss improved from 0.3581 to 0.3493. Saving best model...


Epoch 11/128: 100%|██████████| 15/15 [00:35<00:00,  2.40s/it, train_loss=0.2616]


Epoch 11/128 - Train Loss: 0.2616, Val Loss: 0.3404, Val Pearson: 0.6952
Validation loss improved from 0.3493 to 0.3404. Saving best model...


Epoch 12/128: 100%|██████████| 15/15 [00:35<00:00,  2.40s/it, train_loss=0.2467]


Epoch 12/128 - Train Loss: 0.2467, Val Loss: 0.3356, Val Pearson: 0.6991
Validation loss improved from 0.3404 to 0.3356. Saving best model...


Epoch 13/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.2388]


Epoch 13/128 - Train Loss: 0.2388, Val Loss: 0.3318, Val Pearson: 0.7035
Validation loss improved from 0.3356 to 0.3318. Saving best model...


Epoch 14/128: 100%|██████████| 15/15 [00:35<00:00,  2.35s/it, train_loss=0.2334]


Epoch 14/128 - Train Loss: 0.2334, Val Loss: 0.3385, Val Pearson: 0.6996
Validation loss did not improve for 1 epochs.


Epoch 15/128: 100%|██████████| 15/15 [00:35<00:00,  2.38s/it, train_loss=0.2372]


Epoch 15/128 - Train Loss: 0.2372, Val Loss: 0.3337, Val Pearson: 0.7060
Validation loss did not improve for 2 epochs.


Epoch 16/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.2222]


Epoch 16/128 - Train Loss: 0.2222, Val Loss: 0.3364, Val Pearson: 0.7023
Validation loss did not improve for 3 epochs.


Epoch 17/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.2006]


Epoch 17/128 - Train Loss: 0.2006, Val Loss: 0.3350, Val Pearson: 0.7040
Validation loss did not improve for 4 epochs.


Epoch 18/128: 100%|██████████| 15/15 [00:36<00:00,  2.43s/it, train_loss=0.1979]


Epoch 18/128 - Train Loss: 0.1979, Val Loss: 0.3448, Val Pearson: 0.6972
Validation loss did not improve for 5 epochs.


Epoch 19/128: 100%|██████████| 15/15 [00:36<00:00,  2.44s/it, train_loss=0.1853]


Epoch 19/128 - Train Loss: 0.1853, Val Loss: 0.3313, Val Pearson: 0.7106
Validation loss improved from 0.3318 to 0.3313. Saving best model...


Epoch 20/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.1714]


Epoch 20/128 - Train Loss: 0.1714, Val Loss: 0.3334, Val Pearson: 0.7085
Validation loss did not improve for 1 epochs.


Epoch 21/128: 100%|██████████| 15/15 [00:36<00:00,  2.45s/it, train_loss=0.1797]


Epoch 21/128 - Train Loss: 0.1797, Val Loss: 0.3330, Val Pearson: 0.7110
Validation loss did not improve for 2 epochs.


Epoch 22/128: 100%|██████████| 15/15 [00:36<00:00,  2.40s/it, train_loss=0.1717]


Epoch 22/128 - Train Loss: 0.1717, Val Loss: 0.3511, Val Pearson: 0.6980
Validation loss did not improve for 3 epochs.


Epoch 23/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.1531]


Epoch 23/128 - Train Loss: 0.1531, Val Loss: 0.3450, Val Pearson: 0.7057
Validation loss did not improve for 4 epochs.


Epoch 24/128: 100%|██████████| 15/15 [00:36<00:00,  2.43s/it, train_loss=0.1405]


Epoch 24/128 - Train Loss: 0.1405, Val Loss: 0.3492, Val Pearson: 0.6911
Validation loss did not improve for 5 epochs.


Epoch 25/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.1402]


Epoch 25/128 - Train Loss: 0.1402, Val Loss: 0.3417, Val Pearson: 0.7076
Validation loss did not improve for 6 epochs.


Epoch 26/128: 100%|██████████| 15/15 [00:35<00:00,  2.38s/it, train_loss=0.1256]


Epoch 26/128 - Train Loss: 0.1256, Val Loss: 0.3577, Val Pearson: 0.6935
Validation loss did not improve for 7 epochs.


Epoch 27/128: 100%|██████████| 15/15 [00:36<00:00,  2.45s/it, train_loss=0.1106]


Epoch 27/128 - Train Loss: 0.1106, Val Loss: 0.3616, Val Pearson: 0.6930
Validation loss did not improve for 8 epochs.


Epoch 28/128: 100%|██████████| 15/15 [00:35<00:00,  2.38s/it, train_loss=0.1042]


Epoch 28/128 - Train Loss: 0.1042, Val Loss: 0.3602, Val Pearson: 0.6950
Validation loss did not improve for 9 epochs.


Epoch 29/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.0941]


Epoch 29/128 - Train Loss: 0.0941, Val Loss: 0.3685, Val Pearson: 0.6895
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 29. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 2:
  Validation Pearson Coefficient  acc: 0.6686662793159485
  Validation Pearson Coefficient flat acc: 0.7105616516151114
  Test acc: 0.7758767339918349
  Test Pearson Coefficient acc(flattened): 0.7932222368250055
Fold 3/4
(900, 480000)
750


Epoch 1/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.9908]


Epoch 1/128 - Train Loss: 0.9908, Val Loss: 0.9003, Val Pearson: 0.1474
Validation loss improved from inf to 0.9003. Saving best model...


Epoch 2/128: 100%|██████████| 15/15 [00:36<00:00,  2.45s/it, train_loss=0.7278]


Epoch 2/128 - Train Loss: 0.7278, Val Loss: 0.6272, Val Pearson: 0.4103
Validation loss improved from 0.9003 to 0.6272. Saving best model...


Epoch 3/128: 100%|██████████| 15/15 [00:37<00:00,  2.50s/it, train_loss=0.4942]


Epoch 3/128 - Train Loss: 0.4942, Val Loss: 0.5335, Val Pearson: 0.4629
Validation loss improved from 0.6272 to 0.5335. Saving best model...


Epoch 4/128: 100%|██████████| 15/15 [00:37<00:00,  2.47s/it, train_loss=0.4095]


Epoch 4/128 - Train Loss: 0.4095, Val Loss: 0.4726, Val Pearson: 0.5416
Validation loss improved from 0.5335 to 0.4726. Saving best model...


Epoch 5/128: 100%|██████████| 15/15 [00:36<00:00,  2.43s/it, train_loss=0.4843]


Epoch 5/128 - Train Loss: 0.4843, Val Loss: 0.6561, Val Pearson: 0.4332
Validation loss did not improve for 1 epochs.


Epoch 6/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.4372]


Epoch 6/128 - Train Loss: 0.4372, Val Loss: 0.4900, Val Pearson: 0.5405
Validation loss did not improve for 2 epochs.


Epoch 7/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.3221]


Epoch 7/128 - Train Loss: 0.3221, Val Loss: 0.4006, Val Pearson: 0.5904
Validation loss improved from 0.4726 to 0.4006. Saving best model...


Epoch 8/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.2770]


Epoch 8/128 - Train Loss: 0.2770, Val Loss: 0.3753, Val Pearson: 0.6115
Validation loss improved from 0.4006 to 0.3753. Saving best model...


Epoch 9/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.2571]


Epoch 9/128 - Train Loss: 0.2571, Val Loss: 0.3599, Val Pearson: 0.6273
Validation loss improved from 0.3753 to 0.3599. Saving best model...


Epoch 10/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.2526]


Epoch 10/128 - Train Loss: 0.2526, Val Loss: 0.3531, Val Pearson: 0.6344
Validation loss improved from 0.3599 to 0.3531. Saving best model...


Epoch 11/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.2343]


Epoch 11/128 - Train Loss: 0.2343, Val Loss: 0.3542, Val Pearson: 0.6351
Validation loss did not improve for 1 epochs.


Epoch 12/128: 100%|██████████| 15/15 [00:37<00:00,  2.47s/it, train_loss=0.2339]


Epoch 12/128 - Train Loss: 0.2339, Val Loss: 0.3478, Val Pearson: 0.6449
Validation loss improved from 0.3531 to 0.3478. Saving best model...


Epoch 13/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.2336]


Epoch 13/128 - Train Loss: 0.2336, Val Loss: 0.3458, Val Pearson: 0.6485
Validation loss improved from 0.3478 to 0.3458. Saving best model...


Epoch 14/128: 100%|██████████| 15/15 [00:36<00:00,  2.45s/it, train_loss=0.2237]


Epoch 14/128 - Train Loss: 0.2237, Val Loss: 0.3425, Val Pearson: 0.6523
Validation loss improved from 0.3458 to 0.3425. Saving best model...


Epoch 15/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.2124]


Epoch 15/128 - Train Loss: 0.2124, Val Loss: 0.3416, Val Pearson: 0.6525
Validation loss improved from 0.3425 to 0.3416. Saving best model...


Epoch 16/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.2225]


Epoch 16/128 - Train Loss: 0.2225, Val Loss: 0.3420, Val Pearson: 0.6531
Validation loss did not improve for 1 epochs.


Epoch 17/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.2128]


Epoch 17/128 - Train Loss: 0.2128, Val Loss: 0.3448, Val Pearson: 0.6511
Validation loss did not improve for 2 epochs.


Epoch 18/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.2123]


Epoch 18/128 - Train Loss: 0.2123, Val Loss: 0.3431, Val Pearson: 0.6546
Validation loss did not improve for 3 epochs.


Epoch 19/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.2047]


Epoch 19/128 - Train Loss: 0.2047, Val Loss: 0.3364, Val Pearson: 0.6595
Validation loss improved from 0.3416 to 0.3364. Saving best model...


Epoch 20/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.1929]


Epoch 20/128 - Train Loss: 0.1929, Val Loss: 0.3379, Val Pearson: 0.6662
Validation loss did not improve for 1 epochs.


Epoch 21/128: 100%|██████████| 15/15 [00:35<00:00,  2.36s/it, train_loss=0.1795]


Epoch 21/128 - Train Loss: 0.1795, Val Loss: 0.3397, Val Pearson: 0.6624
Validation loss did not improve for 2 epochs.


Epoch 22/128: 100%|██████████| 15/15 [00:36<00:00,  2.45s/it, train_loss=0.1752]


Epoch 22/128 - Train Loss: 0.1752, Val Loss: 0.3454, Val Pearson: 0.6566
Validation loss did not improve for 3 epochs.


Epoch 23/128: 100%|██████████| 15/15 [00:36<00:00,  2.43s/it, train_loss=0.1756]


Epoch 23/128 - Train Loss: 0.1756, Val Loss: 0.3769, Val Pearson: 0.6356
Validation loss did not improve for 4 epochs.


Epoch 24/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.1678]


Epoch 24/128 - Train Loss: 0.1678, Val Loss: 0.3370, Val Pearson: 0.6628
Validation loss did not improve for 5 epochs.


Epoch 25/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.1656]


Epoch 25/128 - Train Loss: 0.1656, Val Loss: 0.3354, Val Pearson: 0.6664
Validation loss improved from 0.3364 to 0.3354. Saving best model...


Epoch 26/128: 100%|██████████| 15/15 [00:37<00:00,  2.50s/it, train_loss=0.1451]


Epoch 26/128 - Train Loss: 0.1451, Val Loss: 0.3412, Val Pearson: 0.6618
Validation loss did not improve for 1 epochs.


Epoch 27/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.1349]


Epoch 27/128 - Train Loss: 0.1349, Val Loss: 0.3427, Val Pearson: 0.6603
Validation loss did not improve for 2 epochs.


Epoch 28/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.1283]


Epoch 28/128 - Train Loss: 0.1283, Val Loss: 0.3331, Val Pearson: 0.6651
Validation loss improved from 0.3354 to 0.3331. Saving best model...


Epoch 29/128: 100%|██████████| 15/15 [00:36<00:00,  2.40s/it, train_loss=0.1265]


Epoch 29/128 - Train Loss: 0.1265, Val Loss: 0.3496, Val Pearson: 0.6577
Validation loss did not improve for 1 epochs.


Epoch 30/128: 100%|██████████| 15/15 [00:37<00:00,  2.48s/it, train_loss=0.1280]


Epoch 30/128 - Train Loss: 0.1280, Val Loss: 0.3427, Val Pearson: 0.6614
Validation loss did not improve for 2 epochs.


Epoch 31/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.1146]


Epoch 31/128 - Train Loss: 0.1146, Val Loss: 0.3466, Val Pearson: 0.6623
Validation loss did not improve for 3 epochs.


Epoch 32/128: 100%|██████████| 15/15 [00:36<00:00,  2.44s/it, train_loss=0.0984]


Epoch 32/128 - Train Loss: 0.0984, Val Loss: 0.3477, Val Pearson: 0.6585
Validation loss did not improve for 4 epochs.


Epoch 33/128: 100%|██████████| 15/15 [00:36<00:00,  2.43s/it, train_loss=0.0873]


Epoch 33/128 - Train Loss: 0.0873, Val Loss: 0.3773, Val Pearson: 0.6421
Validation loss did not improve for 5 epochs.


Epoch 34/128: 100%|██████████| 15/15 [00:37<00:00,  2.48s/it, train_loss=0.0736]


Epoch 34/128 - Train Loss: 0.0736, Val Loss: 0.3635, Val Pearson: 0.6510
Validation loss did not improve for 6 epochs.


Epoch 35/128: 100%|██████████| 15/15 [00:36<00:00,  2.40s/it, train_loss=0.0707]


Epoch 35/128 - Train Loss: 0.0707, Val Loss: 0.3683, Val Pearson: 0.6471
Validation loss did not improve for 7 epochs.


Epoch 36/128: 100%|██████████| 15/15 [00:36<00:00,  2.45s/it, train_loss=0.0618]


Epoch 36/128 - Train Loss: 0.0618, Val Loss: 0.3879, Val Pearson: 0.6327
Validation loss did not improve for 8 epochs.


Epoch 37/128: 100%|██████████| 15/15 [00:37<00:00,  2.48s/it, train_loss=0.0549]


Epoch 37/128 - Train Loss: 0.0549, Val Loss: 0.3663, Val Pearson: 0.6465
Validation loss did not improve for 9 epochs.


Epoch 38/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.0516]


Epoch 38/128 - Train Loss: 0.0516, Val Loss: 0.3829, Val Pearson: 0.6403
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 38. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 3:
  Validation Pearson Coefficient  acc: 0.6668947458267211
  Validation Pearson Coefficient flat acc: 0.6650927818843155
  Test acc: 0.7909515698750814
  Test Pearson Coefficient acc(flattened): 0.8041059949908728
Fold 4/4
(900, 480000)
750


Epoch 1/128: 100%|██████████| 15/15 [00:34<00:00,  2.32s/it, train_loss=0.9675]


Epoch 1/128 - Train Loss: 0.9675, Val Loss: 0.8588, Val Pearson: 0.2659
Validation loss improved from inf to 0.8588. Saving best model...


Epoch 2/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.7091]


Epoch 2/128 - Train Loss: 0.7091, Val Loss: 0.6727, Val Pearson: 0.3818
Validation loss improved from 0.8588 to 0.6727. Saving best model...


Epoch 3/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.5363]


Epoch 3/128 - Train Loss: 0.5363, Val Loss: 0.6341, Val Pearson: 0.4141
Validation loss improved from 0.6727 to 0.6341. Saving best model...


Epoch 4/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.4162]


Epoch 4/128 - Train Loss: 0.4162, Val Loss: 0.5406, Val Pearson: 0.5075
Validation loss improved from 0.6341 to 0.5406. Saving best model...


Epoch 5/128: 100%|██████████| 15/15 [00:35<00:00,  2.35s/it, train_loss=0.3520]


Epoch 5/128 - Train Loss: 0.3520, Val Loss: 0.5273, Val Pearson: 0.5164
Validation loss improved from 0.5406 to 0.5273. Saving best model...


Epoch 6/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.3031]


Epoch 6/128 - Train Loss: 0.3031, Val Loss: 0.4498, Val Pearson: 0.5769
Validation loss improved from 0.5273 to 0.4498. Saving best model...


Epoch 7/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.2726]


Epoch 7/128 - Train Loss: 0.2726, Val Loss: 0.4319, Val Pearson: 0.6041
Validation loss improved from 0.4498 to 0.4319. Saving best model...


Epoch 8/128: 100%|██████████| 15/15 [00:36<00:00,  2.44s/it, train_loss=0.2738]


Epoch 8/128 - Train Loss: 0.2738, Val Loss: 0.4199, Val Pearson: 0.6159
Validation loss improved from 0.4319 to 0.4199. Saving best model...


Epoch 9/128: 100%|██████████| 15/15 [00:36<00:00,  2.40s/it, train_loss=0.2493]


Epoch 9/128 - Train Loss: 0.2493, Val Loss: 0.4152, Val Pearson: 0.6256
Validation loss improved from 0.4199 to 0.4152. Saving best model...


Epoch 10/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.2543]


Epoch 10/128 - Train Loss: 0.2543, Val Loss: 0.4160, Val Pearson: 0.6242
Validation loss did not improve for 1 epochs.


Epoch 11/128: 100%|██████████| 15/15 [00:36<00:00,  2.44s/it, train_loss=0.2315]


Epoch 11/128 - Train Loss: 0.2315, Val Loss: 0.4006, Val Pearson: 0.6403
Validation loss improved from 0.4152 to 0.4006. Saving best model...


Epoch 12/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.2203]


Epoch 12/128 - Train Loss: 0.2203, Val Loss: 0.3950, Val Pearson: 0.6450
Validation loss improved from 0.4006 to 0.3950. Saving best model...


Epoch 13/128: 100%|██████████| 15/15 [00:35<00:00,  2.35s/it, train_loss=0.2086]


Epoch 13/128 - Train Loss: 0.2086, Val Loss: 0.3828, Val Pearson: 0.6546
Validation loss improved from 0.3950 to 0.3828. Saving best model...


Epoch 14/128: 100%|██████████| 15/15 [00:35<00:00,  2.35s/it, train_loss=0.2138]


Epoch 14/128 - Train Loss: 0.2138, Val Loss: 0.3822, Val Pearson: 0.6506
Validation loss improved from 0.3828 to 0.3822. Saving best model...


Epoch 15/128: 100%|██████████| 15/15 [00:36<00:00,  2.44s/it, train_loss=0.1954]


Epoch 15/128 - Train Loss: 0.1954, Val Loss: 0.3806, Val Pearson: 0.6593
Validation loss improved from 0.3822 to 0.3806. Saving best model...


Epoch 16/128: 100%|██████████| 15/15 [00:36<00:00,  2.42s/it, train_loss=0.1996]


Epoch 16/128 - Train Loss: 0.1996, Val Loss: 0.3808, Val Pearson: 0.6603
Validation loss did not improve for 1 epochs.


Epoch 17/128: 100%|██████████| 15/15 [00:36<00:00,  2.43s/it, train_loss=0.1992]


Epoch 17/128 - Train Loss: 0.1992, Val Loss: 0.3763, Val Pearson: 0.6625
Validation loss improved from 0.3806 to 0.3763. Saving best model...


Epoch 18/128: 100%|██████████| 15/15 [00:36<00:00,  2.44s/it, train_loss=0.1822]


Epoch 18/128 - Train Loss: 0.1822, Val Loss: 0.3845, Val Pearson: 0.6575
Validation loss did not improve for 1 epochs.


Epoch 19/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.1748]


Epoch 19/128 - Train Loss: 0.1748, Val Loss: 0.3805, Val Pearson: 0.6609
Validation loss did not improve for 2 epochs.


Epoch 20/128: 100%|██████████| 15/15 [00:35<00:00,  2.34s/it, train_loss=0.1695]


Epoch 20/128 - Train Loss: 0.1695, Val Loss: 0.3999, Val Pearson: 0.6485
Validation loss did not improve for 3 epochs.


Epoch 21/128: 100%|██████████| 15/15 [00:35<00:00,  2.37s/it, train_loss=0.1524]


Epoch 21/128 - Train Loss: 0.1524, Val Loss: 0.3797, Val Pearson: 0.6619
Validation loss did not improve for 4 epochs.


Epoch 22/128: 100%|██████████| 15/15 [00:36<00:00,  2.41s/it, train_loss=0.1395]


Epoch 22/128 - Train Loss: 0.1395, Val Loss: 0.4204, Val Pearson: 0.6246
Validation loss did not improve for 5 epochs.


Epoch 23/128: 100%|██████████| 15/15 [00:35<00:00,  2.36s/it, train_loss=0.1589]


Epoch 23/128 - Train Loss: 0.1589, Val Loss: 0.3991, Val Pearson: 0.6439
Validation loss did not improve for 6 epochs.


Epoch 24/128: 100%|██████████| 15/15 [00:35<00:00,  2.40s/it, train_loss=0.1287]


Epoch 24/128 - Train Loss: 0.1287, Val Loss: 0.3980, Val Pearson: 0.6455
Validation loss did not improve for 7 epochs.


Epoch 25/128: 100%|██████████| 15/15 [00:35<00:00,  2.39s/it, train_loss=0.1109]


Epoch 25/128 - Train Loss: 0.1109, Val Loss: 0.4080, Val Pearson: 0.6368
Validation loss did not improve for 8 epochs.


Epoch 26/128: 100%|██████████| 15/15 [00:36<00:00,  2.40s/it, train_loss=0.0946]


Epoch 26/128 - Train Loss: 0.0946, Val Loss: 0.4155, Val Pearson: 0.6314
Validation loss did not improve for 9 epochs.


Epoch 27/128: 100%|██████████| 15/15 [00:35<00:00,  2.38s/it, train_loss=0.0880]


Epoch 27/128 - Train Loss: 0.0880, Val Loss: 0.4218, Val Pearson: 0.6279
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 27. Loading best model.


  model.load_state_dict(torch.load(best_model_path))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or In

Fold 4:
  Validation Pearson Coefficient  acc: 0.6237318634986877
  Validation Pearson Coefficient flat acc: 0.6625138473549642
  Test acc: 0.7788847287495931
  Test Pearson Coefficient acc(flattened): 0.7945983461230314

Training completed.
Average metrics across all folds:
  val_prc_acc: 0.6851448938250542
  val_prc_acc_flat: 0.7015284605559333
  test_acc: 0.7773389882511563
  test_prc_flat: 0.7908176527486469
  Fold: Average


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 25
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertCNNModelV2"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    processor = AutoProcessor.from_pretrained(config["model_name"])
    
    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )



OSError: Can't load tokenizer for 'microsoft/wavlm-large'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'microsoft/wavlm-large' is the correct path to a directory containing all relevant files for a Wav2Vec2CTCTokenizer tokenizer.

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 25
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertCNNModelV2"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/35 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.36 GiB. GPU 0 has a total capacity of 93.12 GiB of which 2.34 GiB is free. Including non-PyTorch memory, this process has 90.77 GiB memory in use. Of the allocated memory 89.21 GiB is allocated by PyTorch, and 912.60 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 15
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertCNNModelV2"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/58 [00:01<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.75 GiB. GPU 0 has a total capacity of 93.12 GiB of which 1.41 GiB is free. Including non-PyTorch memory, this process has 91.70 GiB memory in use. Of the allocated memory 90.99 GiB is allocated by PyTorch, and 40.32 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 15
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertCNNModelV2"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/58 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.02 GiB. GPU 0 has a total capacity of 93.12 GiB of which 314.75 MiB is free. Including non-PyTorch memory, this process has 92.80 GiB memory in use. Of the allocated memory 91.90 GiB is allocated by PyTorch, and 245.51 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 13
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertCNNModelV2"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.8820]


Epoch 1/60 - Train Loss: 0.8820, Val Loss: 0.9271, Val Pearson: 0.1845
Validation loss improved from inf to 0.9271. Saving best model...


Epoch 2/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.4037]


Epoch 2/60 - Train Loss: 0.4037, Val Loss: 0.4840, Val Pearson: 0.7195
Validation loss improved from 0.9271 to 0.4840. Saving best model...


Epoch 3/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.2746]


Epoch 3/60 - Train Loss: 0.2746, Val Loss: 0.1686, Val Pearson: 0.8376
Validation loss improved from 0.4840 to 0.1686. Saving best model...


Epoch 4/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.2206]


Epoch 4/60 - Train Loss: 0.2206, Val Loss: 0.2022, Val Pearson: 0.8093
Validation loss did not improve for 1 epochs.


Epoch 5/60: 100%|██████████| 67/67 [02:08<00:00,  1.92s/it, train_loss=0.1811]


Epoch 5/60 - Train Loss: 0.1811, Val Loss: 0.1725, Val Pearson: 0.8296
Validation loss did not improve for 2 epochs.


Epoch 6/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.1428]


Epoch 6/60 - Train Loss: 0.1428, Val Loss: 0.1616, Val Pearson: 0.8407
Validation loss improved from 0.1686 to 0.1616. Saving best model...


Epoch 7/60: 100%|██████████| 67/67 [02:08<00:00,  1.92s/it, train_loss=0.1143]


Epoch 7/60 - Train Loss: 0.1143, Val Loss: 0.1737, Val Pearson: 0.8324
Validation loss did not improve for 1 epochs.


Epoch 8/60: 100%|██████████| 67/67 [02:10<00:00,  1.94s/it, train_loss=0.0871]


Epoch 8/60 - Train Loss: 0.0871, Val Loss: 0.1736, Val Pearson: 0.8300
Validation loss did not improve for 2 epochs.


Epoch 9/60: 100%|██████████| 67/67 [02:10<00:00,  1.94s/it, train_loss=0.0820]


Epoch 9/60 - Train Loss: 0.0820, Val Loss: 0.1770, Val Pearson: 0.8332
Validation loss did not improve for 3 epochs.


Epoch 10/60: 100%|██████████| 67/67 [02:10<00:00,  1.95s/it, train_loss=0.0649]


Epoch 10/60 - Train Loss: 0.0649, Val Loss: 0.1662, Val Pearson: 0.8398
Validation loss did not improve for 4 epochs.


Epoch 11/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.0542]


Epoch 11/60 - Train Loss: 0.0542, Val Loss: 0.1617, Val Pearson: 0.8414
Validation loss did not improve for 5 epochs.


Epoch 12/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.0545]


Epoch 12/60 - Train Loss: 0.0545, Val Loss: 0.1612, Val Pearson: 0.8467
Validation loss improved from 0.1616 to 0.1612. Saving best model...


Epoch 13/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0475]


Epoch 13/60 - Train Loss: 0.0475, Val Loss: 0.1871, Val Pearson: 0.8310
Validation loss did not improve for 1 epochs.


Epoch 14/60: 100%|██████████| 67/67 [02:07<00:00,  1.91s/it, train_loss=0.0419]


Epoch 14/60 - Train Loss: 0.0419, Val Loss: 0.1576, Val Pearson: 0.8476
Validation loss improved from 0.1612 to 0.1576. Saving best model...


Epoch 15/60: 100%|██████████| 67/67 [02:08<00:00,  1.92s/it, train_loss=0.0373]


Epoch 15/60 - Train Loss: 0.0373, Val Loss: 0.1612, Val Pearson: 0.8468
Validation loss did not improve for 1 epochs.


Epoch 16/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0367]


Epoch 16/60 - Train Loss: 0.0367, Val Loss: 0.1599, Val Pearson: 0.8438
Validation loss did not improve for 2 epochs.


Epoch 17/60: 100%|██████████| 67/67 [02:10<00:00,  1.94s/it, train_loss=0.0360]


Epoch 17/60 - Train Loss: 0.0360, Val Loss: 0.1682, Val Pearson: 0.8373
Validation loss did not improve for 3 epochs.


Epoch 18/60: 100%|██████████| 67/67 [02:10<00:00,  1.94s/it, train_loss=0.0317]


Epoch 18/60 - Train Loss: 0.0317, Val Loss: 0.1696, Val Pearson: 0.8399
Validation loss did not improve for 4 epochs.


Epoch 19/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0289]


Epoch 19/60 - Train Loss: 0.0289, Val Loss: 0.1642, Val Pearson: 0.8417
Validation loss did not improve for 5 epochs.


Epoch 20/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0267]


Epoch 20/60 - Train Loss: 0.0267, Val Loss: 0.1847, Val Pearson: 0.8306
Validation loss did not improve for 6 epochs.


Epoch 21/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.0247]


Epoch 21/60 - Train Loss: 0.0247, Val Loss: 0.1638, Val Pearson: 0.8401
Validation loss did not improve for 7 epochs.


Epoch 22/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.0225]


Epoch 22/60 - Train Loss: 0.0225, Val Loss: 0.1605, Val Pearson: 0.8456
Validation loss did not improve for 8 epochs.


Epoch 23/60: 100%|██████████| 67/67 [02:08<00:00,  1.92s/it, train_loss=0.0209]


Epoch 23/60 - Train Loss: 0.0209, Val Loss: 0.1577, Val Pearson: 0.8450
Validation loss did not improve for 9 epochs.


Epoch 24/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.0195]


Epoch 24/60 - Train Loss: 0.0195, Val Loss: 0.1649, Val Pearson: 0.8422
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 24. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 1:
  Validation Pearson Coefficient  acc: 0.8423852348327636
  Validation Pearson Coefficient flat acc: 0.8475608024030405
  Test acc: 0.7859138780170016
  Test Pearson Coefficient acc(flattened): 0.8007289066878426
Fold 2/4
(900, 480000)
750


Epoch 1/60: 100%|██████████| 70/70 [02:13<00:00,  1.90s/it, train_loss=0.7956]


Epoch 1/60 - Train Loss: 0.7956, Val Loss: 0.6812, Val Pearson: 0.4716
Validation loss improved from inf to 0.6812. Saving best model...


Epoch 2/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.2667]


Epoch 2/60 - Train Loss: 0.2667, Val Loss: 0.2774, Val Pearson: 0.7448
Validation loss improved from 0.6812 to 0.2774. Saving best model...


Epoch 3/60: 100%|██████████| 70/70 [02:12<00:00,  1.89s/it, train_loss=0.1832]


Epoch 3/60 - Train Loss: 0.1832, Val Loss: 0.2921, Val Pearson: 0.7379
Validation loss did not improve for 1 epochs.


Epoch 4/60: 100%|██████████| 70/70 [02:14<00:00,  1.91s/it, train_loss=0.1404]


Epoch 4/60 - Train Loss: 0.1404, Val Loss: 0.2514, Val Pearson: 0.7631
Validation loss improved from 0.2774 to 0.2514. Saving best model...


Epoch 5/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.1093]


Epoch 5/60 - Train Loss: 0.1093, Val Loss: 0.2481, Val Pearson: 0.7648
Validation loss improved from 0.2514 to 0.2481. Saving best model...


Epoch 6/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0849]


Epoch 6/60 - Train Loss: 0.0849, Val Loss: 0.2498, Val Pearson: 0.7626
Validation loss did not improve for 1 epochs.


Epoch 7/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0694]


Epoch 7/60 - Train Loss: 0.0694, Val Loss: 0.2540, Val Pearson: 0.7609
Validation loss did not improve for 2 epochs.


Epoch 8/60: 100%|██████████| 70/70 [02:14<00:00,  1.91s/it, train_loss=0.0632]


Epoch 8/60 - Train Loss: 0.0632, Val Loss: 0.2542, Val Pearson: 0.7544
Validation loss did not improve for 3 epochs.


Epoch 9/60: 100%|██████████| 70/70 [02:14<00:00,  1.91s/it, train_loss=0.0512]


Epoch 9/60 - Train Loss: 0.0512, Val Loss: 0.2548, Val Pearson: 0.7558
Validation loss did not improve for 4 epochs.


Epoch 10/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0471]


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 13
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertCNNModelV2"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.8485]


Epoch 1/60 - Train Loss: 0.8485, Val Loss: 0.5918, Val Pearson: 0.4822
Validation loss improved from inf to 0.5918. Saving best model...


Epoch 2/60: 100%|██████████| 67/67 [02:08<00:00,  1.91s/it, train_loss=0.4059]


Epoch 2/60 - Train Loss: 0.4059, Val Loss: 0.2168, Val Pearson: 0.7915
Validation loss improved from 0.5918 to 0.2168. Saving best model...


Epoch 3/60: 100%|██████████| 67/67 [02:08<00:00,  1.91s/it, train_loss=0.2648]


Epoch 3/60 - Train Loss: 0.2648, Val Loss: 0.1711, Val Pearson: 0.8308
Validation loss improved from 0.2168 to 0.1711. Saving best model...


Epoch 4/60: 100%|██████████| 67/67 [02:08<00:00,  1.92s/it, train_loss=0.2053]


Epoch 4/60 - Train Loss: 0.2053, Val Loss: 0.1694, Val Pearson: 0.8359
Validation loss improved from 0.1711 to 0.1694. Saving best model...


Epoch 5/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.1643]


Epoch 5/60 - Train Loss: 0.1643, Val Loss: 0.1751, Val Pearson: 0.8335
Validation loss did not improve for 1 epochs.


Epoch 6/60: 100%|██████████| 67/67 [02:08<00:00,  1.92s/it, train_loss=0.1326]


Epoch 6/60 - Train Loss: 0.1326, Val Loss: 0.1593, Val Pearson: 0.8444
Validation loss improved from 0.1694 to 0.1593. Saving best model...


Epoch 7/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.1051]


Epoch 7/60 - Train Loss: 0.1051, Val Loss: 0.1749, Val Pearson: 0.8363
Validation loss did not improve for 1 epochs.


Epoch 8/60: 100%|██████████| 67/67 [02:09<00:00,  1.94s/it, train_loss=0.0845]


Epoch 8/60 - Train Loss: 0.0845, Val Loss: 0.1877, Val Pearson: 0.8277
Validation loss did not improve for 2 epochs.


Epoch 9/60: 100%|██████████| 67/67 [02:08<00:00,  1.92s/it, train_loss=0.0748]


Epoch 9/60 - Train Loss: 0.0748, Val Loss: 0.1616, Val Pearson: 0.8450
Validation loss did not improve for 3 epochs.


Epoch 10/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0654]


Epoch 10/60 - Train Loss: 0.0654, Val Loss: 0.1627, Val Pearson: 0.8419
Validation loss did not improve for 4 epochs.


Epoch 11/60: 100%|██████████| 67/67 [02:08<00:00,  1.91s/it, train_loss=0.0541]


Epoch 11/60 - Train Loss: 0.0541, Val Loss: 0.1660, Val Pearson: 0.8431
Validation loss did not improve for 5 epochs.


Epoch 12/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0452]


Epoch 12/60 - Train Loss: 0.0452, Val Loss: 0.1659, Val Pearson: 0.8454
Validation loss did not improve for 6 epochs.


Epoch 13/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0380]


Epoch 13/60 - Train Loss: 0.0380, Val Loss: 0.1691, Val Pearson: 0.8399
Validation loss did not improve for 7 epochs.


Epoch 14/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0332]


Epoch 14/60 - Train Loss: 0.0332, Val Loss: 0.1650, Val Pearson: 0.8440
Validation loss did not improve for 8 epochs.


Epoch 15/60: 100%|██████████| 67/67 [02:07<00:00,  1.90s/it, train_loss=0.0339]


Epoch 15/60 - Train Loss: 0.0339, Val Loss: 0.1852, Val Pearson: 0.8330
Validation loss did not improve for 9 epochs.


Epoch 16/60: 100%|██████████| 67/67 [02:09<00:00,  1.93s/it, train_loss=0.0285]


Epoch 16/60 - Train Loss: 0.0285, Val Loss: 0.1856, Val Pearson: 0.8334
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 16. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 1:
  Validation Pearson Coefficient  acc: 0.8407481002807617
  Validation Pearson Coefficient flat acc: 0.8444429401851432
  Test acc: 0.8068313148286608
  Test Pearson Coefficient acc(flattened): 0.8187036644945287
Fold 2/4
(900, 480000)
750


Epoch 1/60: 100%|██████████| 70/70 [02:14<00:00,  1.93s/it, train_loss=0.7436]


Epoch 1/60 - Train Loss: 0.7436, Val Loss: 0.4820, Val Pearson: 0.5720
Validation loss improved from inf to 0.4820. Saving best model...


Epoch 2/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.2384]


Epoch 2/60 - Train Loss: 0.2384, Val Loss: 0.2624, Val Pearson: 0.7540
Validation loss improved from 0.4820 to 0.2624. Saving best model...


Epoch 3/60: 100%|██████████| 70/70 [02:12<00:00,  1.89s/it, train_loss=0.1605]


Epoch 3/60 - Train Loss: 0.1605, Val Loss: 0.2848, Val Pearson: 0.7390
Validation loss did not improve for 1 epochs.


Epoch 4/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.1238]


Epoch 4/60 - Train Loss: 0.1238, Val Loss: 0.2843, Val Pearson: 0.7345
Validation loss did not improve for 2 epochs.


Epoch 5/60: 100%|██████████| 70/70 [02:15<00:00,  1.93s/it, train_loss=0.0947]


Epoch 5/60 - Train Loss: 0.0947, Val Loss: 0.3035, Val Pearson: 0.7129
Validation loss did not improve for 3 epochs.


Epoch 6/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0757]


Epoch 6/60 - Train Loss: 0.0757, Val Loss: 0.2848, Val Pearson: 0.7305
Validation loss did not improve for 4 epochs.


Epoch 7/60: 100%|██████████| 70/70 [02:16<00:00,  1.95s/it, train_loss=0.0580]


Epoch 7/60 - Train Loss: 0.0580, Val Loss: 0.2701, Val Pearson: 0.7436
Validation loss did not improve for 5 epochs.


Epoch 8/60: 100%|██████████| 70/70 [02:14<00:00,  1.93s/it, train_loss=0.0509]


Epoch 8/60 - Train Loss: 0.0509, Val Loss: 0.3061, Val Pearson: 0.7152
Validation loss did not improve for 6 epochs.


Epoch 9/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0450]


Epoch 9/60 - Train Loss: 0.0450, Val Loss: 0.2823, Val Pearson: 0.7323
Validation loss did not improve for 7 epochs.


Epoch 10/60: 100%|██████████| 70/70 [02:17<00:00,  1.97s/it, train_loss=0.0356]


Epoch 10/60 - Train Loss: 0.0356, Val Loss: 0.2823, Val Pearson: 0.7284
Validation loss did not improve for 8 epochs.


Epoch 11/60: 100%|██████████| 70/70 [02:15<00:00,  1.93s/it, train_loss=0.0394]


Epoch 11/60 - Train Loss: 0.0394, Val Loss: 0.2684, Val Pearson: 0.7452
Validation loss did not improve for 9 epochs.


Epoch 12/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0309]


Epoch 12/60 - Train Loss: 0.0309, Val Loss: 0.2820, Val Pearson: 0.7336
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 12. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 2:
  Validation Pearson Coefficient  acc: 0.7376274881155594
  Validation Pearson Coefficient flat acc: 0.7539924720269748
  Test acc: 0.8144472320874532
  Test Pearson Coefficient acc(flattened): 0.8206217674927101
Fold 3/4
(900, 480000)
750


Epoch 1/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.7808]


Epoch 1/60 - Train Loss: 0.7808, Val Loss: 0.5202, Val Pearson: 0.5410
Validation loss improved from inf to 0.5202. Saving best model...


Epoch 2/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.2727]


Epoch 2/60 - Train Loss: 0.2727, Val Loss: 0.3196, Val Pearson: 0.6845
Validation loss improved from 0.5202 to 0.3196. Saving best model...


Epoch 3/60: 100%|██████████| 70/70 [02:15<00:00,  1.93s/it, train_loss=0.1864]


Epoch 3/60 - Train Loss: 0.1864, Val Loss: 0.3364, Val Pearson: 0.6625
Validation loss did not improve for 1 epochs.


Epoch 4/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.1545]


Epoch 4/60 - Train Loss: 0.1545, Val Loss: 0.3081, Val Pearson: 0.6969
Validation loss improved from 0.3196 to 0.3081. Saving best model...


Epoch 5/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.1226]


Epoch 5/60 - Train Loss: 0.1226, Val Loss: 0.3352, Val Pearson: 0.6674
Validation loss did not improve for 1 epochs.


Epoch 6/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0894]


Epoch 6/60 - Train Loss: 0.0894, Val Loss: 0.3341, Val Pearson: 0.6630
Validation loss did not improve for 2 epochs.


Epoch 7/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0751]


Epoch 7/60 - Train Loss: 0.0751, Val Loss: 0.3469, Val Pearson: 0.6543
Validation loss did not improve for 3 epochs.


Epoch 8/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0572]


Epoch 8/60 - Train Loss: 0.0572, Val Loss: 0.3100, Val Pearson: 0.6902
Validation loss did not improve for 4 epochs.


Epoch 9/60: 100%|██████████| 70/70 [02:13<00:00,  1.90s/it, train_loss=0.0464]


Epoch 9/60 - Train Loss: 0.0464, Val Loss: 0.3251, Val Pearson: 0.6745
Validation loss did not improve for 5 epochs.


Epoch 10/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0381]


Epoch 10/60 - Train Loss: 0.0381, Val Loss: 0.3263, Val Pearson: 0.6751
Validation loss did not improve for 6 epochs.


Epoch 11/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0372]


Epoch 11/60 - Train Loss: 0.0372, Val Loss: 0.3330, Val Pearson: 0.6696
Validation loss did not improve for 7 epochs.


Epoch 12/60: 100%|██████████| 70/70 [02:13<00:00,  1.90s/it, train_loss=0.0332]


Epoch 12/60 - Train Loss: 0.0332, Val Loss: 0.3226, Val Pearson: 0.6809
Validation loss did not improve for 8 epochs.


Epoch 13/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0294]


Epoch 13/60 - Train Loss: 0.0294, Val Loss: 0.3296, Val Pearson: 0.6699
Validation loss did not improve for 9 epochs.


Epoch 14/60: 100%|██████████| 70/70 [02:12<00:00,  1.90s/it, train_loss=0.0266]


Epoch 14/60 - Train Loss: 0.0266, Val Loss: 0.3289, Val Pearson: 0.6749
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 14. Loading best model.


  model.load_state_dict(torch.load(best_model_path))


Fold 3:
  Validation Pearson Coefficient  acc: 0.6918946608253147
  Validation Pearson Coefficient flat acc: 0.6969476326705786
  Test acc: 0.8040992577870687
  Test Pearson Coefficient acc(flattened): 0.8153199017917342
Fold 4/4
(900, 480000)
750


Epoch 1/60: 100%|██████████| 70/70 [02:15<00:00,  1.93s/it, train_loss=0.7181]


Epoch 1/60 - Train Loss: 0.7181, Val Loss: 0.5269, Val Pearson: 0.5855
Validation loss improved from inf to 0.5269. Saving best model...


Epoch 2/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.2376]


Epoch 2/60 - Train Loss: 0.2376, Val Loss: 0.3257, Val Pearson: 0.6850
Validation loss improved from 0.5269 to 0.3257. Saving best model...


Epoch 3/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.1800]


Epoch 3/60 - Train Loss: 0.1800, Val Loss: 0.3300, Val Pearson: 0.6822
Validation loss did not improve for 1 epochs.


Epoch 4/60: 100%|██████████| 70/70 [02:14<00:00,  1.93s/it, train_loss=0.1331]


Epoch 4/60 - Train Loss: 0.1331, Val Loss: 0.3367, Val Pearson: 0.6767
Validation loss did not improve for 2 epochs.


Epoch 5/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0999]


Epoch 5/60 - Train Loss: 0.0999, Val Loss: 0.3143, Val Pearson: 0.6936
Validation loss improved from 0.3257 to 0.3143. Saving best model...


Epoch 6/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0768]


Epoch 6/60 - Train Loss: 0.0768, Val Loss: 0.3215, Val Pearson: 0.6941
Validation loss did not improve for 1 epochs.


Epoch 7/60: 100%|██████████| 70/70 [02:15<00:00,  1.94s/it, train_loss=0.0626]


Epoch 7/60 - Train Loss: 0.0626, Val Loss: 0.3023, Val Pearson: 0.7080
Validation loss improved from 0.3143 to 0.3023. Saving best model...


Epoch 8/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0557]


Epoch 8/60 - Train Loss: 0.0557, Val Loss: 0.3185, Val Pearson: 0.6966
Validation loss did not improve for 1 epochs.


Epoch 9/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0515]


Epoch 9/60 - Train Loss: 0.0515, Val Loss: 0.3020, Val Pearson: 0.7109
Validation loss improved from 0.3023 to 0.3020. Saving best model...


Epoch 10/60: 100%|██████████| 70/70 [02:14<00:00,  1.93s/it, train_loss=0.0424]


Epoch 10/60 - Train Loss: 0.0424, Val Loss: 0.3194, Val Pearson: 0.6954
Validation loss did not improve for 1 epochs.


Epoch 11/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0450]


Epoch 11/60 - Train Loss: 0.0450, Val Loss: 0.3250, Val Pearson: 0.6934
Validation loss did not improve for 2 epochs.


Epoch 12/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0438]


Epoch 12/60 - Train Loss: 0.0438, Val Loss: 0.3186, Val Pearson: 0.6985
Validation loss did not improve for 3 epochs.


Epoch 13/60: 100%|██████████| 70/70 [02:15<00:00,  1.94s/it, train_loss=0.0305]


Epoch 13/60 - Train Loss: 0.0305, Val Loss: 0.3190, Val Pearson: 0.6993
Validation loss did not improve for 4 epochs.


Epoch 14/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0327]


Epoch 14/60 - Train Loss: 0.0327, Val Loss: 0.3196, Val Pearson: 0.6945
Validation loss did not improve for 5 epochs.


Epoch 15/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0285]


Epoch 15/60 - Train Loss: 0.0285, Val Loss: 0.3159, Val Pearson: 0.7037
Validation loss did not improve for 6 epochs.


Epoch 16/60: 100%|██████████| 70/70 [02:15<00:00,  1.94s/it, train_loss=0.0264]


Epoch 16/60 - Train Loss: 0.0264, Val Loss: 0.3236, Val Pearson: 0.6954
Validation loss did not improve for 7 epochs.


Epoch 17/60: 100%|██████████| 70/70 [02:15<00:00,  1.94s/it, train_loss=0.0197]


Epoch 17/60 - Train Loss: 0.0197, Val Loss: 0.3190, Val Pearson: 0.6990
Validation loss did not improve for 8 epochs.


Epoch 18/60: 100%|██████████| 70/70 [02:13<00:00,  1.91s/it, train_loss=0.0189]


Epoch 18/60 - Train Loss: 0.0189, Val Loss: 0.3197, Val Pearson: 0.6954
Validation loss did not improve for 9 epochs.


Epoch 19/60: 100%|██████████| 70/70 [02:14<00:00,  1.92s/it, train_loss=0.0186]


Epoch 19/60 - Train Loss: 0.0186, Val Loss: 0.3251, Val Pearson: 0.6928
Validation loss did not improve for 10 epochs.
Early stopping triggered at epoch 19. Loading best model.


  model.load_state_dict(torch.load(best_model_path))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or In

Fold 4:
  Validation Pearson Coefficient  acc: 0.6979680061340332
  Validation Pearson Coefficient flat acc: 0.7109361224983757
  Test acc: 0.806722899278005
  Test Pearson Coefficient acc(flattened): 0.8146534796330512

Training completed.
Average metrics across all folds:
  val_prc_acc: 0.7420595638389174
  val_prc_acc_flat: 0.7515797918452681
  test_acc: 0.8080251759952969
  test_prc_flat: 0.8173247033530061
  Fold: Average


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 13
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertCNNModelV2"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:  33%|███▎      | 22/67 [00:44<01:27,  1.95s/it, train_loss=0.9990]

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 13
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/67 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.75 GiB. GPU 0 has a total capacity of 93.12 GiB of which 218.75 MiB is free. Including non-PyTorch memory, this process has 92.90 GiB memory in use. Of the allocated memory 91.37 GiB is allocated by PyTorch, and 882.05 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/87 [00:02<?, ?it/s]


AssertionError: was expecting embedding dimension of 3072, but got 1499

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/87 [00:02<?, ?it/s]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])





AssertionError: was expecting embedding dimension of 3072, but got 1499

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/87 [00:02<?, ?it/s]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])





AssertionError: was expecting embedding dimension of 3072, but got 10

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/87 [00:01<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.35 GiB. GPU 0 has a total capacity of 93.12 GiB of which 98.75 MiB is free. Including non-PyTorch memory, this process has 93.02 GiB memory in use. Of the allocated memory 91.01 GiB is allocated by PyTorch, and 1.34 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/87 [00:02<?, ?it/s]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([10, 1499, 3072])





AssertionError: was expecting embedding dimension of 3072, but got 1499

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/87 [00:02<?, ?it/s]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([10, 1499, 3072])





TypeError: transpose() received an invalid combination of arguments - got (int, int, int), but expected one of:
 * (int dim0, int dim1)
 * (name dim0, name dim1)


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/87 [00:02<?, ?it/s]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([1499, 10, 3072])
torch.Size([1499, 750])





RuntimeError: The size of tensor a (1499) must match the size of tensor b (10) at non-singleton dimension 0

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 10
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750




torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   1%|          | 1/87 [00:03<05:06,  3.56s/it, train_loss=1.0101]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   2%|▏         | 2/87 [00:05<03:26,  2.42s/it, train_loss=1.0236]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   3%|▎         | 3/87 [00:06<02:52,  2.06s/it, train_loss=1.0235]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   5%|▍         | 4/87 [00:08<02:42,  1.95s/it, train_loss=1.0186]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   6%|▌         | 5/87 [00:10<02:33,  1.88s/it, train_loss=1.0244]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   7%|▋         | 6/87 [00:11<02:24,  1.79s/it, train_loss=1.0170]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   8%|▊         | 7/87 [00:13<02:18,  1.73s/it, train_loss=1.0124]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:   9%|▉         | 8/87 [00:15<02:15,  1.72s/it, train_loss=1.0165]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  10%|█         | 9/87 [00:16<02:11,  1.69s/it, train_loss=1.0110]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  11%|█▏        | 10/87 [00:18<02:12,  1.72s/it, train_loss=1.0121]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  13%|█▎        | 11/87 [00:20<02:08,  1.69s/it, train_loss=1.0134]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  14%|█▍        | 12/87 [00:22<02:08,  1.71s/it, train_loss=1.0055]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  15%|█▍        | 13/87 [00:23<02:08,  1.74s/it, train_loss=1.0101]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  16%|█▌        | 14/87 [00:25<02:04,  1.70s/it, train_loss=1.0112]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  17%|█▋        | 15/87 [00:27<02:03,  1.71s/it, train_loss=1.0154]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  18%|█▊        | 16/87 [00:28<02:02,  1.72s/it, train_loss=1.0123]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  20%|█▉        | 17/87 [00:30<01:58,  1.69s/it, train_loss=1.0146]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  21%|██        | 18/87 [00:32<01:54,  1.66s/it, train_loss=1.0162]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  22%|██▏       | 19/87 [00:33<01:53,  1.67s/it, train_loss=1.0190]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  23%|██▎       | 20/87 [00:35<01:53,  1.69s/it, train_loss=1.0236]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  24%|██▍       | 21/87 [00:37<01:46,  1.61s/it, train_loss=1.0244]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  25%|██▌       | 22/87 [00:38<01:43,  1.59s/it, train_loss=1.0233]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  26%|██▋       | 23/87 [00:40<01:42,  1.60s/it, train_loss=1.0170]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  28%|██▊       | 24/87 [00:41<01:42,  1.62s/it, train_loss=1.0158]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  29%|██▊       | 25/87 [00:43<01:38,  1.59s/it, train_loss=1.0081]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  30%|██▉       | 26/87 [00:45<01:38,  1.61s/it, train_loss=1.0053]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  31%|███       | 27/87 [00:46<01:37,  1.63s/it, train_loss=1.0021]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  32%|███▏      | 28/87 [00:48<01:38,  1.67s/it, train_loss=1.0011]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  33%|███▎      | 29/87 [00:49<01:31,  1.57s/it, train_loss=1.0008]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  34%|███▍      | 30/87 [00:51<01:32,  1.62s/it, train_loss=1.0009]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  36%|███▌      | 31/87 [00:53<01:31,  1.64s/it, train_loss=0.9993]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  37%|███▋      | 32/87 [00:54<01:30,  1.65s/it, train_loss=0.9994]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  38%|███▊      | 33/87 [00:56<01:30,  1.68s/it, train_loss=0.9980]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  39%|███▉      | 34/87 [00:58<01:27,  1.64s/it, train_loss=0.9978]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  40%|████      | 35/87 [00:59<01:25,  1.65s/it, train_loss=1.0004]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  41%|████▏     | 36/87 [01:01<01:24,  1.66s/it, train_loss=0.9985]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  43%|████▎     | 37/87 [01:03<01:23,  1.66s/it, train_loss=0.9998]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  44%|████▎     | 38/87 [01:04<01:17,  1.57s/it, train_loss=1.0030]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  45%|████▍     | 39/87 [01:06<01:15,  1.57s/it, train_loss=1.0041]

torch.Size([10, 1499, 1024])
torch.Size([10, 1499, 3072])
torch.Size([1499, 10, 3072])
torch.Size([10, 1499, 3072])
torch.Size([10, 750])


Epoch 1/60:  46%|████▌     | 40/87 [01:07<01:17,  1.64s/it, train_loss=1.0038]

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    base_dir = "pt_runs_SOTA"
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

def _calculate_flattened_accuracy(average, ground_truth_labels):
    s_acc = 0
    for b in range(len(ground_truth_labels)):
        s, _ = scipy.stats.pearsonr(average[b], ground_truth_labels[b])
        s_acc += s
    return s_acc / len(ground_truth_labels)

def _choose_real_labs_only_with_filenames(labels, filenames):
    return labels[labels['filename'].isin(filenames)]

def _get_ground_truth_labels(ground_truth_names, labels):
    ground_truth_labels = []
    for batch_name in ground_truth_names:
        ground_truth_label = _choose_real_labs_only_with_filenames(labels, [batch_name])
        ground_truth_labels.append(ground_truth_label)
    return np.array(ground_truth_labels)[:, :, -1].astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, data_parts=4, epochs=100, batch_size=10, early_stopping_patience=20, config = None, processor = None):
    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size 
    step_sequence = step_size

    # Load and prepare data
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    prepared_data, prepared_labels, prepared_labels_timesteps = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    # Create CSV file for storing fold indices
    fold_indices_df = pd.DataFrame(columns=['Fold', 'Train_Indices', 'Val_Indices'])

    # Cross-validation
    kf = KFold(n_splits=data_parts)
    fold_metrics = []
    # To accumulate metrics across folds for each epoch
    train_acc_epoch = []
    val_acc_epoch = []
    test_acc_epoch = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir,config["model_name"]))

    for fold, (train_index, val_index) in enumerate(kf.split(prepared_data)):
        print(f"Fold {fold + 1}/{data_parts}")
        best_model_path = f"{run_dir}/best_model_fold{fold+1}"
        # Save fold indices
        fold_indices_df = fold_indices_df._append({
            'Fold': fold + 1,
            'Train_Indices': train_index.tolist(),
            'Val_Indices': val_index.tolist()
        }, ignore_index=True)

        # Split data
        train_d, val_d = prepared_data[train_index], prepared_data[val_index]
        train_lbs, val_lbs = prepared_labels[train_index], prepared_labels[val_index]
        train_timesteps, val_timesteps = prepared_labels_timesteps[train_index], prepared_labels_timesteps[val_index]
        
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(train_d, train_lbs)
        val_d, val_lbs = reshaping_data_for_model(val_d, val_lbs)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)
        
        print(train_d.shape)

        # Create datasets
        #train_dataset = BreathingDataset(train_d, train_lbs, processor, window_size, step_sequence, augment=True)
        train_dataset = BreathingDataset(train_d, train_lbs, processor,window_size, step_sequence)
        val_dataset = BreathingDataset(val_d, val_lbs, processor, window_size, step_sequence)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size,num_workers=2, shuffle=True, collate_fn=train_dataset.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, collate_fn=val_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, collate_fn=test_dataset.collate_fn)
        print(config["output_size"])
        # Create and initialize model
        model = config["model"](config).to(device)
        
        #### training optimiser parameters fror apple
        #learning_rate = 0.005 
        #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        #### training optimiser parameters fror harma_2023 VRB model 
        learning_rate = 0.01 # From the paper
        optimizer = optim.Adam(model.parameters())       
        
        best_val_loss = float('inf')
        best_val_loss_flat = float('inf')
        early_stopping_counter = 0
        # To accumulate metrics across folds for each epoch
        train_acc = []
        val_acc = []
        test_acc = []
        for epoch in range(epochs):

            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            
            for batch_d, batch_lbs in progress_bar:
                optimizer.zero_grad()
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})


            train_loss /= len(train_loader)

            # Combined validation loop
            model.eval()
            val_loss = 0.0
            val_pred = []
            with torch.no_grad():
                for batch_d, batch_lbs in val_loader:
                    input_values = batch_d.to(device)
                    batch_lbs = batch_lbs.to(device)
                    
                    outputs = model(input_values)
                    loss = correlation_coefficient_loss(outputs, batch_lbs)
                    val_loss += loss.item()
                    val_pred.extend(outputs.cpu().numpy())

            val_loss /= len(val_loader)

            # Calculate validation metrics
            val_pred = np.array(val_pred).reshape(val_timesteps.shape)
            val_ground_truth = _get_ground_truth_labels([all_dict[i] for i in val_index], all_labels)
            val_pred_flat = unsplit_data_ogsize(val_pred, window_size, step_sequence, 25, val_ground_truth.shape[-1])
            val_prc_coef = _calculate_flattened_accuracy(val_pred_flat, val_ground_truth)
            
            # Accumulate metrics for this fold and epoch
            train_acc.append(1- train_loss)
            val_acc.append(1- val_loss)

            # Log metrics
            writer.add_scalar(f"Loss/train_fold_{fold + 1}", train_loss, epoch)
            writer.add_scalar(f"Loss/val_fold_{fold + 1}", val_loss, epoch)
            writer.add_scalar(f"Pearson/val_fold_{fold + 1}", val_prc_coef, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Pearson: {val_prc_coef:.4f}")

            # Check if validation loss improved
            if val_loss < best_val_loss:
                print(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving best model...")
                best_val_loss = val_loss
                best_val_loss_flat = val_prc_coef
                early_stopping_counter = 0

                # Save the best model
                torch.save(model.state_dict(), best_model_path)
            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve for {early_stopping_counter} epochs.")
                #model.load_state_dict(torch.load(best_model_path))


            # Early stopping
            if early_stopping_counter >= early_stopping_patience:
                print(f"Early stopping triggered at epoch {epoch + 1}. Loading best model.")
                # Load the best model's weights
                model.load_state_dict(torch.load(best_model_path))
                break

        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)
                
                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print(f"Fold {fold + 1}:")
        print(f"  Validation Pearson Coefficient  acc: {1- best_val_loss}")
        print(f"  Validation Pearson Coefficient flat acc: {best_val_loss_flat}")
        print(f"  Test acc: {1- test_loss}")
        print(f"  Test Pearson Coefficient acc(flattened): {test_prc_coef}")

        fold_metrics.append({
            'Fold': fold + 1,
            'val_prc_acc': 1- best_val_loss,
            'val_prc_acc_flat': best_val_loss_flat,
            'test_acc': 1- test_loss,
            'test_prc_flat': test_prc_coef
        })


                # Log fold-specific metrics as tables
        fold_table = f"| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                     f"|------|-----------------|------------------|----------|-------------------|\n" \
                     f"| {fold + 1} | {1 - val_loss:.4f} | {val_prc_coef:.4f} | {1 - test_loss:.4f} | {test_prc_coef:.4f} |\n"
        writer.add_text(f"Fold_{fold + 1}_Metrics", fold_table)
        # Accumulate fold metrics across all folds
        train_acc_epoch.append(train_acc)
        val_acc_epoch.append(train_acc)


    
        # After all folds, compute and log the average metrics per epoch across all folds
    for epoch in range(epochs):
        avg_train_loss = np.mean([fold_losses[epoch] for fold_losses in train_acc_epoch if len(fold_losses) > epoch])
        avg_val_loss = np.mean([fold_losses[epoch] for fold_losses in val_acc_epoch if len(fold_losses) > epoch])

        # Log the averaged metrics for the epoch across all folds
        writer.add_scalar("Average_acc/train", avg_train_loss, epoch)
        writer.add_scalar("Average_acc/val", avg_val_loss, epoch)
            

    # Calculate average metrics
    avg_metrics = {key: np.mean([fold[key] for fold in fold_metrics if key != 'Fold']) for key in fold_metrics[0].keys() if key != 'Fold'}
        # Log the final average table
    avg_table = "| Fold | Val Pearson Acc | Val Pearson Flat | Test Acc | Test Pearson Flat |\n" \
                "|------|-----------------|------------------|----------|-------------------|\n" \
                f"| Average | {avg_metrics['val_prc_acc']:.4f} | {avg_metrics['val_prc_acc_flat']:.4f} | {avg_metrics['test_acc']:.4f} | {avg_metrics['test_prc_flat']:.4f} |\n"
    writer.add_text("Average_Metrics", avg_table)
    # Add average metrics to results
    avg_metrics['Fold'] = 'Average'
    fold_metrics.append(avg_metrics)

    # save averga date to CSV
    results_df = pd.DataFrame(fold_metrics)
    csv_path = os.path.join(run_dir, 'fold_results.csv')
    results_df.to_csv(csv_path, index=False)
    
    # Save fold indices CSV
    fold_indices_df.to_csv(os.path.join(run_dir, 'fold_indices.csv'), index=False)
    
    writer.close()


    print("\nTraining completed.")
    print("Average metrics across all folds:")
    for key, value in avg_metrics.items():
        print(f"  {key}: {value}")


if __name__ == "__main__":
    ## Path to data
    #path = "/home/glenn/Downloads/"
    path = "../DATA/"


    # Model parameters
    model_config = {
        "VRBModel": {
            "model" : VRBModel,
            "model_name": "facebook/hubert-large-ll60k",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None  # Will be set dynamically
        },
        "Wav2Vec2ConvLSTMModel": {
            "model" : Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None  # Will be set dynamically
        },
            "RespBertCNNModelV2": {
            'model' : RespBertCNNModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "output_size": None  
        },
            "RespBertLSTMModelV2": {
            'model': RespBertLSTMModelV2,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 128,
            "n_lstm": 3,
            "output_size": None  
        }
            ,
            "RespBertLSTMCNNTransformerModel": {
            'model': RespBertLSTMCNNTransformerModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 1024,
            "n_lstm": 2,
            "output_size": None  
        }
    }
    

    
    # Train and data parameters
    epochs = 60
    batch_size = 15
    window_size = 30
    step_size = 6
    data_parts = 4 # aka folds
    early_stopping_patience = 10
    
    config = model_config["RespBertLSTMCNNTransformerModel"]
    

    ## same wav2vec2 base model and pipeline used in the paper
    #processor = AutoProcessor.from_pretrained(config["model_name"])
    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    train(
        path_to_data=path+"ComParE2020_Breathing/wav/",
        path_to_labels=path+"ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config = config,
        step_size=step_size,
        data_parts= data_parts ,
        early_stopping_patience= early_stopping_patience,
        epochs= epochs,
        processor = processor
    )

Fold 1/4
(864, 480000)
750


Epoch 1/60:   0%|          | 0/58 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.02 GiB. GPU 0 has a total capacity of 93.12 GiB of which 590.75 MiB is free. Including non-PyTorch memory, this process has 92.54 GiB memory in use. Of the allocated memory 90.94 GiB is allocated by PyTorch, and 954.10 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)