In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
from datetime import datetime
#from torch.utils.tensorboard import SummaryWriter
from transformers import Wav2Vec2Processor, Wav2Vec2Model
import torchaudio
from typing import List, Tuple
from pt_utils import *
from pt_dataset import *
from pt_models import *
from pt_utils import *
from tensorboardX import SummaryWriter
from transformers import get_cosine_schedule_with_warmup


def create_run_directory():
    """Create a timestamped run directory under ``pt_runs`` and return its path.

    The directory name is the current local time formatted as
    ``YYYYmmdd-HHMMSS``. Missing parent directories are created, and an
    already-existing directory is reused without error.
    """
    stamp = f"{datetime.now():%Y%m%d-%H%M%S}"
    target = os.path.join("pt_runs", stamp)
    os.makedirs(target, exist_ok=True)
    return target

def _calculate_flattened_accuracy(average, ground_truth_labels):
    """Return the mean Pearson correlation between predictions and ground truth.

    Despite the name, this is not a classification accuracy: for every index
    ``b`` in ``ground_truth_labels`` the Pearson r between ``average[b]`` and
    ``ground_truth_labels[b]`` is computed, and the r values are averaged.

    Args:
        average: Indexable collection of predicted sequences, one per entry
            of ``ground_truth_labels``.
        ground_truth_labels: Sized, iterable collection of reference
            sequences.

    Returns:
        The arithmetic mean of the per-sequence Pearson coefficients.

    Raises:
        ValueError: If ``ground_truth_labels`` is empty, since the mean is
            undefined.
    """
    # Imported locally because this file never imports scipy itself; without
    # this the name only resolves if a star-import happens to export it.
    from scipy.stats import pearsonr

    n_sequences = len(ground_truth_labels)
    if n_sequences == 0:
        raise ValueError(
            "ground_truth_labels is empty; cannot average Pearson correlations"
        )

    total = 0
    for idx, truth in enumerate(ground_truth_labels):
        coefficient, _ = pearsonr(average[idx], truth)
        total += coefficient
    return total / n_sequences

def _choose_real_labs_only_with_filenames(labels, filenames):
    """Return the rows of ``labels`` whose ``filename`` value is in ``filenames``.

    Args:
        labels: DataFrame with a ``filename`` column.
        filenames: Collection of file names to keep.

    Returns:
        The matching rows of ``labels``, in their original order and with
        their original index.
    """
    keep_mask = labels['filename'].isin(filenames)
    return labels.loc[keep_mask]

def _get_ground_truth_labels(ground_truth_names, labels):
    """Collect the last label column for each named file as a float32 array.

    For every name in ``ground_truth_names`` the matching rows of ``labels``
    are selected, the per-file frames are stacked with ``np.array`` and the
    last column is taken.

    Args:
        ground_truth_names: Iterable of file names, one per output row.
        labels: DataFrame with a ``filename`` column.

    Returns:
        A ``float32`` array indexed as ``[file, row]``.
    """
    per_file_frames = [
        _choose_real_labs_only_with_filenames(labels, [name])
        for name in ground_truth_names
    ]
    # The three-axis index below only works if np.array produced a 3-D array,
    # which presumably requires every file to contribute the same number of
    # rows -- TODO confirm against the label files.
    stacked = np.array(per_file_frames)
    last_column = stacked[:, :, -1]
    return last_column.astype(np.float32)

def train(path_to_data, path_to_labels, window_size=16, step_size=6, epochs=100, batch_size=10, config=None, model=None, processor=None, data_parts=None, early_stopping_patience=None):
    """Train one model on train+devel data and evaluate it on the test split.

    The 'train' and 'devel' partitions are concatenated and used for
    training; the checkpoint with the lowest mean training loss is reloaded
    and scored on the 'test' partition. TensorBoard logs, the best checkpoint
    and a ``final_results.csv`` are written to a new directory returned by
    ``create_run_directory()``.

    Args:
        path_to_data: Data location forwarded to ``load_data``.
        path_to_labels: Label location forwarded to ``load_data``.
        window_size: Window length; multiplied by 16000 before being passed
            to ``prepare_data`` (presumably seconds at 16 kHz -- TODO confirm).
        step_size: Window step, scaled the same way as ``window_size``.
        epochs: Maximum number of training epochs.
        batch_size: Batch size for both data loaders.
        config: Required dict. ``config["model"]`` is called with the dict to
            build the model and ``config["model_name"]`` names the log
            sub-directory. ``config["output_size"]`` is overwritten in place
            with the last dimension of the prepared labels.
        model: Unused; the model is always rebuilt from ``config``. Kept so
            existing callers that pass it keep working.
        processor: Forwarded to the dataset constructors.
        data_parts: Accepted because the script entry point passes it, but
            ignored: this routine performs no fold splitting. A notice is
            printed when a value is supplied.
        early_stopping_patience: If not ``None``, stop training once the mean
            training loss has failed to improve for this many consecutive
            epochs. ``None`` (the default) always runs all ``epochs``.

    Raises:
        TypeError: If ``config`` is ``None``.
    """
    # Fail before creating directories or loading data; the original only
    # failed later, when it tried to assign into the None config.
    if config is None:
        raise TypeError(
            "train() requires a config dict (with at least 'model' and "
            "'model_name'); got None"
        )
    if data_parts is not None:
        print(f"Note: data_parts={data_parts} is ignored; train() fits one model on train+devel without folds.")

    run_dir = create_run_directory()
    log_dir = os.path.join(run_dir, "logs")
    os.makedirs(log_dir, exist_ok=True)

    # Parameters
    length_sequence = window_size
    step_sequence = step_size

    # Load and prepare data. frame_rate is rebound by every call, so the
    # value used below is the one returned for the 'test' partition.
    train_data, train_labels, train_dict, frame_rate = load_data(path_to_data, path_to_labels, 'train')
    devel_data, devel_labels, devel_dict, frame_rate = load_data(path_to_data, path_to_labels, 'devel')
    test_data, test_labels, test_dict, frame_rate = load_data(path_to_data, path_to_labels, 'test')

    # Combine train and devel data
    all_data = np.concatenate((train_data, devel_data), axis=0)
    all_labels = pd.concat([train_labels, devel_labels])
    all_dict = np.concatenate((list(train_dict.values()), list(devel_dict.values())), axis=0)

    # Prepare data
    # NOTE(review): the training call passes an array of dict values while the
    # test call passes the dict itself -- confirm prepare_data accepts both.
    prepared_data, prepared_labels, _ = prepare_data(all_data, all_labels, all_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)
    prepared_test_data, prepared_test_labels, prepared_test_labels_timesteps = prepare_data(test_data, test_labels, test_dict, frame_rate, length_sequence * 16000, step_sequence * 16000)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Mutates the caller's dict on purpose: the model constructor reads it.
    config["output_size"] = prepared_labels.shape[-1]
    writer = SummaryWriter(log_dir=os.path.join(log_dir, config["model_name"]))

    # Everything that logs to the writer runs inside try/finally so the event
    # file is closed even when training or evaluation raises.
    try:
        # Reshape data
        train_d, train_lbs = reshaping_data_for_model(prepared_data, prepared_labels)
        test_d, test_lbs = reshaping_data_for_model(prepared_test_data, prepared_test_labels)

        print(train_d.shape)

        # Create datasets
        train_dataset = GPUBreathingDataset(train_d, train_lbs, processor, augment=True)
        test_dataset = BreathingDataset(test_d, test_lbs, processor, window_size, step_sequence)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=train_dataset.collate_fn)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=5, collate_fn=test_dataset.collate_fn)

        # Create and initialize model
        model = config["model"](config).to(device)

        # Optimizer and scheduler setup
        learning_rate = 5e-4
        weight_decay = 0.001
        optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

        # The schedule is stepped once per batch, so it is sized in batches.
        total_steps = len(train_loader) * epochs
        warmup_steps = int(total_steps * 0.1)
        scheduler = get_cosine_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=warmup_steps,
                                                    num_training_steps=total_steps)

        best_train_loss = float('inf')
        best_model_path = f"{run_dir}/best_model"
        epochs_without_improvement = 0

        for epoch in range(epochs):
            model.train()
            train_loss = 0.0
            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            # NOTE(review): unlike the test loop, batches are not moved to
            # `device` here -- presumably the training dataset/collate_fn
            # already yields device tensors; confirm.
            for input_values, batch_lbs in progress_bar:
                optimizer.zero_grad()

                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                progress_bar.set_postfix({'train_loss': f'{train_loss/(progress_bar.n+1):.4f}'})
                scheduler.step()

            train_loss /= len(train_loader)

            # Log metrics
            writer.add_scalar("Loss/train", train_loss, epoch)

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}")

            # Save the best model (selection is by training loss; this
            # routine has no validation split).
            if train_loss < best_train_loss:
                print(f"Train loss improved from {best_train_loss:.4f} to {train_loss:.4f}. Saving best model...")
                best_train_loss = train_loss
                torch.save(model.state_dict(), best_model_path)
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if early_stopping_patience is not None and epochs_without_improvement >= early_stopping_patience:
                    print(f"Train loss did not improve for {epochs_without_improvement} epochs. Stopping early.")
                    break

        # Load the best model for final evaluation. No checkpoint exists when
        # no epoch ran or no loss ever compared below infinity (e.g. NaN).
        if os.path.exists(best_model_path):
            model.load_state_dict(torch.load(best_model_path))
        else:
            print("No best-model checkpoint was saved; evaluating the current weights.")

        # Evaluate model on test data
        model.eval()
        test_pred = []
        test_loss = 0.0
        with torch.no_grad():
            for batch_d, batch_lbs in test_loader:
                input_values = batch_d.to(device)
                batch_lbs = batch_lbs.to(device)

                outputs = model(input_values)
                loss = correlation_coefficient_loss(outputs, batch_lbs)
                test_loss += loss.item()
                test_pred.extend(outputs.cpu().numpy())

        test_loss /= len(test_loader)
        test_pred = np.array(test_pred).reshape(prepared_test_labels_timesteps.shape)
        test_ground_truth = _get_ground_truth_labels(list(test_dict.values()), test_labels)
        # The literal 25 is forwarded unchanged to unsplit_data_ogsize
        # (presumably the label rate in Hz -- TODO confirm).
        test_pred_flat = unsplit_data_ogsize(test_pred, window_size, step_sequence, 25, test_ground_truth.shape[-1])
        test_prc_coef = _calculate_flattened_accuracy(test_pred_flat, test_ground_truth)

        print("\nTraining completed.")
        print(f"Final Test Loss: {test_loss:.4f}")
        print(f"Final Test Pearson Coefficient (flattened): {test_prc_coef:.4f}")

        # Log final test metrics
        writer.add_scalar("Final/test_loss", test_loss, 0)
        writer.add_scalar("Final/test_pearson_coef", test_prc_coef, 0)

        # Log the final test metrics as a table
        final_table = "| Metric | Value |\n" \
                      "|--------|-------|\n" \
                      f"| Test Loss | {test_loss:.4f} |\n" \
                      f"| Test Pearson Coefficient | {test_prc_coef:.4f} |\n"
        writer.add_text("Final_Test_Metrics", final_table)
    finally:
        writer.close()

    # Save final results to CSV
    results_df = pd.DataFrame({
        'Test_Loss': [test_loss],
        'Test_Pearson_Coefficient': [test_prc_coef]
    })
    csv_path = os.path.join(run_dir, 'final_results.csv')
    results_df.to_csv(csv_path, index=False)

    print(f"Results saved to {csv_path}")

if __name__ == "__main__":
    # Imported explicitly: none of this file's top-level import statements
    # names Wav2Vec2FeatureExtractor, so without this line it would have to
    # arrive through one of the pt_* star-imports.
    from transformers import Wav2Vec2FeatureExtractor

    # Root folder that contains the ComParE2020_Breathing directory.
    path = "../DATA/"

    # Model parameters, keyed by architecture name. Each entry holds the model
    # class, the pretrained checkpoint name and the head hyper-parameters;
    # "output_size" starts as None and is filled in by train().
    model_config = {
        "VRBModel": {
            "model": VRBModel,
            "model_name": "facebook/hubert-large-ls960-ft",
            "hidden_units": 64,
            "n_gru": 3,
            "output_size": None,
        },
        "Wav2Vec2ConvLSTMModel": {
            "model": Wav2Vec2ConvLSTMModel,
            "model_name": "facebook/wav2vec2-base",
            "hidden_units": 128,
            "n_lstm": 2,
            "output_size": None,
        },
        "RespBertLSTMModel": {
            "model": RespBertLSTMModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 256,
            "n_lstm": 2,
            "output_size": None,
        },
        "RespBertAttionModel": {
            "model": RespBertAttionModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 512,
            "n_attion": 2,
            "output_size": None,
        },
        "RespBertCNNModel": {
            "model": RespBertCNNModel,
            "model_name": "microsoft/wavlm-large",
            "hidden_units": 256,
            "output_size": None,
        },
    }

    # Train and data parameters
    epochs = 70
    batch_size = 11
    window_size = 30
    step_size = 6
    data_parts = 4  # aka folds
    early_stopping_patience = 10

    # Architecture to train in this run.
    config = model_config["RespBertCNNModel"]

    model = None

    processor = Wav2Vec2FeatureExtractor.from_pretrained(config["model_name"])

    # NOTE(review): train() must accept every keyword passed here, including
    # data_parts and early_stopping_patience.
    train(
        path_to_data=path + "ComParE2020_Breathing/wav/",
        path_to_labels=path + "ComParE2020_Breathing/lab/",
        window_size=window_size,
        batch_size=batch_size,
        config=config,
        step_size=step_size,
        data_parts=data_parts,
        early_stopping_patience=early_stopping_patience,
        epochs=epochs,
        model=model,
        processor=processor,
    )

Fold 1/4
(864, 480000)
750


  with torch.cuda.amp.autocast():
Epoch 1/70: 100%|██████████| 72/72 [02:27<00:00,  2.05s/it, train_loss=0.9958]


Epoch 1/70 - Train Loss: 0.9958, Val Loss: 0.9521, Val Pearson: 0.0910
Validation loss improved from inf to 0.9521. Saving best model...


Epoch 2/70: 100%|██████████| 72/72 [02:28<00:00,  2.06s/it, train_loss=0.7800]


Epoch 2/70 - Train Loss: 0.7800, Val Loss: 0.4329, Val Pearson: 0.6395
Validation loss improved from 0.9521 to 0.4329. Saving best model...


Epoch 3/70: 100%|██████████| 72/72 [02:28<00:00,  2.06s/it, train_loss=0.4646]


Epoch 3/70 - Train Loss: 0.4646, Val Loss: 0.2692, Val Pearson: 0.7474
Validation loss improved from 0.4329 to 0.2692. Saving best model...


Epoch 4/70: 100%|██████████| 72/72 [02:25<00:00,  2.02s/it, train_loss=0.3839]


Epoch 4/70 - Train Loss: 0.3839, Val Loss: 0.2548, Val Pearson: 0.7570
Validation loss improved from 0.2692 to 0.2548. Saving best model...


Epoch 5/70: 100%|██████████| 72/72 [02:27<00:00,  2.05s/it, train_loss=0.3486]


Epoch 5/70 - Train Loss: 0.3486, Val Loss: 0.2013, Val Pearson: 0.7993
Validation loss improved from 0.2548 to 0.2013. Saving best model...


Epoch 6/70: 100%|██████████| 72/72 [02:26<00:00,  2.04s/it, train_loss=0.3138]


Epoch 6/70 - Train Loss: 0.3138, Val Loss: 0.1922, Val Pearson: 0.8090
Validation loss improved from 0.2013 to 0.1922. Saving best model...


Epoch 7/70: 100%|██████████| 72/72 [02:28<00:00,  2.06s/it, train_loss=0.2747]


Epoch 7/70 - Train Loss: 0.2747, Val Loss: 0.1749, Val Pearson: 0.8219
Validation loss improved from 0.1922 to 0.1749. Saving best model...


Epoch 8/70: 100%|██████████| 72/72 [02:25<00:00,  2.01s/it, train_loss=0.2560]


Epoch 8/70 - Train Loss: 0.2560, Val Loss: 0.1694, Val Pearson: 0.8275
Validation loss improved from 0.1749 to 0.1694. Saving best model...


Epoch 9/70: 100%|██████████| 72/72 [02:29<00:00,  2.07s/it, train_loss=0.2261]


Epoch 9/70 - Train Loss: 0.2261, Val Loss: 0.1730, Val Pearson: 0.8311
Validation loss did not improve for 1 epochs.


Epoch 10/70: 100%|██████████| 72/72 [02:27<00:00,  2.05s/it, train_loss=0.2344]


Epoch 10/70 - Train Loss: 0.2344, Val Loss: 0.1449, Val Pearson: 0.8453
Validation loss improved from 0.1694 to 0.1449. Saving best model...


Epoch 11/70: 100%|██████████| 72/72 [02:25<00:00,  2.02s/it, train_loss=0.2161]


Epoch 11/70 - Train Loss: 0.2161, Val Loss: 0.1590, Val Pearson: 0.8366
Validation loss did not improve for 1 epochs.


Epoch 12/70: 100%|██████████| 72/72 [02:26<00:00,  2.04s/it, train_loss=0.1913]


Epoch 12/70 - Train Loss: 0.1913, Val Loss: 0.1457, Val Pearson: 0.8494
Validation loss did not improve for 2 epochs.


Epoch 13/70: 100%|██████████| 72/72 [02:26<00:00,  2.03s/it, train_loss=0.1885]


Epoch 13/70 - Train Loss: 0.1885, Val Loss: 0.1441, Val Pearson: 0.8516
Validation loss improved from 0.1449 to 0.1441. Saving best model...


Epoch 14/70: 100%|██████████| 72/72 [02:28<00:00,  2.06s/it, train_loss=0.1898]


Epoch 14/70 - Train Loss: 0.1898, Val Loss: 0.1562, Val Pearson: 0.8419
Validation loss did not improve for 1 epochs.


Epoch 15/70: 100%|██████████| 72/72 [02:29<00:00,  2.07s/it, train_loss=0.1789]


Epoch 15/70 - Train Loss: 0.1789, Val Loss: 0.1510, Val Pearson: 0.8424
Validation loss did not improve for 2 epochs.


Epoch 16/70: 100%|██████████| 72/72 [02:29<00:00,  2.07s/it, train_loss=0.1795]


Epoch 16/70 - Train Loss: 0.1795, Val Loss: 0.1505, Val Pearson: 0.8415
Validation loss did not improve for 3 epochs.


Epoch 17/70: 100%|██████████| 72/72 [02:26<00:00,  2.04s/it, train_loss=0.1777]


Epoch 17/70 - Train Loss: 0.1777, Val Loss: 0.1558, Val Pearson: 0.8374
Validation loss did not improve for 4 epochs.


Epoch 18/70:  19%|█▉        | 14/72 [00:28<02:01,  2.10s/it, train_loss=0.1441]