In [1]:
from dotenv import load_dotenv
from pathlib import Path
import sys
import os

# Locate the project root: walk up from the current working directory until a
# folder containing a `.env` file is found, load that file, and add the project
# root to sys.path so that project packages can be imported.
current_path = Path().resolve()
for parent in [current_path, *current_path.parents]:
    env_file = parent / ".env"
    if not env_file.exists():
        continue
    load_dotenv(env_file)
    # PROJECT_ROOT may be absent from the .env; fall back to the folder holding
    # the .env so that sys.path never receives None.
    project_root = os.getenv("PROJECT_ROOT") or str(parent)
    print(project_root)
    # Re-running the cell must not pile up duplicate sys.path entries.
    if project_root not in sys.path:
        sys.path.append(project_root)
    break
else:
    # No .env anywhere above the working directory: say so instead of failing
    # later with an unexplained import error.
    print(f"Warning: no .env file found in {current_path} or any parent directory")


%load_ext autoreload
%autoreload 2

/Users/emmanuel/Documents/belugas/beluga-call-pipeline


In [4]:
import traceback

import pandas as pd
import torch

from models.mobilenet import MobileNetMultilabel
from models.quant_mobilenet import load_mobilenet_v3_quant
from models.resnet import ResnetMultilabel
from training.cross_validation import run_cross_val, train_model


## Running the Optimization Experiments

This section covers the model optimization experiments:
- Switching from **ResNet18** to **MobileNet V3 Small**,
- Further **truncating** the MobileNet architecture,
- Applying **8-bit Quantization-Aware Training (QAT)** on the MobileNet model.

Each experiment is executed with **cross-validation** as described in the paper.  
Results are written to a dedicated `results/` directory and subsequently examined in `analysis.ipynb`.



In [5]:
# Label table for the 1 s overlapping clips, plus a helper column holding each
# clip filename with a ".pt" extension appended.
labels_df = pd.read_csv("../data/labels/Overlaps_1s.csv").assign(
    ClipFilenamePt=lambda frame: frame["ClipFilename"] + ".pt"
)

# Columns of the label table that are used as prediction targets.
label_columns = ["ECHO", "HFPC", "CC", "Whistle"]

# Input locations (relative to the notebook's working directory).
data_dir = "../data"
processed_spects_dir = f"{data_dir}/Full_Dataset/Overlaps_1s_hp_1024_resize/"

# Output location for the model-optimization experiments.
results_dir = "./results/model_optimization"

In [11]:
# Training hyper-parameters shared by every experiment in this notebook.
#
# QUICK_RUN = True keeps the short smoke-test schedule (2 epochs, 1 minimum
# epoch) the notebook was last executed with. Set it to False for the full
# schedule; the full values (100 / 15) are the ones that were recorded in the
# original inline comments.
QUICK_RUN = True

training_config_default = {
    "batch_size": 32,
    "lr_decay_factor": 0.5,
    "patience_lr": 2,
    "n_epochs": 2 if QUICK_RUN else 100,
    "min_epochs": 1 if QUICK_RUN else 15,
    "patience_early_stopping": 5,
    "metric_mode": "max",
    "val_metric": "f1",
}

### ResNet18


In [None]:
# Baseline: cross-validate the pretrained ResNet model without quantization.
# n_splits is spelled out (rather than left to the default) so this call reads
# the same as the other run_cross_val calls in this notebook.
run_cross_val(
    labels_df,
    label_columns,
    ResnetMultilabel,
    processed_spects_dir,
    run_name="resnet",
    model_kwargs={
        "pretrained": True,
    },
    n_splits=5,
    training_config=training_config_default,
    save_models=True,
    use_quantization=False,
)

Loading resnet18 model on mps




Training model for fold 1 of 5

Fold 1 dataset sizes:
Train: 6996 samples
Val: 1750 samples
Test: 2187 samples



TypeError: Model must be generated with load_mobilenet_v3_quant() when use_quantization=True

#### Running all MobileNet variants: layer depth & quantization 

In [None]:

# Sweep over MobileNet depth (number of feature layers kept) and quantization.
# One cross-validation run is launched per (n_layers, use_quantization) pair.
# A failing run does not stop the sweep, but it is recorded and reported at the
# end so that failures cannot go unnoticed.
n_layers_to_test = [2, 4, 6, 8, 10, 12]
quantization_options = [False, True]

failed_runs = []  # (run_name, exception) for every experiment that raised

for n_layers in n_layers_to_test:
    for use_quantization in quantization_options:
        # Create run name based on parameters
        quant_suffix = "_qat" if use_quantization else ""
        run_name = f"mobile_net{quant_suffix}_{n_layers}_layers"

        print(f"\n{'='*80}")
        print(f"Running experiment: {run_name}")
        print(f"n_layers: {n_layers}, quantization: {use_quantization}")
        print(f"{'='*80}")

        # Quantized runs use the dedicated loader instead of the model class.
        model_class = load_mobilenet_v3_quant if use_quantization else MobileNetMultilabel

        model_kwargs = {
            "pretrained": True,
            "n_layers": n_layers,
        }
        if use_quantization:
            model_kwargs["qat"] = True

        try:
            run_cross_val(
                labels_df,
                label_columns,
                model_class,
                processed_spects_dir,
                run_name=run_name,
                model_kwargs=model_kwargs,
                n_splits=5,
                training_config=training_config_default,
                save_models=True,
                use_quantization=use_quantization,
            )
        except Exception as e:
            # Keep the sweep going, but keep the full traceback: the message
            # alone is often not enough to diagnose a failed run.
            failed_runs.append((run_name, e))
            print(f"❌ Error in experiment {run_name}: {e!r}")
            traceback.print_exc()
            print("Continuing with next experiment...")
        else:
            print(f"✅ Successfully completed: {run_name}")

print(f"\n{'='*80}")
if failed_runs:
    print(f"{len(failed_runs)} experiment(s) failed:")
    for failed_name, failed_error in failed_runs:
        print(f"  - {failed_name}: {failed_error!r}")
else:
    print("All experiments completed!")
print(f"{'='*80}")


Running experiment: mobile_net_2_layers
n_layers: 2, quantization: False

Running experiment: mobile_net_qat_2_layers
n_layers: 2, quantization: True

Running experiment: mobile_net_4_layers
n_layers: 4, quantization: False

Running experiment: mobile_net_qat_4_layers
n_layers: 4, quantization: True

Running experiment: mobile_net_6_layers
n_layers: 6, quantization: False

Running experiment: mobile_net_qat_6_layers
n_layers: 6, quantization: True

Running experiment: mobile_net_8_layers
n_layers: 8, quantization: False

Running experiment: mobile_net_qat_8_layers
n_layers: 8, quantization: True

Running experiment: mobile_net_10_layers
n_layers: 10, quantization: False

Running experiment: mobile_net_qat_10_layers
n_layers: 10, quantization: True

Running experiment: mobile_net_12_layers
n_layers: 12, quantization: False

Running experiment: mobile_net_qat_12_layers
n_layers: 12, quantization: True

All experiments completed!


<h4>Function call template to run quick experiments</h4>


In [None]:
# Template for one-off experiments: adjust the run name and the settings below,
# then re-run this cell.
quick_run_options = dict(
    run_name="mobile_net_hp_1024_8_layers_all_absences",
    model_kwargs={"pretrained": True, "n_layers": 8},
    n_splits=5,
    training_config=training_config_default,
    save_models=True,
    use_quantization=False,
)

run_cross_val(
    labels_df,
    label_columns,
    MobileNetMultilabel,
    processed_spects_dir,
    **quick_run_options,
)

## Site Generalization Experiments


This section runs two experiments:

1. **Site-specific models** — train a separate model per site.
2. **Leave-One-Site-Out** — train on all but one site and test on the held-out site to assess generalizability.

All runs follow the protocol described in the paper.


In [9]:
from training.cross_validation import create_test_fold_indices
from sklearn.model_selection import KFold, train_test_split
from models.utils import aggregate_folds_testing_metrics



# Targets and input locations for the site-generalization experiments.
label_columns = ["ECHO", "HFPC", "CC", "Whistle"]

data_dir = "../data"
processed_spects_dir = f"{data_dir}/Full_Dataset/Overlaps_1s_hp_1024_resize/"

# Re-read the label table from disk, add the ".pt" filename column, then let
# create_test_fold_indices process it with 5 folds (the loops below rely on the
# resulting "test_fold_idx" column).
labels_df = (
    pd.read_csv("../data/labels/Overlaps_1s.csv")
    .assign(ClipFilenamePt=lambda frame: frame["ClipFilename"] + ".pt")
    .pipe(create_test_fold_indices, 5)
)

### Site-specific models

In [17]:
# Site-specific models: for every site, train one model per fold using only
# that site's clips, test on the same site's held-out fold, then aggregate the
# per-fold test metrics of the run.
all_sites = ["RDL", "CAC", "BSM", "KAM"]
n_folds = 5  # must match the fold count given to create_test_fold_indices

use_quantization = False

# Settings that do not change between sites or folds are built once.
model_class = load_mobilenet_v3_quant if use_quantization else MobileNetMultilabel
model_kwargs = {
    "pretrained": True,
    "n_layers": 8,
    "num_classes": len(label_columns),
}
if use_quantization:
    # Same flag the MobileNet sweep passes for quantized models.
    # NOTE(review): presumably load_mobilenet_v3_quant accepts it directly — confirm.
    model_kwargs["qat"] = True

for train_site in all_sites:
    train_site_df = labels_df[labels_df["Site"] == train_site]
    run_name = f"{train_site}_only" + ("_qat" if use_quantization else "")

    for fold_idx in range(n_folds):
        # A new model instance is created for every fold.
        model = model_class(**model_kwargs)

        is_test_fold = train_site_df["test_fold_idx"] == fold_idx
        test_data = train_site_df[is_test_fold]

        # Only one site is present here, so stratifying on "Site" has nothing
        # to balance; it is kept so the split stays exactly as before.
        fold_train_df = train_site_df[~is_test_fold]
        train_data, val_data = train_test_split(
            fold_train_df,
            test_size=0.2,
            random_state=42,
            stratify=fold_train_df["Site"],
        )

        run_dir, _, _ = train_model(
            labels_df,
            label_columns,
            model,
            train_data,
            val_data,
            test_data,
            processed_spects_dir=processed_spects_dir,
            fold_idx=fold_idx,
            results_dir="./results/sites_generalization",
            run_name=run_name,
            training_config=training_config_default,
            use_quantization=use_quantization,
            compute_sites_metrics=True,
        )

    # run_dir comes from the last fold of this site's run.
    aggregate_folds_testing_metrics(run_dir)

Loading MobileNetV3_Small model on mps
Number of feature layers: 13
Using up to layer 8
Feature size after backbone: 48

Training Epoch: 0


 56%|█████▌    | 14/25 [00:02<00:01,  5.87it/s]


KeyboardInterrupt: 

### Leave-One-Site-Out

In [None]:


# Leave-one-site-out: for every held-out site, train one model per fold on the
# remaining sites and test it on the held-out site's clips of that fold, then
# aggregate the per-fold test metrics of the run.
all_sites = ["BSM", "RDL", "CAC", "KAM"]
n_folds = 5  # must match the fold count given to create_test_fold_indices

use_quantization = False

# Settings that do not change between sites or folds are built once.
model_class = load_mobilenet_v3_quant if use_quantization else MobileNetMultilabel
model_kwargs = {
    "pretrained": True,
    "n_layers": 8,
    "num_classes": len(label_columns),
}
if use_quantization:
    # Same flag the MobileNet sweep passes for quantized models.
    # NOTE(review): presumably load_mobilenet_v3_quant accepts it directly — confirm.
    model_kwargs["qat"] = True

for out_site in all_sites:
    train_sites = [site for site in all_sites if site != out_site]
    run_name = f"leave_{out_site}_out" + ("_qat" if use_quantization else "")

    for fold_idx in range(n_folds):
        # A new model instance is created for every fold.
        model = model_class(**model_kwargs)

        is_test_fold = labels_df["test_fold_idx"] == fold_idx

        # Train/validation pool: training sites only, current test fold excluded.
        train_sites_df = labels_df[~is_test_fold & labels_df["Site"].isin(train_sites)]
        train_data, val_data = train_test_split(
            train_sites_df,
            test_size=0.2,
            random_state=42,
            stratify=train_sites_df["Site"],
        )

        # Test only on the held-out site. Previously the whole test fold (all
        # sites, including the training sites) was used, which does not measure
        # generalization to the unseen site.
        test_data = labels_df[is_test_fold & (labels_df["Site"] == out_site)]

        # Sanity output: the held-out site must not appear in train or val.
        print(run_name)
        print(f"Site out : {out_site}")
        print("Train df")
        print(train_data["Site"].value_counts())
        print("\nVal df")
        print(val_data["Site"].value_counts())

        run_dir, _, _ = train_model(
            labels_df,
            label_columns,
            model,
            train_data,
            val_data,
            test_data,
            processed_spects_dir=processed_spects_dir,
            fold_idx=fold_idx,
            run_name=run_name,
            results_dir="./results/sites_generalization",
            training_config=training_config_default,
            use_quantization=use_quantization,
        )

    # run_dir comes from the last fold of this held-out site's run.
    aggregate_folds_testing_metrics(run_dir)

<h2>Training the final model on all the data</h2>

In [None]:
# Imports use training.cross_validation, the same module the rest of this
# notebook imports run_cross_val, train_model and create_test_fold_indices from
# (this cell previously pulled create_test_fold_indices from
# training.cross_validation_2).
from training.cross_validation import create_test_fold_indices
from sklearn.model_selection import KFold, train_test_split
from models.utils import aggregate_folds_testing_metrics

# Reload the label table from disk and add the ".pt" filename column.
labels_df = pd.read_csv("../data/labels/Overlaps_1s.csv")
labels_df["ClipFilenamePt"] = labels_df["ClipFilename"] + ".pt"

# Columns of the label table that are used as prediction targets.
label_columns = ["ECHO", "HFPC", "CC", "Whistle"]

data_dir = "../data"
processed_spects_dir = data_dir + "/Full_Dataset/Overlaps_1s_hp_1024_resize/"

# NOTE(review): the final training call passes its own results_dir literal
# ("results/final_model"); confirm which of the two locations is intended.
results_dir = "./final_results"

labels_df = create_test_fold_indices(labels_df, 5)

In [None]:

# Train the final model on the full label table (no site or fold is held out).
use_quantization = False

model_class = load_mobilenet_v3_quant if use_quantization else MobileNetMultilabel
model_kwargs = {
    "pretrained": True,
    "n_layers": 8,
    "num_classes": len(label_columns),
}
if use_quantization:
    # Same flag the MobileNet sweep passes for quantized models.
    # NOTE(review): presumably load_mobilenet_v3_quant accepts it directly — confirm.
    model_kwargs["qat"] = True

model = model_class(**model_kwargs)

# Split the whole label table. The previous code split `train_data`, a variable
# left over from the earlier experiment loops: on a fresh kernel that raises
# NameError, and otherwise it silently trained on a partial subset.
train_data, val_data = train_test_split(
    labels_df,
    test_size=0.2,
    random_state=42,
    stratify=labels_df["Site"],
)
# train_model requires a test set; per the original note it is not used for the
# final model, so the validation split is passed as a placeholder.
test_data = val_data

run_name = "Final_model"
if use_quantization:
    run_name = run_name + "_qat"

# train_model returns a 3-tuple at its other call sites in this notebook;
# unpack it so that run_dir is the run directory rather than the whole tuple.
run_dir, _, _ = train_model(
    labels_df,
    label_columns,
    model,
    train_data,
    val_data,
    test_data,
    fold_idx=0,
    processed_spects_dir=processed_spects_dir,
    run_name=run_name,
    results_dir="results/final_model",
    training_config=training_config_default,
    use_quantization=use_quantization,
    save_model=True,
)