# PCG LVEF Colab Pipeline

This notebook rebuilds derived artifacts and runs experiments.
Run cells top-to-bottom.


In [None]:
import os

from google.colab import drive

# Mount Google Drive so files stored there are reachable under /content/drive.
drive.mount('/content/drive')

# Location of the repository on Drive and of the raw inputs inside it.
# Edit these if your Drive layout differs.
REPO_DIR = '/content/drive/MyDrive/phonocardiogram-lvef-deeplearning'
LVEF_CSV = f"{REPO_DIR}/lvef.csv"
HEART_DIR = f"{REPO_DIR}/heart_sounds"

# Later cells use paths relative to the repo root (metadata.csv, splits/, configs/),
# so make it the working directory. Raises if REPO_DIR does not exist.
os.chdir(REPO_DIR)
print('Repo:', REPO_DIR)
print('LVEF CSV:', LVEF_CSV)
print('Heart dir:', HEART_DIR)

# Flag missing raw inputs now rather than when a later step trips over them.
# Only a warning: the inputs are not needed if metadata.csv has already been built.
for _label, _path in (('LVEF CSV', LVEF_CSV), ('Heart dir', HEART_DIR)):
    if not os.path.exists(_path):
        print(f'WARNING: {_label} not found at {_path}')


In [None]:
# Optional: uncomment the next line to delete derived artifacts and rebuild from scratch.
# !rm -rf cache splits results checkpoints checkpoints_cpu tf_stats.json metadata.csv


In [None]:
!pip install -r requirements.txt


In [None]:
import torch

# Report the PyTorch build, the CUDA version it was compiled against,
# and whether a CUDA device is usable in this runtime.
_torch_report = (
    ('torch:', torch.__version__),
    ('cuda:', torch.version.cuda),
    ('cuda available:', torch.cuda.is_available()),
)
for _label, _value in _torch_report:
    print(_label, _value)


If your filename pattern differs, edit `FILENAME_RE` / `DEVICE_MAP` in `src/data/build_metadata.py` before running the next cell.


In [None]:
!python -m src.data.build_metadata \
  --lvef_csv "{LVEF_CSV}" \
  --heart_dir "{HEART_DIR}" \
  --output_csv metadata.csv


In [None]:
!python -m src.data.make_patient_splits \
  --metadata_csv metadata.csv \
  --output_dir splits


In [None]:
PER_DEVICE = False  # set True if you want per-device normalization stats
per_device_flag = "--per_device" if PER_DEVICE else ""
!python -m src.data.compute_stats \
  --train_csv splits/metadata_train.csv \
  --representations mfcc gammatone \
  {per_device_flag}


In [None]:
REPS_TO_CACHE = ["mfcc", "gammatone"]
for rep in REPS_TO_CACHE:
    print(f"Caching {rep}...")
    !python -m src.data.precompute_cache \
      --representation {rep} \
      --splits splits/metadata_train.csv splits/metadata_val.csv splits/metadata_test.csv


In [None]:
# Optional QA report (uncomment the lines below to run)
# !mkdir -p reports
# !python -m src.data.qa_report \
#   --metadata_csv metadata.csv \
#   --output_json reports/qa_report.json \
#   --output_csv reports/qa_records.csv \
#   --fixed_duration 4.0


In [None]:
# Optional single-run sanity check (uncomment the lines below to run)
# !python -m src.training.train \
#   --train_csv splits/cached_mfcc_metadata_train.csv \
#   --val_csv splits/cached_mfcc_metadata_val.csv \
#   --test_csv splits/cached_mfcc_metadata_test.csv \
#   --representation mfcc \
#   --backbone mobilenetv2 \
#   --use_cache \
#   --epochs 5 \
#   --batch_size 64 \
#   --tune_threshold \
#   --save_history \
#   --save_predictions


In [None]:
# Sweep runner (uses configs/sweep_example.json)
!python -m src.experiments.run_sweep --config configs/sweep_example.json
