In [1]:
import time
import os
import json
import cv2
import yaml
import numpy as np
from PIL import Image

from scipy.spatial.distance import euclidean
from scipy.stats import pearsonr
from fastdtw import fastdtw
from sklearn.metrics import mean_squared_error

from scripts.grid_detection import get_grid_square_size
from scripts.extract_wave import WaveExtractor
from scripts.digititze import process_ecg_mask
from scripts.lead_segmentation import init_model as init_lead_model, inference_and_label_and_crop


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Load configs ---
def _read_yaml(path):
    """Open `path` in text mode and return its content parsed by `yaml.safe_load`."""
    with open(path, 'r') as f:
        return yaml.safe_load(f)


# One config file per pipeline stage, loaded in the same order as the stages run.
lead_cfg = _read_yaml('./configs/lead_segmentation.yaml')
wave_cfg = _read_yaml('./configs/wave_extraction.yaml')
grid_cfg = _read_yaml('./configs/grid_detection.yaml')
digitize_cfg = _read_yaml('./configs/digitize.yaml')

# --- Paths ---
# Fixed locations that do not come from any config file.
INPUT_ROOT = '../data/digitization-dataset/digitization-dataset'
FINAL_OUTPUT_DIR = './data/test'

# Values read from the configs; `.get` supplies a fallback when the key is absent,
# plain indexing raises KeyError when a required key is missing.
CROPPED_SAVE_DIR = lead_cfg['output_dir']
GRID_KERNEL = grid_cfg.get('closing_kernel', 10)
GRID_LENGTH_FRAC = grid_cfg.get('length_frac', 0.05)
WAVE_WEIGHTS_PATH = wave_cfg['weights_path']
WAVE_DEVICE = wave_cfg.get('device', 'cpu')
YOLO_WEIGHTS_PATH = lead_cfg['model_path']

# Make sure both output directories exist before anything tries to write to them.
for _out_dir in (CROPPED_SAVE_DIR, FINAL_OUTPUT_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [10]:
import os
import time
import json
import cv2
import numpy as np
from PIL import Image
from scripts.grid_detection import get_grid_square_size
from scripts.digititze import process_ecg_mask

# --- Constants ---
# Lead names; the position of each name is the index used in the per-lead
# ground-truth file names (`<folder>_lead_<index>.json`) during evaluation.
lead_names = ["I", "II", "III", "aVR", "aVL", "aVF", "V1", "V2", "V3", "V4", "V5", "V6"]
# GRID_KERNEL and GRID_LENGTH_FRAC are already derived from `grid_cfg` in the
# configuration cell, so they are not re-derived here.

# --- Load models ---
# Use the path constants from the configuration cell instead of reaching back
# into the raw config dict, so every weights path is defined in one place.
lead_model = init_lead_model(YOLO_WEIGHTS_PATH)
wave_extractor = WaveExtractor(WAVE_WEIGHTS_PATH, device=WAVE_DEVICE)

initializing wave extractor...
Wave extractor initialized.


In [11]:

# --- Evaluation helpers ---
def match_length(a, b):
    """Return `a` and `b` both sliced down to the length of the shorter one."""
    shortest = len(a) if len(a) <= len(b) else len(b)
    trimmed_a = a[:shortest]
    trimmed_b = b[:shortest]
    return trimmed_a, trimmed_b

def scalar_euclid(a, b):
    """Euclidean distance between two scalars.

    `euclidean` is given each scalar wrapped in a one-element list, so the
    result is the distance between two 1-D points of length one.
    """
    point_a = [a]
    point_b = [b]
    return euclidean(point_a, point_b)


def evaluate_signals(pred, gt):
    """Score a predicted waveform against its ground truth.

    Both inputs are flattened to 1-D float arrays and truncated to the length
    of the shorter one before any metric is computed.

    Args:
        pred: Predicted signal (any array-like accepted by `np.ravel`).
        gt: Ground-truth signal (any array-like accepted by `np.ravel`).

    Returns:
        dict with keys:
            "MSE": mean squared error between the two signals.
            "Correlation": Pearson correlation coefficient, or NaN when it is
                undefined (fewer than 3 samples, or either signal is constant).
            "DTW": distance returned by `fastdtw` using `scalar_euclid`.
        All three values are NaN when there are no overlapping samples.
    """
    p = np.ravel(pred).astype(float)
    g = np.ravel(gt).astype(float)
    p, g = match_length(p, g)

    # Nothing to compare: report NaNs instead of letting the metric functions
    # raise and abort a long batch evaluation.
    if len(g) == 0:
        return {"MSE": np.nan, "Correlation": np.nan, "DTW": np.nan}

    mse = mean_squared_error(g, p)

    # Pearson correlation is undefined for a constant signal (e.g. the all-zero
    # placeholder used for a missed lead); return NaN directly rather than
    # calling pearsonr, which would only emit a warning and yield NaN anyway.
    if len(g) > 2 and np.ptp(g) > 0 and np.ptp(p) > 0:
        corr = pearsonr(g, p)[0]
    else:
        corr = np.nan

    # DTW on plain Python lists, comparing one scalar sample at a time.
    dtw_dist, _ = fastdtw(g.tolist(), p.tolist(), dist=scalar_euclid)
    return {"MSE": mse, "Correlation": corr, "DTW": dtw_dist}


In [None]:
# End-to-end evaluation over every '6by2' sample folder under INPUT_ROOT:
# segment leads -> detect grid size -> extract wave mask -> digitize ->
# compare each digitized lead with its ground-truth JSON.
#
# lead_names, GRID_KERNEL and GRID_LENGTH_FRAC are defined in the earlier setup
# cells (the grid values are read from grid_cfg). They are deliberately NOT
# overridden with literals here, so the YAML configuration is honoured.

# Sorted so that the processing order (and therefore the log) is reproducible;
# os.listdir returns entries in arbitrary order.
folders = sorted(
    d for d in os.listdir(INPUT_ROOT)
    if '6by2' in d and os.path.isdir(os.path.join(INPUT_ROOT, d))
)
total_folders = len(folders)
total_slots = total_folders * len(lead_names)
detected_count = 0
per_lead_detect = {ln: 0 for ln in lead_names}
metrics_detected = []  # metrics for leads that were actually detected
metrics_all = []       # metrics for every lead with ground truth (zeros if missed)

for idx, fld in enumerate(folders, 1):
    if idx % 10 == 0:
        print(f"{idx}/{total_folders} folders processed…")

    fld_path = os.path.join(INPUT_ROOT, fld)
    img_path = os.path.join(fld_path, f"{fld}.jpg")

    if not os.path.isfile(img_path):
        print(f"⚠️ Missing image: {img_path}")
        continue

    # --- Step 1: Lead Segmentation ---
    crops, _ = inference_and_label_and_crop(
        lead_model, img_path, output_dir=fld_path, conf_threshold=lead_cfg['conf_threshold']
    )

    # Mimic main pipeline: persist each crop and keep (path, label) pairs,
    # because the later stages read the crops back from disk.
    all_cropped_leads = []
    for crop_img, label in crops:
        crop_path = os.path.join(fld_path, f"{fld}_{label}.jpg")
        cv2.imwrite(crop_path, crop_img)
        all_cropped_leads.append((crop_path, label))

    # --- Step 2: Grid Detection ---
    lead_to_sq = {}
    for crop_path, label in all_cropped_leads:
        img = cv2.imread(crop_path)
        if img is None:
            print(f"⚠️ Failed to read {crop_path}")
            continue
        sq = get_grid_square_size(img, closing_kernel=GRID_KERNEL, length_frac=GRID_LENGTH_FRAC)
        lead_to_sq[label] = sq

    # --- Step 3: Wave Extraction ---
    lead_to_mask = {}
    for crop_path, label in all_cropped_leads:
        mask = wave_extractor.extract_wave(crop_path)
        lead_to_mask[label] = mask

    # --- Step 4: Digitization ---
    lead_waveforms = {}
    for crop_path, label in all_cropped_leads:
        sq = lead_to_sq.get(label)
        mask = lead_to_mask.get(label)
        # Skip leads for which either the grid size or the mask is unavailable.
        if sq is None or mask is None:
            continue
        wf = process_ecg_mask(mask, sq)
        lead_waveforms[label] = wf

    # --- Step 5: Compare to Ground-Truth ---
    results_det = {}
    results_all = {}

    # Case-insensitive label -> waveform lookup, built once per folder.
    # setdefault keeps the first waveform if two labels differ only by case.
    waveform_by_name = {}
    for lbl, wf in lead_waveforms.items():
        waveform_by_name.setdefault(lbl.lower(), wf)

    for i_lead, lead_label in enumerate(lead_names):
        gt_file = os.path.join(fld_path, f"{fld}_lead_{i_lead}.json")
        if not os.path.isfile(gt_file):
            continue

        # Context manager so the file handle is closed immediately.
        with open(gt_file) as gt_handle:
            gt_wave = np.array(json.load(gt_handle))

        pred = waveform_by_name.get(lead_label.lower())

        if pred is not None:
            detected_count += 1
            per_lead_detect[lead_label] += 1
            m = evaluate_signals(pred, gt_wave)
            results_det[lead_label] = m
            metrics_detected.append(m)
            # Same inputs give the same metrics: reuse them (as a separate dict)
            # instead of running the costly DTW a second time.
            m_all = dict(m)
        else:
            # Always evaluate something: a missed lead is scored as all zeros.
            m_all = evaluate_signals(np.zeros_like(gt_wave, dtype=float), gt_wave)

        results_all[lead_label] = m_all
        metrics_all.append(m_all)

    # --- Per-folder Summary ---
    print(f"\n📁 {fld}: {len(results_det)}/{len(lead_names)} leads detected")
    for ln, m in results_det.items():
        print(f"  {ln}: MSE={m['MSE']:.4f}, Corr={m['Correlation']:.3f}, DTW={m['DTW']:.1f}")


image 1/1 /home/abdullah-bin-mansoor/Desktop/ECG Project/full-pipeline/../data/digitization-dataset/digitization-dataset/1670_6by2/1670_6by2.jpg: 320x640 12 lead_containers, 1 label_II, 1 label_III, 1 label_aVR, 1 label_aVL, 1 label_aVF, 1 label_V1, 1 label_V2, 1 label_V3, 1 label_V4, 1 label_V5, 1 label_V6, 217.8ms
Speed: 2.8ms preprocess, 217.8ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)
