This notebook evaluates the Peak Signal-to-Noise Ratio (PSNR) on the SRPBS traveling-subject dataset.
For each subject scanned at multiple sites, PSNR is computed between all pairs of sites, after applying a consistent brain crop derived from the brain masks. To mitigate the influence of outliers that can artificially inflate the dynamic range and bias similarity metrics, the intensity range of each volume pair is estimated with robust quantiles: the lower bound is the smaller of the two images' 1st percentiles, and the upper bound is the larger of their 99th percentiles.

Input CSV structure:

 - One row per MRI volume
 - Columns:
    - site: acquisition site (e.g. KPM, ATV, …)
    - sub: subject identifier
    - One column per reconstruction method (raw, HACA3, IGUANE, DIFFUSION, …) containing the path to the corresponding NIfTI image
    - One column {method}_mask containing the path to the associated binary brain mask (extracted with HD-BET); all methods whose name contains "DIFF" share a single DIFF_mask column

In [None]:
import pandas as pd
import numpy as np
import nibabel as nib
from tqdm import tqdm
import matplotlib.pyplot as plt

from skimage.metrics import peak_signal_noise_ratio as psnr
from itertools import combinations

import plotly.express as px
import plotly.graph_objects as go


In [None]:
# Read the per-volume table (site, subject, image paths, mask paths)
df_brain_mask = pd.read_csv(filepath_or_buffer="df_MRI_and_masks.csv")

# Quick visual check: preview the first five rows
df_brain_mask.head(n=5)

In [None]:
def get_mask_bounds(mask_path):
    """
    Return the bounding voxel indices of the foreground of a brain mask.

    Every voxel with a value strictly greater than zero counts as foreground.

    Parameters
    ----------
    mask_path : str
        Path to a NIfTI brain mask.

    Returns
    -------
    min_coords : np.ndarray or None
        Smallest foreground index along each axis, or None for an empty mask.
    max_coords : np.ndarray or None
        Largest foreground index along each axis (inclusive), or None for an
        empty mask.
    """
    # Threshold at zero so that any positive value is treated as brain
    foreground = nib.load(mask_path).get_fdata() > 0

    # One row per foreground voxel, one column per axis
    voxel_indices = np.argwhere(foreground)

    if voxel_indices.size:
        return voxel_indices.min(axis=0), voxel_indices.max(axis=0)

    # No foreground voxel at all: nothing to bound
    return None, None

In [None]:
def create_crop_box(df, method, lower_percentile=15, upper_percentile=85):
    """
    Compute a global cropping box based on brain masks.

    Each non-empty mask contributes the per-axis minimum and maximum index of
    its foreground voxels. The box is then defined with percentiles taken
    across the dataset, so a few unusually small or large bounds do not drive
    the crop.

    Parameters
    ----------
    df : pd.DataFrame
        Dataset description. Must contain the mask column for `method`.
    method : str
        Reconstruction method name. Any name containing "DIFF" uses the shared
        "DIFF_mask" column; other methods use "{method}_mask".
    lower_percentile : float, optional
        Percentile of the per-mask minimum coordinates used as lower bound.
    upper_percentile : float, optional
        Percentile of the per-mask maximum coordinates used as upper bound.

    Returns
    -------
    global_min_coords : np.ndarray
        Lower crop bound (integer, rounded down).
    global_max_coords : np.ndarray
        Upper crop bound (integer, rounded up).

    Raises
    ------
    ValueError
        If none of the masks contains a foreground voxel, in which case no
        meaningful box can be derived.
    """
    # All diffusion variants share one mask column instead of one per variant
    mask_column = df["DIFF_mask"] if "DIFF" in method else df[f"{method}_mask"]

    min_coords_list = []
    max_coords_list = []

    for mask_path in mask_column:
        min_c, max_c = get_mask_bounds(mask_path)
        if min_c is None:
            # Empty mask: it carries no information about the brain extent
            continue
        min_coords_list.append(min_c)
        max_coords_list.append(max_c)

    if not min_coords_list:
        # Without this guard the percentile of an empty array would be cast
        # to int and silently yield meaningless bounds.
        raise ValueError(
            f"No non-empty brain mask found for method {method!r}; "
            "cannot compute a crop box."
        )

    min_coords_array = np.array(min_coords_list)
    max_coords_array = np.array(max_coords_list)

    # Robust crop using percentiles to avoid outliers.
    # NOTE(review): the maxima are inclusive voxel indices; a caller slicing
    # with `min:max` excludes the voxel at the upper bound - confirm intended.
    global_min_coords = np.floor(
        np.percentile(min_coords_array, lower_percentile, axis=0)
    ).astype(int)
    global_max_coords = np.ceil(
        np.percentile(max_coords_array, upper_percentile, axis=0)
    ).astype(int)

    return global_min_coords, global_max_coords

In [None]:
def load_and_normalize_image(image_path):
    """
    Read a NIfTI file and hand back its voxel data.

    Despite the function name, no intensity normalization is applied here:
    the array is returned exactly as produced by nibabel's ``get_fdata()``.

    Parameters
    ----------
    image_path : str
        Path to the NIfTI image.

    Returns
    -------
    np.ndarray
        Image data.
    """
    return nib.load(image_path).get_fdata()

In [None]:
def crop_image(image_data, min_coords, max_coords):
    """
    Extract a box-shaped sub-volume from the first three axes of an image.

    Along each axis the slice starts at the lower bound (included) and stops
    at the upper bound (excluded), following Python slicing rules.

    Parameters
    ----------
    image_data : np.ndarray
        3D image array.
    min_coords : np.ndarray
        Lower crop bound (inclusive) for axes 0, 1 and 2.
    max_coords : np.ndarray
        Upper crop bound (exclusive) for axes 0, 1 and 2.

    Returns
    -------
    np.ndarray
        Cropped image.
    """
    # Build one half-open slice per spatial axis, then index in a single step
    box = tuple(slice(min_coords[axis], max_coords[axis]) for axis in range(3))
    return image_data[box]

In [None]:
def crop_images(df, method, global_min_coords, global_max_coords):
    """
    Load and crop images for a given method, grouped by subject and site.

    Parameters
    ----------
    df : pd.DataFrame
        Dataset description. Must contain the columns "sub", "site" and
        `method` (the latter holding the image paths).
    method : str
        Reconstruction method name.
    global_min_coords : np.ndarray
        Lower crop bound.
    global_max_coords : np.ndarray
        Upper crop bound.

    Returns
    -------
    dict
        Nested dictionary: cropped_images[sub][site] = image array.
        If several rows share the same (sub, site), the last one wins.
    """
    cropped_images = {}

    # Only the three needed columns are iterated; this avoids building a full
    # Series per row as `iterrows` does.
    records = df[["sub", "site", method]].itertuples(index=False, name=None)

    for sub, site, image_path in records:
        image_data = load_and_normalize_image(image_path)
        cropped = crop_image(image_data, global_min_coords, global_max_coords)

        # Slicing yields a view that would keep the whole uncropped volume
        # alive for as long as the crop is stored; copying lets the full
        # array be released as soon as this iteration ends.
        cropped_images.setdefault(sub, {})[site] = cropped.copy()

    return cropped_images

In [None]:
# Reconstruction methods for which PSNR is evaluated
methods = [
    "DIFF_preprocessed",
    "IGUANE",
    "STGAN",
    "HACA3",
    "DIFF_CFGpp_02",
    "DIFF_CFGpp_04",
    "DIFF_CFGpp_06",
    "DIFF_CFGpp_08",
    "DIFF_CFGpp_1",
]

# Layout: psnr_results[method]["{site1}-{site2}-{sub}"] = {"psnr": value}
psnr_results = {}

for method in methods:
    # One crop box per method, shared by every volume of that method
    global_min_coords, global_max_coords = create_crop_box(df_brain_mask, method)
    cropped_images = crop_images(
        df_brain_mask, method, global_min_coords, global_max_coords
    )

    psnr_results[method] = {}

    for sub in tqdm(cropped_images, desc=method):
        sites = cropped_images[sub]

        # The 1st/99th percentiles depend only on the image, not on the pair,
        # so compute them once per image instead of once per site pair.
        percentiles = {
            site: np.percentile(img, [1, 99]) for site, img in sites.items()
        }

        for site1, site2 in combinations(sites, 2):
            img1 = sites[site1]
            img2 = sites[site2]

            # Robust intensity range for PSNR computation: smallest 1st
            # percentile to largest 99th percentile over the two images
            p1 = min(percentiles[site1][0], percentiles[site2][0])
            p99 = max(percentiles[site1][1], percentiles[site2][1])
            data_range = p99 - p1

            psnr_value = psnr(img1, img2, data_range=data_range)

            key = f"{site1}-{site2}-{sub}"
            psnr_results[method][key] = {"psnr": psnr_value}

In [None]:
# Flatten the nested {method: {pair_key: {"psnr": value}}} mapping into
# one record per (method, site pair, subject) entry
rows = [
    dict(method=method_name, site_pair_sub=pair_key, psnr=entry["psnr"])
    for method_name, per_pair in psnr_results.items()
    for pair_key, entry in per_pair.items()
]

df_psnr = pd.DataFrame(rows)

# Optional export, disabled by default:
# df_psnr.to_csv("df_psnr.csv", index=False)

In [None]:
# Mean and standard deviation of PSNR over all site pairs, for each method
psnr_by_method = df_psnr.groupby("method")["psnr"]
df_summary = psnr_by_method.agg(psnr_post_mean="mean", psnr_post_std="std")

# Show methods as columns, rounded to four decimals
df_summary.transpose().round(4)