In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

def _group_display_name(label):
    """Return the part of *label* after the first '::', or *label* itself if it has no '::'."""
    parts = label.split("::")
    return parts[1] if len(parts) > 1 else label


def plot_sensitivity_specificity_scatter(csv_path, metric="icbhi_score", figsize=(8,6), save_path=None):
    """
    Plot a sensitivity vs specificity scatter per device and per site.

    The first CSV column is read as the group label. Labels starting with
    "device" are drawn as devices (circles); every other label is drawn as a
    site (triangles). The text after the first "::" in the label is used as
    the point annotation (the whole label if it contains no "::").
    Rows with a missing sensitivity, specificity or `metric` value are dropped.

    Parameters
    ----------
    csv_path : str
        Path to your 'group_performance_summary_foldX.csv'. Must contain the
        columns 'sensitivity', 'specificity' and the column named by `metric`.
    metric : str
        Metric to use for color (e.g., 'icbhi_score' or 'f1_macro')
    figsize : tuple
        Size of the matplotlib figure
    save_path : str or None
        Optional path to save the figure (PNG). Missing parent directories
        are created.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` (and saved if `save_path` is set).
    """
    df = pd.read_csv(csv_path, index_col=0)
    # Name the index explicitly so reset_index() always yields a "group" column,
    # whatever header (if any) the first CSV column carries.
    df.index.name = "group"
    df = df.reset_index()

    # Split type (device/site); cast to str so numeric/NaN labels do not crash.
    labels = df["group"].astype(str)
    df["type"] = labels.str.startswith("device").map({True: "Device", False: "Site"})
    df["name"] = labels.map(_group_display_name)

    # Sanity filter
    df = df.dropna(subset=["sensitivity", "specificity", metric])

    # Plot setup
    sns.set(style="whitegrid", context="talk")
    fig, ax = plt.subplots(figsize=figsize)

    # Scatter points. Markers are pinned so the shape does not depend on which
    # type happens to appear first in the CSV.
    sns.scatterplot(
        data=df, x="specificity", y="sensitivity",
        hue=metric, style="type", markers={"Device": "o", "Site": "^"},
        s=200, edgecolor="black", palette="coolwarm", ax=ax
    )

    # Annotate with device/site names
    for sp, se, label in zip(df["specificity"], df["sensitivity"], df["name"]):
        ax.text(sp + 0.002, se + 0.002, label, fontsize=10, weight="bold")

    # Reference lines (mean Sp, Se)
    ax.axvline(df["specificity"].mean(), color="gray", ls="--", lw=1)
    ax.axhline(df["sensitivity"].mean(), color="gray", ls="--", lw=1)

    # Default view is Sp in [0.6, 1.0]; widen to the left only when a group
    # would otherwise fall outside the axes and silently disappear.
    x_lo = 0.6
    sp_min = df["specificity"].min()
    if pd.notna(sp_min) and sp_min < x_lo:
        x_lo = max(0.0, sp_min - 0.02)
    ax.set_xlim(x_lo, 1.0)
    ax.set_ylim(0.0, 1.0)
    ax.set_xlabel("Specificity (True Negative Rate)")
    ax.set_ylabel("Sensitivity (True Positive Rate)")
    ax.set_title("Domain Bias Analysis: Sensitivity vs Specificity per Device/Site")

    # Legend & layout (use the explicit axes/figure rather than pyplot's "current" ones)
    ax.legend(title=metric, bbox_to_anchor=(1.05, 1), loc="upper left")
    fig.tight_layout()

    if save_path:
        # savefig does not create directories; make sure the target folder exists.
        Path(save_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, dpi=300, bbox_inches="tight")
        print(f"Saved figure to: {save_path}")

    plt.show()


Each point = one device or site:

X-axis: Specificity → right = fewer false positives.

Y-axis: Sensitivity → up = better detection of abnormalities.

Color: value of the selected metric — ICBHI (HS) or F1-macro score.

Marker style: circles for devices, triangles for sites.

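Dashed lines: average Sp (vertical) and average Se (horizontal) across all plotted groups; they split the plot into the four quadrants described below (visualize bias region).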

How to read it

Top-right quadrant: balanced & robust domain (ideal).

Bottom-right: conservative (few false positives, misses many positives).

Top-left: noisy (detects almost every positive, but with many false positives, i.e. low specificity).

Bottom-left: failure zone (poor both ways).

In [None]:
# Draw the per-device / per-site Se-vs-Sp scatter for fold 0, colored by the
# ICBHI score, and write the figure to disk.
plot_sensitivity_specificity_scatter(
    csv_path="summaries/group_performance_summary_fold0.csv",
    save_path="figures/sp_vs_se_per_group.png",
    metric="icbhi_score",
)