# Nocturnal Hypoglycemia (Moderate) Batch Visualizer

This notebook gathers all detections of `nocturnal_hypoglycemia_moderate`,
extracts the nightly windows using the recorded sleep-window hours, fetches
the raw CGM traces, and produces overlay, distribution, and KMeans-clustered
median/percentile plots to sanity-check the pattern.


In [1]:
# --- Parameters: edit these values ---
# Pattern whose detections are collected from the detection JSON files.
pattern_id = "nocturnal_hypoglycemia_moderate"
# Alternative input: recursive glob over every JSON file under detection_v1.
#detections_glob = "/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/**/*.json"
# Glob pattern (expanded with recursive=True) selecting the detection file(s).
detections_glob = "/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/0_detections.json"
# Keep only this patient id; None (or any falsy value) keeps every patient.
patient_filter = None
# Upper bound on the number of examples processed; None processes all of them.
max_examples = None
# Minutes of extra data kept before and after the sleep window when slicing.
minutes_padding = 30
# Grid spacing, in minutes, used when traces are resampled for alignment.
resample_minutes = 5
# Number of KMeans clusters fitted for each alignment.
n_clusters = 3


In [2]:
import glob
import json
import os
import sys
from datetime import datetime, date, time, timedelta, timezone
from typing import Any, Dict, Iterable, List, Optional, Tuple
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.cluster import KMeans

# Put the parent of the current working directory on sys.path so the
# `cgm_patterns` imports below resolve (presumably the repository root when
# the notebook is run from its own folder -- confirm).
repo_root = os.path.abspath("..")
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

from cgm_patterns.CGM_fetcher import iter_cgm_days
from cgm_patterns.models import CGMDay


def align_on_nadir(frames, freq_minutes=5):
    """Re-center each window on its glucose minimum and resample to one grid.

    Args:
        frames: Iterable of DataFrames with ``relative_minutes`` and
            ``glucose_mg_dL`` columns.
        freq_minutes: Spacing of the output grid, in minutes.

    Returns:
        ``(grid, stacked)``. ``grid`` is a symmetric array of minute offsets
        from the nadir made of exact multiples of ``freq_minutes``, so offset
        0 (the nadir itself) is always sampled. ``stacked`` has one row per
        usable frame, with NaN outside that frame's observed range.
        ``(None, None)`` when no frame has at least three rows.
    """
    shifted_data = []
    reach = 0.0
    for frame in frames:
        rel = frame.loc[:, "relative_minutes"].to_numpy()
        glucose = frame.loc[:, "glucose_mg_dL"].to_numpy()
        if len(rel) < 3:
            # Too few samples to describe a shape around the nadir.
            continue
        # Shift the time axis so the lowest reading sits at offset 0.
        shifted = rel - rel[np.argmin(glucose)]
        order = np.argsort(shifted)
        shifted = shifted[order]
        glucose = glucose[order]
        reach = max(reach, abs(shifted[0]), abs(shifted[-1]))
        shifted_data.append((shifted, glucose))
    if not shifted_data:
        return None, None
    # Build the grid from integer step counts: this keeps it symmetric,
    # guarantees 0 is a grid point, and still covers the furthest sample.
    steps = int(np.ceil(reach / freq_minutes))
    grid = np.arange(-steps, steps + 1) * float(freq_minutes)
    aligned = [
        np.interp(grid, shifted, glucose, left=np.nan, right=np.nan)
        for shifted, glucose in shifted_data
    ]
    return grid, np.vstack(aligned)

def summarize_profile(stacked):
    """Summarize a stack of aligned traces column by column, ignoring NaN.

    Returns a ``(median, p10, p90, count)`` tuple where ``count`` is the
    number of non-NaN entries in each column.
    """
    present = ~np.isnan(stacked)
    center = np.nanmedian(stacked, axis=0)
    lower = np.nanpercentile(stacked, 10, axis=0)
    upper = np.nanpercentile(stacked, 90, axis=0)
    return center, lower, upper, present.sum(axis=0)


In [3]:
def _parse_service_date(value: str) -> date:
    """Parse an ISO-format date or datetime string and return its date part."""
    parsed = datetime.fromisoformat(value)
    return parsed.date()


def _load_detection_examples(pattern_id: str, paths: Iterable[str], patient_filter: Optional[str] = None) -> List[Dict[str, Any]]:
    """Collect evidence examples for one pattern from detection JSON files.

    Each file is read as a mapping of patient id to a payload whose
    ``detections`` entry maps a date key to a list of detection dicts.

    Args:
        pattern_id: Only detections whose ``pattern_id`` equals this are kept.
        paths: Detection JSON file paths; files that no longer exist are
            skipped.
        patient_filter: When truthy, only this patient id is kept.

    Returns:
        One dict per evidence example with keys ``patient_id``, ``example``,
        ``metrics`` (the owning detection's metrics, ``{}`` when absent or
        null) and ``source_file``.
    """
    examples: List[Dict[str, Any]] = []
    for path in paths:
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            payload = json.loads(Path(path).read_text(encoding="utf-8"))
        except FileNotFoundError:
            continue
        for patient_id, patient_payload in payload.items():
            if patient_filter and patient_id != patient_filter:
                continue
            # `or` also covers keys that are present but set to JSON null.
            detections_by_date = patient_payload.get("detections") or {}
            for detections in detections_by_date.values():
                for detection in detections or []:
                    if detection.get("pattern_id") != pattern_id:
                        continue
                    metrics = detection.get("metrics") or {}
                    evidence = detection.get("evidence") or {}
                    examples.extend(
                        {
                            "patient_id": patient_id,
                            "example": example,
                            "metrics": metrics,
                            "source_file": path,
                        }
                        for example in evidence.get("examples") or []
                    )
    return examples


def _fetch_day(patient_id: str, target_date: date) -> Optional[CGMDay]:
    """Return the CGM day whose ``service_date`` equals ``target_date``.

    Queries ``iter_cgm_days`` for the 24 hours starting at UTC midnight of
    ``target_date`` and returns the first matching day, or ``None`` when the
    iterator yields no day with that service date.
    """
    window_start = datetime.combine(target_date, time.min, tzinfo=timezone.utc)
    window_end = window_start + timedelta(days=1)
    candidates = iter_cgm_days(patient_id, start=window_start, end=window_end)
    return next((day for day in candidates if day.service_date == target_date), None)


def _day_frame(day: CGMDay) -> pd.DataFrame:
    """Return a copy of ``day.readings`` with UTC and local time columns.

    ``timestamp`` is parsed as timezone-aware UTC. ``local_time`` is that
    timestamp converted to ``day.local_timezone`` when one is set; otherwise,
    or when the conversion fails, it is the UTC timestamp. A failed
    conversion emits a ``RuntimeWarning`` because hour-based windows would
    then be evaluated in UTC rather than local time.

    Returns:
        The readings sorted by ``local_time``; ``day.readings`` is not
        modified.
    """
    import warnings

    df = day.readings.copy()
    df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True)
    df["local_time"] = df["timestamp"]
    if day.local_timezone:
        try:
            df["local_time"] = df["timestamp"].dt.tz_convert(day.local_timezone)
        except Exception as exc:
            # Best-effort fallback is kept on purpose, but made visible.
            warnings.warn(
                f"Could not convert readings to timezone {day.local_timezone!r} "
                f"({exc}); using UTC for local_time.",
                RuntimeWarning,
                stacklevel=2,
            )
    return df.sort_values("local_time")


def _window_slice(df: pd.DataFrame, start_hour: float, end_hour: float, padding_minutes: float = 0.0) -> pd.DataFrame:
    """Return a copy of the rows whose ``local_time`` falls in an hour window.

    The window is measured from midnight of the first row's ``local_time``,
    runs from ``start_hour`` to ``end_hour`` (both inclusive), and is widened
    by ``padding_minutes`` on each side. An empty frame yields an empty slice.
    """
    if df.empty:
        return df.iloc[0:0]
    local_times = df["local_time"]
    day_start = local_times.iloc[0].replace(hour=0, minute=0, second=0, microsecond=0)
    pad = timedelta(minutes=padding_minutes)
    lower = day_start + timedelta(hours=start_hour) - pad
    upper = day_start + timedelta(hours=end_hour) + pad
    # Series.between is inclusive on both ends by default.
    in_window = local_times.between(lower, upper)
    return df.loc[in_window].copy()


def _relative_minutes(df: pd.DataFrame, start_hour: float) -> pd.Series:
    """Return each row's ``local_time`` as minutes since the window start.

    The window start is ``start_hour`` hours after midnight of the first
    row's ``local_time``; rows before it get negative values. An empty frame
    yields an empty float Series.
    """
    if df.empty:
        return pd.Series(dtype=float)
    local_times = df["local_time"]
    day_start = local_times.iloc[0].replace(hour=0, minute=0, second=0, microsecond=0)
    anchor = day_start + timedelta(hours=start_hour)
    elapsed = local_times - anchor
    return elapsed.dt.total_seconds() / 60.0


def build_average_profile(frames: List[pd.DataFrame], duration_hours: float, freq_minutes: int = 5) -> pd.DataFrame:
    """Average several windows on a common minutes-since-start grid.

    Each frame with at least two rows is linearly interpolated onto a grid
    from 0 to ``duration_hours * 60`` minutes in ``freq_minutes`` steps; grid
    points outside a frame's observed range count as missing.

    Returns:
        A DataFrame with ``relative_minutes``, ``mean_glucose`` (NaN-aware
        mean) and ``sample_count`` (contributing frames per grid point), or
        an empty DataFrame when no frame is usable.
    """
    if not frames:
        return pd.DataFrame()
    grid = np.arange(0, int(duration_hours * 60) + 1, freq_minutes)
    rows = []
    for frame in frames:
        minutes = frame["relative_minutes"].to_numpy()
        values = frame["glucose_mg_dL"].to_numpy()
        if minutes.size < 2:
            continue
        # np.interp needs increasing x values.
        by_time = np.argsort(minutes)
        rows.append(np.interp(grid, minutes[by_time], values[by_time], left=np.nan, right=np.nan))
    if not rows:
        return pd.DataFrame()
    matrix = np.vstack(rows)
    profile = {
        "relative_minutes": grid,
        "mean_glucose": np.nanmean(matrix, axis=0),
        "sample_count": np.sum(~np.isnan(matrix), axis=0),
    }
    return pd.DataFrame(profile)


def align_on_nadir(frames, freq_minutes=5):
    """Re-center each window on its glucose minimum and resample to one grid.

    Args:
        frames: Iterable of DataFrames with ``relative_minutes`` and
            ``glucose_mg_dL`` columns.
        freq_minutes: Spacing of the output grid, in minutes.

    Returns:
        ``(grid, stacked)``. ``grid`` holds minute offsets from the nadir as
        exact multiples of ``freq_minutes`` spanning every frame, so offset 0
        (the nadir itself) is always sampled. ``stacked`` has one row per
        usable frame, with NaN outside that frame's observed range.
        ``(None, None)`` when no frame has at least three rows.
    """
    shifted_data = []
    min_shift = float('inf')
    max_shift = float('-inf')
    for frame in frames:
        rel = frame.loc[:, "relative_minutes"].to_numpy()
        glucose = frame.loc[:, "glucose_mg_dL"].to_numpy()
        if len(rel) < 3:
            # Too few samples to describe a shape around the nadir.
            continue
        # Shift the time axis so the lowest reading sits at offset 0.
        shifted = rel - rel[np.argmin(glucose)]
        order = np.argsort(shifted)
        shifted = shifted[order]
        glucose = glucose[order]
        min_shift = min(min_shift, shifted[0])
        max_shift = max(max_shift, shifted[-1])
        shifted_data.append((shifted, glucose))
    if not shifted_data:
        return None, None
    # Snap both ends outward to whole steps and build the grid from integer
    # step counts, so 0 is always a grid point and the full range is covered.
    first_step = int(np.floor(min_shift / freq_minutes))
    last_step = int(np.ceil(max_shift / freq_minutes))
    grid = np.arange(first_step, last_step + 1) * float(freq_minutes)
    aligned = [
        np.interp(grid, shifted, glucose, left=np.nan, right=np.nan)
        for shifted, glucose in shifted_data
    ]
    return grid, np.vstack(aligned)


def summarize_profile(stacked):
    """Return per-column ``(median, p10, p90, count)`` for stacked traces.

    All statistics ignore NaN; ``count`` is the number of non-NaN values in
    each column.
    """
    middle = np.nanmedian(stacked, axis=0)
    low, high = (np.nanpercentile(stacked, q, axis=0) for q in (10, 90))
    observed = np.sum(~np.isnan(stacked), axis=0)
    return middle, low, high, observed


def align_on_window_start(frames, duration_hours, freq_minutes=5):
    """Resample every window onto one minutes-since-window-start grid.

    The grid runs from 0 in ``freq_minutes`` steps up to (but excluding)
    ``duration_hours * 60 + freq_minutes``. Frames with fewer than two rows
    are skipped; grid points outside a frame's observed range become NaN.

    Returns:
        ``(grid, stacked)`` with one row per usable frame, or
        ``(None, None)`` when no frame is usable.
    """
    grid = np.arange(0, int(duration_hours * 60) + freq_minutes, freq_minutes)
    rows = []
    for frame in frames:
        minutes = frame["relative_minutes"].to_numpy()
        values = frame["glucose_mg_dL"].to_numpy()
        if minutes.size < 2:
            continue
        # np.interp needs increasing x values.
        by_time = np.argsort(minutes)
        rows.append(np.interp(grid, minutes[by_time], values[by_time], left=np.nan, right=np.nan))
    return (grid, np.vstack(rows)) if rows else (None, None)


In [4]:
# Collect one sleep-window trace per detection example.
# Outputs used by later cells: window_frames (traces), window_metadata (one
# dict per trace, same order), window_start_hour / window_end_hour.
detection_paths = glob.glob(detections_glob, recursive=True)
examples = _load_detection_examples(pattern_id, detection_paths, patient_filter=patient_filter)
if not examples:
    raise ValueError("No detection examples found for the specified parameters.")
if max_examples is not None:
    examples = examples[:max_examples]
window_frames: List[pd.DataFrame] = []
window_metadata: List[dict] = []
# Sleep-window hours of the first example; later cells use them as the shared
# window for durations and clock-time axes.
window_start_hour = None
window_end_hour = None
for idx, payload in enumerate(examples, start=1):
    patient_id = payload["patient_id"]
    example = payload["example"]
    metrics = payload.get("metrics", {})
    # Fall back to a 0-6 hour window when the detection metrics omit it.
    start_hour = float(metrics.get("sleep_window_start", 0.0))
    end_hour = float(metrics.get("sleep_window_end", 6.0))
    if window_start_hour is None:
        window_start_hour = start_hour
        window_end_hour = end_hour
    service_date = _parse_service_date(example["service_date"])
    day = _fetch_day(patient_id, service_date)
    if day is None:
        print(f"No CGM day found for {patient_id} on {service_date}")
        continue
    frame = _day_frame(day)
    window_df = _window_slice(frame, start_hour, end_hour, padding_minutes=minutes_padding)
    if window_df.empty:
        print(f"Empty window for {patient_id} on {service_date}")
        continue
    # Minutes are measured from this example's own start_hour; padded rows
    # before the window start therefore get negative values.
    window_df["relative_minutes"] = _relative_minutes(window_df, start_hour)
    # Non-numeric glucose values become NaN and are dropped below.
    window_df["glucose_mg_dL"] = pd.to_numeric(window_df["glucose_mg_dL"], errors="coerce")
    window_df = window_df.dropna(subset=["glucose_mg_dL", "relative_minutes"])
    if window_df.empty:
        continue
    # Frames and metadata are appended together so index i refers to the same
    # window in both lists.
    window_frames.append(window_df[["relative_minutes", "glucose_mg_dL"]])
    window_metadata.append({
        "patient_id": patient_id,
        "service_date": service_date,
        "minutes_low": example.get("minutes_low"),
        "lowest_glucose": example.get("lowest_glucose"),
        "start_hour": start_hour,
        "end_hour": end_hour,
    })
    # Progress message; not reached for examples skipped by `continue` above.
    if idx % 50 == 0:
        print(f"Processed {idx} examples so far...")
print(f"Collected {len(window_frames)} windows from {len(examples)} detections.")


Processed 50 examples so far...
Processed 100 examples so far...
Processed 150 examples so far...
Processed 200 examples so far...
Collected 245 windows from 245 detections.


In [8]:

import plotly.io as pio

# Open figures in the system browser instead of rendering them inline.
pio.renderers.default = "browser"
if window_frames and window_start_hour is not None and window_end_hour is not None:
    combined_df = pd.concat(window_frames, ignore_index=True)

    # relative_minutes is measured from each example's own sleep-window start,
    # so clock-time views must use the per-window hours recorded in
    # window_metadata rather than the first example's hours. If the metadata
    # list is not parallel to window_frames, fall back to the shared hours.
    frame_start_hours = [meta["start_hour"] for meta in window_metadata]
    frame_end_hours = [meta["end_hour"] for meta in window_metadata]
    if len(frame_start_hours) != len(window_frames):
        frame_start_hours = [window_start_hour] * len(window_frames)
        frame_end_hours = [window_end_hour] * len(window_frames)

    # 1) Every window overlaid, x = minutes since that window's start.
    fig_overlay = go.Figure()
    for frame in window_frames:
        fig_overlay.add_trace(
            go.Scatter(
                x=frame["relative_minutes"],
                y=frame["glucose_mg_dL"],
                mode="lines",
                line=dict(width=1, color="rgba(0, 0, 255, 0.1)"),
                showlegend=False,
            )
        )
    # Shade the 0-70 mg/dL band for reference.
    fig_overlay.add_hrect(y0=0, y1=70, fillcolor="red", opacity=0.1, line_width=0)
    fig_overlay.update_layout(
        title="All nocturnal hypoglycemia windows",
        xaxis_title="Minutes since sleep window start",
        yaxis_title="Glucose (mg/dL)",
    )
    fig_overlay.show()

    # 2) How readings are distributed along the window.
    fig_hist = go.Figure()
    fig_hist.add_trace(
        go.Histogram(
            x=combined_df["relative_minutes"],
            nbinsx=60,
            marker_color="rgba(0, 0, 255, 0.4)",
        )
    )
    fig_hist.update_layout(
        title="Distribution of minutes since sleep window start",
        xaxis_title="Minutes since window start",
        yaxis_title="Count",
    )
    fig_hist.show()

    # 3) All readings as a point cloud.
    fig_scatter = go.Figure(
        data=go.Scattergl(
            x=combined_df["relative_minutes"],
            y=combined_df["glucose_mg_dL"],
            mode="markers",
            marker=dict(size=3, opacity=0.1, color="blue"),
        )
    )
    fig_scatter.add_hrect(y0=0, y1=70, fillcolor="red", opacity=0.1, line_width=0)
    fig_scatter.update_layout(
        title="Scatter distribution of nocturnal hypoglycemia windows",
        xaxis_title="Minutes since window start",
        yaxis_title="Glucose (mg/dL)",
    )
    fig_scatter.show()

    # 4) Overlay on a clock-time axis, using each window's own start hour.
    fig_overlay_time = go.Figure()
    for frame, frame_start_hour in zip(window_frames, frame_start_hours):
        fig_overlay_time.add_trace(
            go.Scatter(
                x=frame_start_hour + frame["relative_minutes"] / 60.0,
                y=frame["glucose_mg_dL"],
                mode="lines",
                line=dict(width=1, color="rgba(0, 0, 255, 0.1)"),
                showlegend=False,
            )
        )
    fig_overlay_time.add_hrect(y0=0, y1=70, fillcolor="red", opacity=0.1, line_width=0)
    # Hourly ticks spanning the earliest start to the latest end; identical to
    # the first example's window when all windows share the same hours.
    tick_start_hour = min(frame_start_hours)
    tick_end_hour = max(frame_end_hours)
    tickhours = [
        tick_start_hour + offset / 60.0
        for offset in range(0, int((tick_end_hour - tick_start_hour) * 60) + 1, 60)
    ]
    ticklabels = [f"{int(hour % 24):02d}:00" for hour in tickhours]
    fig_overlay_time.update_layout(
        title="All nocturnal hypoglycemia windows | Clock time",
        xaxis_title="Clock time (hours)",
        xaxis=dict(tickmode="array", tickvals=tickhours, ticktext=ticklabels),
        yaxis_title="Glucose (mg/dL)",
    )
    fig_overlay_time.show()

    # 5) Distribution of readings over clock time, again per-window.
    clock_hours = pd.concat(
        [
            frame_start_hour + frame["relative_minutes"] / 60.0
            for frame, frame_start_hour in zip(window_frames, frame_start_hours)
        ],
        ignore_index=True,
    )
    fig_hist_time = go.Figure()
    fig_hist_time.add_trace(
        go.Histogram(
            x=clock_hours,
            nbinsx=60,
            marker_color="rgba(0, 0, 255, 0.4)",
        )
    )
    fig_hist_time.update_layout(
        title="Distribution of clock time",
        xaxis_title="Clock time (hours)",
        yaxis_title="Count",
    )
    fig_hist_time.show()
else:
    print("No windows available for overlay plot.")


In [1]:

# Cluster the windows twice (nadir-aligned and window-start-aligned), plot a
# median / 10th / 90th percentile profile per cluster, and tabulate cluster
# sizes with average detection metrics.
if window_frames and window_start_hour is not None and window_end_hour is not None:
    # Window length in hours, taken from the first example's sleep window.
    duration_hours = window_end_hour - window_start_hour
    cluster_metrics = []
    metadata_df = pd.DataFrame(window_metadata) if window_metadata else pd.DataFrame()

    # --- Nadir-aligned clustering ---
    grid_nadir, stacked_nadir = align_on_nadir(window_frames, freq_minutes=resample_minutes)
    # align_on_nadir skips frames with fewer than 3 rows, so row i of
    # stacked_nadir is not necessarily window i. Keep the original window
    # index of every surviving row to look up the matching metadata.
    nadir_rows = np.array([i for i, frame in enumerate(window_frames) if len(frame) >= 3], dtype=int)
    if grid_nadir is None:
        print("No nadir-aligned data available for clustering.")
    else:
        # KMeans cannot handle NaN: fill gaps with the column median, then
        # drop grid columns that no window covers at all.
        clean_nadir = np.where(np.isnan(stacked_nadir), np.nanmedian(stacked_nadir, axis=0), stacked_nadir)
        clean_nadir = clean_nadir[:, ~np.all(np.isnan(clean_nadir), axis=0)]
        if clean_nadir.size == 0:
            print("All nadir-aligned columns are NaN; skipping nadir clustering.")
        else:
            # Safety net: replace any NaN still present with the column mean.
            col_mean_nadir = np.nanmean(clean_nadir, axis=0)
            clean_nadir = np.nan_to_num(clean_nadir, nan=col_mean_nadir)
            # Need at least one row per cluster and some variation between rows.
            if clean_nadir.shape[0] >= n_clusters and not np.all(clean_nadir == clean_nadir[0]):
                labels_nadir = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(clean_nadir)
                print(f"Nadir cluster assignments: {labels_nadir[:20]} ...")
                for cluster_id in range(n_clusters):
                    # Summaries use the un-imputed traces so filled values do
                    # not distort the percentiles.
                    subset = stacked_nadir[labels_nadir == cluster_id]
                    if subset.size == 0:
                        continue
                    median, p10, p90, count = summarize_profile(subset)
                    fig = go.Figure()
                    fig.add_trace(go.Scatter(x=grid_nadir, y=median, mode='lines', name=f'Cluster {cluster_id + 1} median'))
                    fig.add_trace(go.Scatter(x=grid_nadir, y=p90, mode='lines', name='90th percentile', line=dict(dash='dash')))
                    fig.add_trace(go.Scatter(x=grid_nadir, y=p10, mode='lines', name='10th percentile', line=dict(dash='dash')))
                    fig.add_hrect(y0=0, y1=70, fillcolor='red', opacity=0.1, line_width=0)
                    fig.update_layout(
                        title=f"Nadir-aligned nocturnal hypoglycemia | Cluster {cluster_id + 1} (n={subset.shape[0]})",
                        xaxis_title="Minutes relative to nadir",
                        yaxis_title="Glucose (mg/dL)",
                    )
                    fig.show()
                    cluster_metrics.append({"alignment": "nadir", "cluster": cluster_id + 1, "count": subset.shape[0]})
                    if not metadata_df.empty:
                        idxs = np.where(labels_nadir == cluster_id)[0]
                        # Translate stacked-row positions to window positions.
                        cluster_meta = metadata_df.iloc[nadir_rows[idxs]]
                        cluster_metrics[-1].update({
                            "minutes_low_mean": cluster_meta['minutes_low'].mean(),
                            "lowest_glucose_mean": cluster_meta['lowest_glucose'].mean(),
                        })
            else:
                print("Not enough nadir-aligned data for clustering.")

    # --- Window-start-aligned clustering ---
    grid_start, stacked_start = align_on_window_start(window_frames, duration_hours, freq_minutes=resample_minutes)
    # align_on_window_start skips frames with fewer than 2 rows; track the
    # surviving window indices for the same reason as above.
    start_rows = np.array([i for i, frame in enumerate(window_frames) if len(frame) >= 2], dtype=int)
    if grid_start is None:
        print("No window-start aligned data available for clustering.")
    else:
        clean_start = np.where(np.isnan(stacked_start), np.nanmedian(stacked_start, axis=0), stacked_start)
        clean_start = clean_start[:, ~np.all(np.isnan(clean_start), axis=0)]
        if clean_start.size == 0:
            print("All window-start columns are NaN; skipping window-start clustering.")
        else:
            col_mean_start = np.nanmean(clean_start, axis=0)
            clean_start = np.nan_to_num(clean_start, nan=col_mean_start)
            if clean_start.shape[0] >= n_clusters and not np.all(clean_start == clean_start[0]):
                labels_start = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(clean_start)
                print(f"Window-start cluster assignments: {labels_start[:20]} ...")
                for cluster_id in range(n_clusters):
                    subset = stacked_start[labels_start == cluster_id]
                    if subset.size == 0:
                        continue
                    median, p10, p90, count = summarize_profile(subset)
                    fig = go.Figure()
                    fig.add_trace(go.Scatter(x=grid_start, y=median, mode='lines', name=f'Cluster {cluster_id + 1} median'))
                    fig.add_trace(go.Scatter(x=grid_start, y=p90, mode='lines', name='90th percentile', line=dict(dash='dash')))
                    fig.add_trace(go.Scatter(x=grid_start, y=p10, mode='lines', name='10th percentile', line=dict(dash='dash')))
                    fig.add_hrect(y0=0, y1=70, fillcolor='red', opacity=0.1, line_width=0)
                    fig.update_layout(
                        title=f"Window-start aligned nocturnal hypoglycemia | Cluster {cluster_id + 1} (n={subset.shape[0]})",
                        xaxis_title="Minutes since window start",
                        yaxis_title="Glucose (mg/dL)",
                    )
                    fig.show()
                    cluster_metrics.append({"alignment": "window_start", "cluster": cluster_id + 1, "count": subset.shape[0]})
                    if not metadata_df.empty:
                        idxs = np.where(labels_start == cluster_id)[0]
                        cluster_meta = metadata_df.iloc[start_rows[idxs]]
                        cluster_metrics[-1].update({
                            "minutes_low_mean": cluster_meta['minutes_low'].mean(),
                            "lowest_glucose_mean": cluster_meta['lowest_glucose'].mean(),
                        })
            else:
                print("Not enough window-start data for clustering.")

    if cluster_metrics:
        cluster_summary_df = pd.DataFrame(cluster_metrics)
        print("Cluster summary (counts and average metrics):")
        display(cluster_summary_df)
else:
    print("No frames available for clustering.")


NameError: name 'window_frames' is not defined

In [None]:
# Cluster the windows under both alignments, plot one median / 10th / 90th
# percentile profile per cluster, and collect per-cluster counts and average
# detection metrics into cluster_summary_df.
if window_frames and window_start_hour is not None and window_end_hour is not None:
    # Window length in hours, taken from the first example's sleep window.
    duration_hours = window_end_hour - window_start_hour
    metadata_df = pd.DataFrame(window_metadata) if 'window_metadata' in globals() else pd.DataFrame()
    cluster_metrics = []

    # --- Nadir-aligned clustering ---
    grid_nadir, stacked_nadir = align_on_nadir(window_frames, freq_minutes=resample_minutes)
    # align_on_nadir skips frames with fewer than 3 rows, so stacked rows and
    # windows are not index-aligned. Remember which window each row came from
    # so cluster members map to the right metadata rows.
    nadir_rows = np.array([i for i, frame in enumerate(window_frames) if len(frame) >= 3], dtype=int)
    if grid_nadir is None:
        print("No nadir-aligned data available for clustering.")
    else:
        # KMeans cannot handle NaN: fill gaps with the column median, then
        # drop grid columns that no window covers at all.
        clean_nadir = np.where(np.isnan(stacked_nadir), np.nanmedian(stacked_nadir, axis=0), stacked_nadir)
        clean_nadir = clean_nadir[:, ~np.all(np.isnan(clean_nadir), axis=0)]
        if clean_nadir.size == 0:
            print("All nadir-aligned columns are NaN; skipping nadir clustering.")
        else:
            # Safety net: replace any NaN still present with the column mean.
            col_mean_nadir = np.nanmean(clean_nadir, axis=0)
            clean_nadir = np.nan_to_num(clean_nadir, nan=col_mean_nadir)
            # Need at least one row per cluster and some variation between rows.
            if clean_nadir.shape[0] >= n_clusters and not np.all(clean_nadir == clean_nadir[0]):
                labels_nadir = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(clean_nadir)
                print(f"Nadir cluster assignments: {labels_nadir[:20]} ...")
                for cluster_id in range(n_clusters):
                    idxs = np.where(labels_nadir == cluster_id)[0]
                    # Summaries use the un-imputed traces.
                    subset = stacked_nadir[idxs]
                    if subset.size == 0:
                        continue
                    median, p10, p90, count = summarize_profile(subset)
                    fig = go.Figure()
                    fig.add_trace(go.Scatter(x=grid_nadir, y=median, mode='lines', name=f'Cluster {cluster_id + 1} median'))
                    fig.add_trace(go.Scatter(x=grid_nadir, y=p90, mode='lines', name='90th percentile', line=dict(dash='dash')))
                    fig.add_trace(go.Scatter(x=grid_nadir, y=p10, mode='lines', name='10th percentile', line=dict(dash='dash')))
                    fig.add_hrect(y0=0, y1=70, fillcolor='red', opacity=0.1, line_width=0)
                    fig.update_layout(
                        title=f"Nadir-aligned nocturnal hypoglycemia | Cluster {cluster_id + 1} (n={subset.shape[0]})",
                        xaxis_title="Minutes relative to nadir",
                        yaxis_title="Glucose (mg/dL)",
                    )
                    fig.show()
                    metrics = {"alignment": "nadir", "cluster": cluster_id + 1, "count": subset.shape[0]}
                    if not metadata_df.empty:
                        # Translate stacked-row positions to window positions.
                        cluster_meta = metadata_df.iloc[nadir_rows[idxs]]
                        metrics["minutes_low_mean"] = cluster_meta['minutes_low'].mean()
                        metrics["lowest_glucose_mean"] = cluster_meta['lowest_glucose'].mean()
                    cluster_metrics.append(metrics)
            else:
                print("Not enough nadir-aligned data for clustering.")

    # --- Window-start-aligned clustering ---
    grid_start, stacked_start = align_on_window_start(window_frames, duration_hours, freq_minutes=resample_minutes)
    # align_on_window_start skips frames with fewer than 2 rows; track the
    # surviving window indices for the same reason as above.
    start_rows = np.array([i for i, frame in enumerate(window_frames) if len(frame) >= 2], dtype=int)
    if grid_start is None:
        print("No window-start aligned data available for clustering.")
    else:
        clean_start = np.where(np.isnan(stacked_start), np.nanmedian(stacked_start, axis=0), stacked_start)
        clean_start = clean_start[:, ~np.all(np.isnan(clean_start), axis=0)]
        if clean_start.size == 0:
            print("All window-start columns are NaN; skipping window-start clustering.")
        else:
            col_mean_start = np.nanmean(clean_start, axis=0)
            clean_start = np.nan_to_num(clean_start, nan=col_mean_start)
            if clean_start.shape[0] >= n_clusters and not np.all(clean_start == clean_start[0]):
                labels_start = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(clean_start)
                print(f"Window-start cluster assignments: {labels_start[:20]} ...")
                for cluster_id in range(n_clusters):
                    idxs = np.where(labels_start == cluster_id)[0]
                    subset = stacked_start[idxs]
                    if subset.size == 0:
                        continue
                    median, p10, p90, count = summarize_profile(subset)
                    fig = go.Figure()
                    fig.add_trace(go.Scatter(x=grid_start, y=median, mode='lines', name=f'Cluster {cluster_id + 1} median'))
                    fig.add_trace(go.Scatter(x=grid_start, y=p90, mode='lines', name='90th percentile', line=dict(dash='dash')))
                    fig.add_trace(go.Scatter(x=grid_start, y=p10, mode='lines', name='10th percentile', line=dict(dash='dash')))
                    fig.add_hrect(y0=0, y1=70, fillcolor='red', opacity=0.1, line_width=0)
                    fig.update_layout(
                        title=f"Window-start aligned nocturnal hypoglycemia | Cluster {cluster_id + 1} (n={subset.shape[0]})",
                        xaxis_title="Minutes since window start",
                        yaxis_title="Glucose (mg/dL)",
                    )
                    fig.show()
                    metrics = {"alignment": "window_start", "cluster": cluster_id + 1, "count": subset.shape[0]}
                    if not metadata_df.empty:
                        cluster_meta = metadata_df.iloc[start_rows[idxs]]
                        metrics["minutes_low_mean"] = cluster_meta['minutes_low'].mean()
                        metrics["lowest_glucose_mean"] = cluster_meta['lowest_glucose'].mean()
                    cluster_metrics.append(metrics)
            else:
                print("Not enough window-start data for clustering.")
    if cluster_metrics:
        cluster_summary_df = pd.DataFrame(cluster_metrics)
        display(cluster_summary_df)
    else:
        print("No cluster metrics computed.")
else:
    print("No frames available for clustering.")


In [None]:

# Show the cluster sizes per alignment, if a clustering cell has produced
# cluster_summary_df in this session.
if 'cluster_summary_df' not in globals():
    print("No cluster summary available.")
else:
    print("Counts by alignment and cluster:")
    display(
        cluster_summary_df.groupby(['alignment', 'cluster'])['count'].sum()
    )

