# Afternoon Hypoglycemia Batch Visualizer

This notebook scans detection exports, finds every `afternoon_hypoglycemia` example,
fetches the corresponding CGM traces, plots each window, and builds an aggregated
profile to show the average afternoon low pattern.


In [1]:
# --- Parameters: edit these values ---
pattern_id = "afternoon_hypoglycemia"
detections_glob = "/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/**/*.json"
patient_filter = None
max_examples = None
minutes_padding = 30
resample_minutes = 5
batch_size = 200


In [2]:
import glob
import json
import os
import sys
from datetime import datetime, date, time, timedelta, timezone
from typing import Any, Dict, Iterable, List, Optional, Tuple
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go

repo_root = os.path.abspath("..")
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

from cgm_patterns.CGM_fetcher import iter_cgm_days
from cgm_patterns.models import CGMDay


In [3]:

import glob
sorted(glob.glob(detections_glob, recursive=True))


['/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/0_detections.json',
 '/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_1-10_detections.json',
 '/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_11-20_detections.json',
 '/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_111_175_detections.json',
 '/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_21_50_detections.json',
 '/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_51_80_detections.json',
 '/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_81_110_detections.json']

In [4]:
def _parse_service_date(value: str) -> date:
    return datetime.fromisoformat(value).date()


def _load_detection_examples(pattern_id: str, paths: Iterable[str], patient_filter: Optional[str] = None) -> List[Dict[str, Any]]:
    examples: List[Dict[str, Any]] = []
    for path in paths:
        try:
            payload = json.loads(Path(path).read_text())
        except FileNotFoundError:
            continue
        for patient_id, patient_payload in payload.items():
            if patient_filter and patient_id != patient_filter:
                continue
            detections_by_date = patient_payload.get("detections", {})
            for _, detections in detections_by_date.items():
                for detection in detections:
                    if detection.get("pattern_id") != pattern_id:
                        continue
                    metrics = detection.get("metrics", {})
                    evidence = detection.get("evidence", {})
                    for example in evidence.get("examples", []):
                        examples.append(
                            {
                                "patient_id": patient_id,
                                "example": example,
                                "metrics": metrics,
                                "source_file": path,
                            }
                        )
    return examples


def _fetch_day(patient_id: str, target_date: date) -> Optional[CGMDay]:
    start = datetime.combine(target_date, time.min, tzinfo=timezone.utc)
    end = start + timedelta(days=1)
    for day in iter_cgm_days(patient_id, start=start, end=end):
        if day.service_date == target_date:
            return day
    return None


def _day_frame(day: CGMDay) -> pd.DataFrame:
    df = day.readings.copy()
    df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True)
    if day.local_timezone:
        try:
            df["local_time"] = df["timestamp"].dt.tz_convert(day.local_timezone)
        except Exception:
            df["local_time"] = df["timestamp"]
    else:
        df["local_time"] = df["timestamp"]
    return df.sort_values("local_time")


def _window_slice(df: pd.DataFrame, target_date: date, start_hour: float, end_hour: float, padding_minutes: float = 0.0) -> pd.DataFrame:
    if df.empty:
        return df.iloc[0:0]
    base_time = df["local_time"].iloc[0]
    if pd.isna(base_time):
        return df.iloc[0:0]
    midnight = base_time.replace(hour=0, minute=0, second=0, microsecond=0)
    start = midnight + timedelta(hours=start_hour) - timedelta(minutes=padding_minutes)
    end = midnight + timedelta(hours=end_hour) + timedelta(minutes=padding_minutes)
    mask = (df["local_time"] >= start) & (df["local_time"] <= end)
    return df.loc[mask].copy()


def _relative_minutes(df: pd.DataFrame, target_date: date, window_start_hour: float) -> pd.Series:
    if df.empty:
        return pd.Series(dtype=float)
    base_time = df["local_time"].iloc[0]
    midnight = base_time.replace(hour=0, minute=0, second=0, microsecond=0)
    window_start = midnight + timedelta(hours=window_start_hour)
    return (df["local_time"] - window_start).dt.total_seconds() / 60.0


def plot_example(df: pd.DataFrame, example: Dict[str, Any], output_path: Optional[Path] = None) -> None:
    if df.empty:
        print("No CGM data for example", example)
        return
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df["local_time"],
            y=df["glucose_mg_dL"],
            mode="lines+markers",
            name="CGM",
        )
    )
    fig.add_hrect(y0=0, y1=70, fillcolor="red", opacity=0.1, line_width=0)
    fig.update_layout(
        title=f"Afternoon hypoglycemia | {example.get('service_date')}",
        xaxis_title="Local time",
        yaxis_title="Glucose (mg/dL)",
    )
    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        fig.write_html(str(output_path))
    fig.show()


def build_average_profile(frames: List[pd.DataFrame], window_start_hour: float, window_end_hour: float, freq_minutes: int = 5) -> pd.DataFrame:
    if not frames:
        return pd.DataFrame()
    duration_minutes = int((window_end_hour - window_start_hour) * 60)
    grid = np.arange(0, duration_minutes + 1, freq_minutes)
    interpolated = []
    for frame in frames:
        rel = frame.loc[:, "relative_minutes"].to_numpy()
        glucose = frame.loc[:, "glucose_mg_dL"].to_numpy()
        if len(rel) < 2:
            continue
        order = np.argsort(rel)
        rel = rel[order]
        glucose = glucose[order]
        interp = np.interp(grid, rel, glucose, left=np.nan, right=np.nan)
        interpolated.append(interp)
    if not interpolated:
        return pd.DataFrame()
    stacked = np.vstack(interpolated)
    mean = np.nanmean(stacked, axis=0)
    count = np.sum(~np.isnan(stacked), axis=0)
    return pd.DataFrame({
        "relative_minutes": grid,
        "mean_glucose": mean,
        "sample_count": count,
    })


def plot_average_profile(avg_df: pd.DataFrame, window_start_hour: float) -> None:
    if avg_df.empty:
        print("No data for aggregated profile.")
        return
    timestamps = [window_start_hour * 60 + m for m in avg_df["relative_minutes"]]
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=avg_df["relative_minutes"],
            y=avg_df["mean_glucose"],
            mode="lines+markers",
            name="Average glucose",
        )
    )
    fig.add_hrect(y0=0, y1=70, fillcolor="red", opacity=0.1, line_width=0)
    fig.update_layout(
        title="Average afternoon hypoglycemia profile",
        xaxis_title="Minutes since window start",
        yaxis_title="Glucose (mg/dL)",
    )
    fig.show()


In [5]:
import json
import glob
from pathlib import Path

detections_glob = "/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/**/*.json"

for path in glob.glob(detections_glob, recursive=True):
    data = json.loads(Path(path).read_text())
    if "afternoon_hypoglycemia" in json.dumps(data):
        print("Found in", path)

Found in /Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_51_80_detections.json
Found in /Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_11-20_detections.json
Found in /Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_81_110_detections.json
Found in /Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_111_175_detections.json
Found in /Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_21_50_detections.json
Found in /Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_1-10_detections.json
Found in /Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/0_detections.json


In [6]:
detection_paths = glob.glob(detections_glob, recursive=True)
examples = _load_detection_examples(pattern_id, detection_paths, patient_filter=patient_filter)
if not examples:
    raise ValueError("No detection examples found for the specified parameters.")

if max_examples is not None:
    examples = examples[:max_examples]

window_frames: List[pd.DataFrame] = []
window_start_hour = None
window_end_hour = None

for idx, payload in enumerate(examples, start=1):
    patient_id = payload["patient_id"]
    example = payload["example"]
    metrics = payload.get("metrics", {})
    start_hour = float(metrics.get("window_start_hour", 12.0))
    end_hour = float(metrics.get("window_end_hour", 17.0))
    if window_start_hour is None:
        window_start_hour = start_hour
        window_end_hour = end_hour

    service_date = _parse_service_date(example["service_date"])
    day = _fetch_day(patient_id, service_date)
    if day is None:
        print(f"No CGM day found for {patient_id} on {service_date}")
        continue

    frame = _day_frame(day)
    window_df = _window_slice(frame, service_date, start_hour, end_hour, padding_minutes=minutes_padding)
    if window_df.empty:
        print(f"Empty window for {patient_id} on {service_date}")
        continue

    window_df["relative_minutes"] = _relative_minutes(window_df, service_date, start_hour)
    window_df["glucose_mg_dL"] = pd.to_numeric(window_df["glucose_mg_dL"], errors="coerce")
    window_df = window_df.dropna(subset=["glucose_mg_dL", "relative_minutes"])
    if window_df.empty:
        continue

    window_frames.append(window_df[["relative_minutes", "glucose_mg_dL"]])
    if idx % 50 == 0:
        print(f"Processed {idx} examples so far...")



Processed 50 examples so far...
Processed 100 examples so far...
Processed 150 examples so far...
Processed 200 examples so far...
Processed 250 examples so far...
Processed 300 examples so far...
Processed 350 examples so far...
Processed 400 examples so far...
Processed 450 examples so far...
Processed 500 examples so far...
Processed 550 examples so far...
Processed 600 examples so far...
Processed 650 examples so far...
Processed 700 examples so far...
Processed 750 examples so far...
Processed 800 examples so far...
Processed 850 examples so far...
Processed 900 examples so far...
Processed 950 examples so far...
Processed 1000 examples so far...
Processed 1050 examples so far...
Processed 1100 examples so far...
Processed 1150 examples so far...
Processed 1200 examples so far...
Processed 1250 examples so far...
Processed 1300 examples so far...
Processed 1350 examples so far...
Processed 1400 examples so far...
Processed 1450 examples so far...
Processed 1500 examples so far...


In [14]:
!pip install nbformat==4.3.0

Collecting nbformat==4.3.0
  Downloading nbformat-4.3.0-py2.py3-none-any.whl.metadata (1.1 kB)
Downloading nbformat-4.3.0-py2.py3-none-any.whl (154 kB)
Installing collected packages: nbformat
  Attempting uninstall: nbformat
    Found existing installation: nbformat 4.2.0
    Uninstalling nbformat-4.2.0:
      Successfully uninstalled nbformat-4.2.0
Successfully installed nbformat-4.3.0


In [19]:
import plotly.io as pio
pio.renderers.default = "browser"
if window_frames:
    fig_overlay = go.Figure()
    for frame in window_frames:
        fig_overlay.add_trace(
            go.Scatter(
                x=frame["relative_minutes"],
                y=frame["glucose_mg_dL"],
                mode="lines",
                line=dict(width=1, color="rgba(0, 0, 255, 0.1)"),
                showlegend=False,
            )
        )
    fig_overlay.add_hrect(y0=0, y1=70, fillcolor="red", opacity=0.1, line_width=0)
    fig_overlay.update_layout(
        title="All afternoon hypoglycemia windows",
        xaxis_title="Minutes since window start",
        yaxis_title="Glucose (mg/dL)",
    )
    fig_overlay.show()
else:
    print("No windows available for overlay plot.")


In [18]:
if window_frames and window_start_hour is not None and window_end_hour is not None:
    for example_start in range(0, len(window_frames), batch_size):
        batch_frames = window_frames[example_start:example_start + batch_size]
        avg_df = build_average_profile(batch_frames, window_start_hour, window_end_hour, freq_minutes=resample_minutes)
        if avg_df.empty:
            print(f"No data for batch {example_start // batch_size + 1}.")
            continue
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=avg_df["relative_minutes"],
                y=avg_df["mean_glucose"],
                mode="lines+markers",
                name="Average glucose",
            )
        )
        fig.add_hrect(y0=0, y1=70, fillcolor="red", opacity=0.1, line_width=0)
        fig.update_layout(
            title=f"Average afternoon hypoglycemia profile | Batch {example_start // batch_size + 1}",
            xaxis_title="Minutes since window start",
            yaxis_title="Glucose (mg/dL)",
        )
        fig.show()
else:
    print("No frames available for aggregation.")



Mean of empty slice

