# Pattern Debugger Notebook

Use this notebook to inspect why a specific CGM pattern did or did not trigger for a patient.
Update the parameters below, run the cells in order, and review the metrics/evidence that each
analysis day produced.


In [1]:
# --- Parameters ---
# Rule to debug; run_pattern_analysis rejects IDs that are not in the registry.
pattern_id = "overnight_hyperglycemia"
# Patient whose CGM days are fetched.
patient_id = "6663751ba288866831d13caf"
# ISO-8601 bounds for the fetch window; values without a timezone are treated as UTC.
start_date = "2025-01-01"
end_date = "2025-10-22"
# Forwarded to the sliding window engine as its `analysis_days` setting.
analysis_days = 5


In [2]:
import os
import sys

# Optional: pull environment variables (tokens, etc.) from a .env file when
# python-dotenv is available; otherwise keep the existing environment.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ModuleNotFoundError:
    pass

# Make the repository importable. Assumes the notebook sits one directory
# below the repo root -- adjust if it lives elsewhere.
repo_root = os.path.abspath(os.pardir)
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

In [3]:
from __future__ import annotations

from datetime import datetime, timezone
from typing import Optional

import pandas as pd

from cgm_patterns.CGM_fetcher import iter_cgm_days
from cgm_patterns.engine import SlidingWindowEngine
from cgm_patterns.models import PatternStatus
from cgm_patterns.registry import registry
import cgm_patterns.rules  # ensures rules are registered


class NotebookCGMSource:
    """Data source for the sliding window engine, backed by ``iter_cgm_days``.

    Holds an optional ``start``/``end`` window and hands it to the fetcher each
    time the engine asks for a patient's days.
    """

    def __init__(self, start: Optional[datetime] = None, end: Optional[datetime] = None) -> None:
        # Bounds are stored as given and forwarded unchanged on every fetch.
        self._start, self._end = start, end

    def iter_days(self, patient_id: str):
        """Yield every CGM day ``iter_cgm_days`` returns for ``patient_id``."""
        day_stream = iter_cgm_days(patient_id, start=self._start, end=self._end)
        yield from day_stream


def _parse_date(value: Optional[str]) -> Optional[datetime]:
    if value in (None, "", "None"):
        return None
    if isinstance(value, datetime):
        return value
    parsed = datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def run_pattern_analysis(
    pattern_id: str,
    patient_id: str,
    *,
    start: Optional[str] = None,
    end: Optional[str] = None,
    analysis_days: int = 14,
    validation_days: int = 30,
):
    """Run a single registered pattern for one patient and tabulate the results.

    Args:
        pattern_id: ID of the rule to evaluate; must be present in ``registry``.
        patient_id: Patient whose CGM days are fetched.
        start: Optional lower bound for the fetch, parsed by ``_parse_date``.
        end: Optional upper bound for the fetch, parsed by ``_parse_date``.
        analysis_days: Passed to ``SlidingWindowEngine`` as ``analysis_days``.
        validation_days: Passed to ``SlidingWindowEngine`` as ``validation_days``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``analysis_date``, ``status``,
        ``confidence``, ``metrics`` and ``evidence``, one row per analysis date
        that produced a detection, ordered by analysis date. The columns are
        present even when there are no rows.

    Raises:
        ValueError: if ``pattern_id`` is not registered.
    """
    if pattern_id not in dict(registry.items()):
        raise ValueError(f"Pattern '{pattern_id}' is not registered. Check the ID and try again.")

    start_dt = _parse_date(start)
    end_dt = _parse_date(end)

    engine = SlidingWindowEngine(
        NotebookCGMSource(start=start_dt, end=end_dt),
        registry,
        analysis_days=analysis_days,
        validation_days=validation_days,
    )

    # Restrict the engine to the one rule being debugged.
    detections_by_date = engine.run_patient(
        patient_id,
        rule_filter=lambda rule: rule.id == pattern_id,
    )

    result_columns = ["analysis_date", "status", "confidence", "metrics", "evidence"]

    rows = []
    for analysis_date, detections in sorted(detections_by_date.items()):
        # Only the first detection is reported; dates without one are skipped.
        detection = detections[0] if detections else None
        if detection is None:
            continue
        rows.append(
            {
                "analysis_date": analysis_date.isoformat(),
                "status": detection.status.value,
                "confidence": detection.confidence,
                # Guard against a rule leaving these unset (None) so one sparse
                # detection cannot abort the whole run with a TypeError.
                "metrics": dict(detection.metrics or {}),
                "evidence": dict(detection.evidence or {}),
            }
        )

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=result_columns)


In [4]:
# Evaluate the selected pattern over the configured patient and date window.
results_df = run_pattern_analysis(
    pattern_id,
    patient_id,
    start=start_date,
    end=end_date,
    analysis_days=analysis_days,
)

if not results_df.empty:
    display(results_df)
else:
    print("No detections returned. Check CGM availability, date range, or parameters.")


KeyboardInterrupt: 

In [58]:
print("Daily coverage ratios in the selected range:")

# Uses the same source adapter and date bounds that run_pattern_analysis builds.
source = NotebookCGMSource(start=_parse_date(start_date), end=_parse_date(end_date))
coverage_rows = [
    {
        "service_date": cgm_day.service_date.isoformat(),
        "coverage_ratio": cgm_day.coverage_ratio(),
        "reading_count": len(cgm_day.readings),
    }
    for cgm_day in source.iter_days(patient_id)
]
coverage_df = pd.DataFrame(coverage_rows)

if not coverage_df.empty:
    display(coverage_df.sort_values("service_date"))
else:
    print("No CGM days returned for this patient/date window.")


Daily coverage ratios in the selected range:


Unnamed: 0,service_date,coverage_ratio,reading_count
0,2025-04-16,0.333333,96
1,2025-04-16,0.333333,96
2,2025-04-17,0.333333,96
3,2025-04-17,0.333333,96
4,2025-04-18,0.333333,96
...,...,...,...
373,2025-10-19,0.333333,96
374,2025-10-20,0.333333,96
375,2025-10-20,0.333333,96
376,2025-10-21,0.236111,68


In [None]:
if not results_df.empty:
    # Flatten each metrics dict into "metric.<name>" columns beside the scalar fields.
    metrics_data = results_df["metrics"].apply(lambda payload: payload if payload else {})
    scalar_columns = results_df.drop(columns=["metrics", "evidence"])
    metric_columns = pd.json_normalize(metrics_data).add_prefix("metric.")
    metrics_expanded = pd.concat([scalar_columns, metric_columns], axis=1)
    display(metrics_expanded)

    # How often each status occurred across the analysis dates.
    status_counts = metrics_expanded["status"].value_counts()
    print("\nStatus counts across analysis dates:")
    display(status_counts)

    # Rows whose status is anything other than DETECTED.
    is_detected = metrics_expanded["status"] == PatternStatus.DETECTED.value
    not_detected = metrics_expanded[~is_detected]
    if not not_detected.empty:
        print("\nAnalysis dates where the pattern did not detect:")
        display(not_detected)

    # Evidence payloads, keyed by analysis date.
    evidence_series = results_df[["analysis_date", "evidence"]]
    print("\nEvidence payloads per analysis date:")
    display(evidence_series)


In [None]:
# Optional: show the rule's description and, when it has any, its metadata.
rule = registry.get(pattern_id)
print(f"Pattern description: {rule.description}")
rule_metadata = getattr(rule, "metadata", None)
if rule_metadata:
    display(rule_metadata)
