# Combined Pattern Summary
Explore detections across multiple JSON files using the shared helpers under `scripts/`.

In [49]:
from pathlib import Path
import sys

import pandas as pd

# The repository root is taken to be the parent of the current working
# directory, made absolute so later path joins do not depend on the cwd.
REPO_ROOT = Path('..').resolve()
SCRIPTS_PATH = REPO_ROOT.joinpath('scripts')

# Put the scripts folder at the front of the import search path, but only
# once, so re-running this cell does not stack duplicate entries.
scripts_entry = str(SCRIPTS_PATH)
if scripts_entry not in sys.path:
    sys.path.insert(0, scripts_entry)

from report_utils import (
    build_report,
    iter_cohort_pattern_rows,
    load_detections,
    report_to_rows,
)


In [50]:
# Collect every file in detection_v1/ whose name ends in "_detections.json".
# The paths are sorted so the list (and the merge order that follows from it)
# is the same on every run.
detections_dir = REPO_ROOT.joinpath('detection_v1')
detection_files = list(detections_dir.glob('*_detections.json'))
detection_files.sort()
detection_files

[PosixPath('/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_1-10_detections.json'),
 PosixPath('/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_11-20_detections.json'),
 PosixPath('/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_111_175_detections.json'),
 PosixPath('/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_21_50_detections.json'),
 PosixPath('/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_51_80_detections.json'),
 PosixPath('/Users/ellaquan/Downloads/cgm_pattern_lib/detection_v1/v1_81_110_detections.json')]

In [51]:
import warnings

# Merge the contents of every detections file into one dictionary.
# NOTE(review): keys are presumably patient ids (they are used as such in the
# following cells) -- confirm against load_detections.
combined_data = {}
duplicate_keys = set()
for path in detection_files:
    # dict() accepts the same inputs as dict.update(), so the merge result is
    # unchanged; materialising the shard just lets us inspect its keys first.
    shard = dict(load_detections(path))
    # Remember keys that were already loaded from an earlier file: update()
    # would otherwise overwrite them without any indication.
    duplicate_keys |= combined_data.keys() & shard.keys()
    combined_data.update(shard)  # later files still win on a collision

if duplicate_keys:
    examples = ', '.join(sorted(map(str, duplicate_keys))[:5])
    warnings.warn(
        f"{len(duplicate_keys)} key(s) appear in more than one detections file; "
        f"entries from later files replaced earlier ones (e.g. {examples})",
        stacklevel=2,
    )
len(combined_data)

175

In [53]:
# Patterns that are dropped from the per-patient table below.
excluded_patterns = ['recurrent_post_meal_spike', 'sensor_swap_step_change']

# Build the report over all loaded keys (in sorted order) and flatten it into
# rows using the helpers imported from report_utils.
patient_ids = sorted(combined_data)
report = build_report(combined_data, patient_ids)
rows = report_to_rows(report)

# One row per report entry; keep only rows whose pattern is not excluded.
all_pattern_rows_df = pd.DataFrame(rows)
is_excluded = all_pattern_rows_df['pattern_id'].isin(excluded_patterns)
patient_pattern_df = all_pattern_rows_df.loc[~is_excluded]
patient_pattern_df

Unnamed: 0,patient_id,pattern_id,detections,days
0,5ce2ddba274fa00013919af8,day_to_day_variability,1,1
1,5ce2ddba274fa00013919af8,dual_peak,4,4
2,5ce2ddba274fa00013919af8,frequent_spike,2,2
3,5d153d8448b93400134746bc,afternoon_hypoglycemia,8,8
4,5d153d8448b93400134746bc,day_to_day_variability,1,1
...,...,...,...,...
1090,68d1751dfe0c216c23682de5,morning_hyperglycemia,24,24
1091,68d1751dfe0c216c23682de5,nocturnal_hypoglycemia_moderate,4,4
1092,68d1751dfe0c216c23682de5,nocturnal_hypoglycemia_severe,6,6
1093,68d1751dfe0c216c23682de5,overnight_hyperglycemia,11,11


In [63]:
from collections import defaultdict

# pattern_dates[patient_id][pattern_id] -> set of the date keys under which
# that pattern was recorded for that patient.
pattern_dates = defaultdict(lambda: defaultdict(set))
for patient_id, block in combined_data.items():
    detections_by_date = block.get('detections', {})
    for service_date, entries in detections_by_date.items():
        # `entries or ()` skips dates whose entry list is empty or missing.
        for entry in entries or ():
            pattern_id = entry.get('pattern_id')
            if not pattern_id:
                # Entries without a pattern id are ignored.
                continue
            pattern_dates[patient_id][pattern_id].add(service_date)

# Flatten the nested mapping: one row per (patient, pattern) with the number
# of distinct dates and the sorted list of those dates.
date_rows = [
    dict(
        patient_id=pid,
        pattern_id=pattern_name,
        days=len(seen_dates),
        dates=sorted(seen_dates),
    )
    for pid, per_pattern in pattern_dates.items()
    for pattern_name, seen_dates in per_pattern.items()
]
patient_pattern_dates_df = pd.DataFrame(date_rows)

# Show the full 'dates' lists instead of truncating wide cells.
pd.set_option('display.max_colwidth', None)

# Narrow the table down to a single pattern of interest.
is_selected = patient_pattern_dates_df['pattern_id'].eq('persistent_hyperglycemia')
patient_pattern_dates_df = patient_pattern_dates_df.loc[is_selected]
patient_pattern_dates_df

Unnamed: 0,patient_id,pattern_id,days,dates


In [55]:
# Total detections per pattern across all patients, largest total first.
# Selecting with [['detections']] keeps the result a one-column DataFrame.
detections_per_pattern = patient_pattern_df.groupby('pattern_id')[['detections']].sum()
summary_df = detections_per_pattern.sort_values(by='detections', ascending=False)
summary_df


Unnamed: 0_level_0,detections
pattern_id,Unnamed: 1_level_1
morning_hyperglycemia,6448
dual_peak,2673
frequent_spike,2594
persistent_hyperglycemia,2439
high_glycemic_variability,1996
overnight_hyperglycemia,1546
dawn_phenomenon,1186
day_to_day_variability,1148
nocturnal_hypoglycemia_moderate,829
nocturnal_hypoglycemia_severe,708


In [67]:
# Two-column view pairing each pattern with a patient that has it.
pattern_patient_df = patient_pattern_df.loc[:, ['pattern_id', 'patient_id']]

# Print one line per pattern (in order of first appearance) listing the
# patient ids of the rows that carry that pattern.
pattern_column = pattern_patient_df['pattern_id']
for pattern in pattern_column.unique():
    has_pattern = pattern_column.eq(pattern)
    patients = pattern_patient_df.loc[has_pattern, 'patient_id']
    print(f"{pattern}: {', '.join(patients)}")

pattern_patient_df

day_to_day_variability: 5ce2ddba274fa00013919af8, 5d153d8448b93400134746bc, 5d30acacc530770014f21463, 5d7148f5f9d673001361c375, 5d83a997892e7e0013912b5f, 5d8bafa53f797500152b6135, 5d936eec3f797500157701e3, 5dd746bf587db3001347ef16, 5df28953b2564b0014afa33c, 5e5da041c71c0700139e258a, 5e8ccb12f6c26e00132a5b2b, 5e8fa484f6c26e00132b3d62, 5e94e56ff6c26e00132c4256, 5e97591a350f7d29abad6580, 5ed96814bcf7710014acbfcf, 5f494a03019de515e8e25651, 5f494a0f019de515e8e25698, 5f494a4e019de515e8e257f3, 5f494a61019de515e8e2585f, 5f494a72019de515e8e258b9, 5f494b2f019de515e8e25cdc, 5f494b52019de515e8e25da7, 5f494b61019de515e8e25def, 5f527cb43a24eb0013307c4e, 5f6bd625ccc82900137bf0da, 5fc7f98886e57f001325eec6, 5ff3615d67978c00147ad7ea, 6007076b2b6db585fcb9e99d, 600f21b8bb37c80012d823ef, 60382ad614e3220013b748e2, 603a89360234d0001550fae9, 604ba2098dc6120013d45cde, 60b96b9358fd550012dfbfce, 60dce9410f1d01001371c2a2, 612ff19042d1e00012da3596, 6132b193f36f740013992af5, 61577728e2268f0012920280, 61a7cbbba200ce

Unnamed: 0,pattern_id,patient_id
0,day_to_day_variability,5ce2ddba274fa00013919af8
1,dual_peak,5ce2ddba274fa00013919af8
2,frequent_spike,5ce2ddba274fa00013919af8
3,afternoon_hypoglycemia,5d153d8448b93400134746bc
4,day_to_day_variability,5d153d8448b93400134746bc
...,...,...
1090,morning_hyperglycemia,68d1751dfe0c216c23682de5
1091,nocturnal_hypoglycemia_moderate,68d1751dfe0c216c23682de5
1092,nocturnal_hypoglycemia_severe,68d1751dfe0c216c23682de5
1093,overnight_hyperglycemia,68d1751dfe0c216c23682de5


In [48]:
# Number of distinct patients per pattern, most widespread pattern first.
distinct_patients = patient_pattern_df.groupby('pattern_id')['patient_id'].nunique()
patient_counts = distinct_patients.sort_values(ascending=False).rename('patient_count')

# Display as a one-column table named 'patient_count'.
patient_counts.to_frame()


Unnamed: 0_level_0,patient_count
pattern_id,Unnamed: 1_level_1
day_to_day_variability,136
morning_hyperglycemia,127
dual_peak,117
frequent_spike,110
persistent_hyperglycemia,76
high_glycemic_variability,74
dawn_phenomenon,72
overnight_hyperglycemia,64
nocturnal_hypoglycemia_severe,53
nocturnal_hypoglycemia_moderate,50


In [13]:
# Cohort-level rows produced by the report_utils helper from the report
# built earlier in the notebook.
cohort_rows = iter_cohort_pattern_rows(report)
cohort_df = pd.DataFrame(cohort_rows)

# Preview the five rows with the highest detection counts.
cohort_df.sort_values(by='detections', ascending=False).head(5)


Unnamed: 0,pattern_id,detections,days
10,morning_hyperglycemia,1016,1016
15,persistent_hyperglycemia,559,559
3,dual_peak,421,421
7,frequent_spike,364,364
8,high_glycemic_variability,315,315


In [14]:
# Wide patient x pattern matrix of detection counts.
# pivot_table aggregates with the mean unless told otherwise; 'detections'
# holds counts, so sum is requested explicitly. If a (patient, pattern) pair
# ever occurs in more than one row, the cell then holds the total instead of
# an average. Combinations that never occur are filled with 0.
pivot = patient_pattern_df.pivot_table(
    index='patient_id',
    columns='pattern_id',
    values='detections',
    aggfunc='sum',
    fill_value=0,
)
pivot.head()


pattern_id,afternoon_hypoglycemia,dawn_phenomenon,day_to_day_variability,dual_peak,early_morning_hypoglycemia,evening_hypoglycemia,frequent_hypoglycemia,frequent_spike,high_glycemic_variability,mid_morning_hypoglycemia,morning_hyperglycemia,nocturnal_hypoglycemia_moderate,nocturnal_hypoglycemia_severe,overnight_compression_low,overnight_hyperglycemia,persistent_hyperglycemia,prebed_hypoglycemia,somogyi_effect
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
5e97591a350f7d29abad6580,0.0,25.0,25.0,162.0,0.0,0.0,0.0,168.0,128.0,0.0,176.0,0.0,4.0,0.0,0.0,171.0,0.0,0.0
5f494a03019de515e8e25651,0.0,6.0,28.0,111.0,0.0,0.0,0.0,35.0,0.0,0.0,187.0,0.0,0.0,0.0,23.0,156.0,0.0,0.0
5f494b2f019de515e8e25cdc,38.0,44.0,25.0,5.0,12.0,28.0,11.0,14.0,5.0,8.0,0.0,80.0,15.0,4.0,0.0,0.0,20.0,0.0
645193ac60335a00138916f7,0.0,8.0,24.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,166.0,0.0,0.0,0.0,60.0,48.0,0.0,0.0
64d12900769f200014c57632,4.0,5.0,23.0,35.0,0.0,0.0,0.0,6.0,46.0,0.0,151.0,2.0,4.0,4.0,79.0,98.0,0.0,14.0
