In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from patsy import dmatrices
from statsmodels.stats.multitest import multipletests
from scipy import stats
from tableone import TableOne
# Hide annoying ipykernel warnings
import warnings;
warnings.filterwarnings('ignore');

# Day and Night HbA1c

In [None]:
# Load the analysis dataset.
# Prerequisite: run GitHub/BDC-Code/Viral Shah/Day and Night CGM/clean_cgms.py first.
# NOTE(review): `wd` is an absolute path on one machine; anyone else running
# this notebook has to edit it by hand.
wd = '/Users/timvigers/Dropbox/Work/Viral Shah/Day and Night CGM/'
df = pd.read_csv(wd + 'Data_Clean/analysis_data.csv')

# Data Characteristics

- Participants with no CGM data in the 2 weeks prior to their most recent HbA1c were removed.
- Several participants' CGM files did not contain glucose data.
- Mean glucose was used in place of AUC because participants did not always have the exact same number of day and night observations, and night was defined as only 7 hours long compared to 17 hours of daytime. Mean glucose and AUC per hour are essentially equivalent.

## Table 1

In [None]:
# CGM summary columns: a Mean and a TIR value for the Day and Night periods of
# each look-back window (14/30/60/90 -- presumably days before the HbA1c draw).
# The nesting order below reproduces the column order used throughout:
# '14 Day Mean', '14 Day TIR', '14 Night Mean', '14 Night TIR', '30 Day Mean', ...
cgm_vars = [
    f'{window} {period} {metric}'
    for window in (14, 30, 60, 90)
    for period in ('Day', 'Night')
    for metric in ('Mean', 'TIR')
]
# Columns summarised in Table 1: participant characteristics followed by the CGM metrics.
cols = ['Gender', 'Insulin', 'Age', 'HbA1c', *cgm_vars]
# Build the Table 1 summary of the selected columns with TableOne.
# Leaving `t1` as the cell's last expression makes Jupyter render the table.
t1 = TableOne(df,columns=cols)
t1

## Variable Plots

All variables appear to be reasonably Gaussian. Normality is not required to compute Pearson's correlation coefficient itself, although the p values reported below do assume approximately normally distributed data.

In [None]:
# Draw a histogram for each column that DataFrame.hist plots, on one tall
# 15x20 figure.
# NOTE(review): df.hist produces one subplot per column; when handed a single
# Axes, pandas appears to clear and re-grid the figure that owns it, so `ax`
# mainly serves to carry the figure size -- confirm against the pandas version
# in use before restructuring this cell.
fig = plt.figure(figsize = (15,20))
ax = fig.gca()
df.hist(ax = ax);# Trailing semicolon suppresses the text repr of the returned axes in Jupyter

# Correlation with HbA1c

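All p values in the following sections were FDR-adjusted (Benjamini-Hochberg) for multiple comparisons; the adjustment was applied separately within each results table.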

## Full Cohort

All CGM metrics were significantly correlated with HbA1c. Correlations were not as strong for nighttime variables. Correlation between mean glucose and HbA1c was slightly better than the correlation between TIR and HbA1c, but generally they seem to be comparable.

In [None]:
# Pearson correlation of each CGM variable with HbA1c in the full cohort.
# pearsonr returns the coefficient and its p value together, so it is called
# once per variable and unpacked instead of being evaluated twice.
res = {"Correlation":[],"p":[]}
for v in cgm_vars:
    r_val, p_val = stats.pearsonr(x = df[v],y = df['HbA1c'])
    res['Correlation'].append(r_val)
    res['p'].append(p_val)
# One row per CGM variable, labelled by the variable name.
res = pd.DataFrame(data=res, index=cgm_vars)
# Benjamini-Hochberg FDR adjustment across the tests in this table;
# element [1] of the multipletests result holds the adjusted p values.
res['p'] = multipletests(res['p'],alpha=0.05,method='fdr_bh')[1]
# Format for display: very small adjusted p values are shown as a "< 0.001"
# string (the column therefore mixes strings and floats).
res['p'] = ["< 0.001" if p < 0.001 else p for p in res['p']]
res['Correlation'] = res['Correlation'].round(3)
res

## Stratified by HbA1c Group

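Participants were grouped into three HbA1c categories: ≤ 7%, > 7% to 8.5%, and > 8.5% (labelled `< 7%`, `7-8.5%` and `> 8.5%` in the code and output below; a value of exactly 7% falls in the first group):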

In [None]:
# Categorize HbA1c
df['HbA1c Group'] = pd.cut(df['HbA1c'],[0,7,8.5,20],labels=['< 7%','7-8.5%','> 8.5%'])
display(pd.DataFrame(df['HbA1c Group'].value_counts()))

In [None]:
# Pearson correlation of each CGM variable with HbA1c, computed separately
# within each HbA1c group.
# groupby(..., observed=True) is used instead of looping over .unique():
#   * groups come out in category order rather than order of first appearance;
#   * rows with no group (NaN) and categories with no rows are skipped, either
#     of which would otherwise break pearsonr or the label concatenation below.
for g, df_cat in df.groupby('HbA1c Group', observed=True):
    res = {"Correlation":[],"p":[]}
    for v in cgm_vars:
        # pearsonr returns (coefficient, p value); call it once and unpack.
        r_val, p_val = stats.pearsonr(x = df_cat[v],y = df_cat['HbA1c'])
        res['Correlation'].append(r_val)
        res['p'].append(p_val)
    # One row per CGM variable, labelled by the variable name.
    res = pd.DataFrame(data=res, index=cgm_vars)
    # Benjamini-Hochberg FDR adjustment across the tests within this group;
    # element [1] of the multipletests result holds the adjusted p values.
    res['p'] = multipletests(res['p'],alpha=0.05,method='fdr_bh')[1]
    # Format for display: very small adjusted p values become a "< 0.001" string.
    res['p'] = ["< 0.001" if p < 0.001 else p for p in res['p']]
    res['Correlation'] = res['Correlation'].round(3)
    print('HbA1c group: '+g)
    display(res)