In [None]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrices
from tableone import TableOne
# Root folder of the project on the shared network volume; every data path in
# this notebook is built by concatenating onto `wd`, so it must end with "/".
# NOTE(review): absolute, machine-specific path - edit it here when running elsewhere.
wd = '/Volumes/PEDS/RI Biostatistics Core/Shared/Shared Projects/Laura/BDC/Projects/Viral Shah/Day and Night CGM/'
# Global seaborn look for every figure below: white grid background, then the
# "coolwarm" palette as the default colour cycle.
sns.set_theme(style="whitegrid")
sns.set_palette("coolwarm")

In [None]:
# Import data
# Raw CGM export column -> short analysis name. The columns are renamed by
# name rather than by assigning `df.columns` positionally, because read_csv
# returns the `usecols` columns in file order, not in the order listed here;
# a positional assignment silently mislabels columns if the two orders differ.
kaan_columns = {
    'ID': 'id',
    'Age': 'age',
    'HbA1c': 'a1c',
    '14 Overall Mean': 'mbg',
    '14 Overall TIR 70 - 140': 'total_tir_70_140',
    '14 Overall TIR 70 - 180': 'total_tir_70_180',
    '14 Overall TIR Over 180': 'total_tir_over_180',
    '14 Day Mean': 'day_mbg',
    '14 Day TIR 70 - 140': 'day_tir_70_140',
    '14 Day TIR 70 - 180': 'day_tir_70_180',
    '14 Day TIR Over 180': 'day_tir_over_180',
    '14 Night Mean': 'night_mbg',
    '14 Night TIR 70 - 140': 'night_tir_70_140',
    '14 Night TIR 70 - 180': 'night_tir_70_180',
    '14 Night TIR Over 180': 'night_tir_over_180',
}
analysis_columns = list(kaan_columns.values())
df = pd.read_csv(wd+'Data_Clean/analysis_data.csv', usecols=list(kaan_columns))
df = df.rename(columns=kaan_columns)[analysis_columns]
# Get row with most sensor values for each person in JDRF data
jdrf = pd.read_csv(wd+'Data_Clean/analysis_data_jdrf.csv')
most_readings = jdrf.groupby('id', sort=False)['sensor_readings'].transform('max')
jdrf = jdrf[jdrf['sensor_readings'] == most_readings]
# Two wears tied on the maximum would both survive the filter above; keep the
# first so that each participant contributes exactly one row.
jdrf = jdrf.drop_duplicates(subset='id', keep='first')
idx = jdrf["sensor_readings"] >= 2822 # 70% cutoff for 2 weeks of data
jdrf = jdrf[idx]
# Combine: stack the two cohorts on the shared analysis columns
jdrf = jdrf[analysis_columns]
df = pd.concat([df, jdrf])
df.reset_index(inplace=True, drop=True)

In [None]:
# Demographics
kaan_demo = pd.read_csv(wd+"Data_Clean/demographics.csv",
                        usecols=["FirstName","LastName","Age","Gender",
                                 "DiabetesDuration_MostRecentVisitDate",
                                 "BMI","Hemoglobin_Value","MCV_Value","eGFR_Value",
                                 "Retinopathy_OD","Retinopathy_OS",
                                 "Hypothyroidism_YesNo","Hyperthyroidism_YesNo",
                                 "GravesDisease_YesNo","CeliacDisease_YesNo",
                                 "AddisonsDisease_YesNo"])
# Participant id is "firstname_lastname" in lower case. The .str accessor
# passes a missing name through as NaN instead of raising on float.lower().
kaan_demo["id"] = kaan_demo["FirstName"].str.lower() + "_" + kaan_demo["LastName"].str.lower()
# Combine categorical variables: "Yes" if any of the five conditions is "Yes".
# Testing for any "Yes" (rather than summing 0/1 flags and mapping only the
# totals 0, 1 and 2) keeps participants with three or more conditions from
# ending up with a raw number in this column. Missing answers count as "No".
autoimmune_cols = ["Hypothyroidism_YesNo","Hyperthyroidism_YesNo",
                   "GravesDisease_YesNo","CeliacDisease_YesNo",
                   "AddisonsDisease_YesNo"]
has_autoimmune = kaan_demo[autoimmune_cols].eq("Yes").any(axis=1)
kaan_demo["Autoimmune Disease?"] = np.where(has_autoimmune, "Yes", "No")
# Retinopathy in either eye?
kaan_demo["Retinopathy_OD"] = kaan_demo["Retinopathy_OD"].str.strip()
kaan_demo["Retinopathy_OS"] = kaan_demo["Retinopathy_OS"].str.strip()
eyes = kaan_demo[["Retinopathy_OD","Retinopathy_OS"]]
# Any recorded grade other than "No Retinopathy" counts as retinopathy
eye_has_retinopathy = eyes.notna() & eyes.ne("No Retinopathy")
both_eyes_recorded = eyes.notna().all(axis=1)
# The result is left missing unless both eyes have a recorded grade
kaan_demo["Retinopathy?"] = pd.Series(
    np.where(eye_has_retinopathy.any(axis=1), "Yes", "No"),
    index=kaan_demo.index,
).where(both_eyes_recorded)


In [None]:
# JDRF demographics - manually changed column names to match Kaan's data
jdrf_demo = pd.read_csv(wd+"Data_Clean/jdrf_demographics.csv",
                        usecols=["id","FirstName","LastName","MRN","Age","Gender",
                                 "DiabetesDuration_MostRecentVisitDate",
                                 "BMI","Retinopathy_OD","Retinopathy_OS",
                                 "Hypothyroidism_YesNo","Hyperthyroidism_YesNo",
                                 "GravesDisease_YesNo","CeliacDisease_YesNo",
                                 "AddisonsDisease_YesNo"])
# JDRF labs
jdrf_mcv = pd.read_excel(wd + "Data_Clean/jdrf_labs.xlsx",sheet_name='MCV')
jdrf_hb = pd.read_excel(wd + "Data_Clean/jdrf_labs.xlsx",sheet_name='Hemoglobin')
jdrf_egfr = pd.read_excel(wd + "Data_Clean/jdrf_labs.xlsx",sheet_name='eGFR')
# Keep only the two eGFR components used in the analysis BEFORE choosing the
# closest result. Filtering afterwards would drop a participant entirely
# whenever their closest eGFR row happened to be a different component.
jdrf_egfr = jdrf_egfr[jdrf_egfr['LabComponent'].isin(['EGFR NON-AFR. AMERICAN','EGFR IF NONAFRICN AM'])]


def _closest_lab(labs, value_name):
    """Return one lab value per MRN: the result dated closest to the eye exam.

    Parameters
    ----------
    labs : DataFrame with columns 'MRN', 'ResultDate', 'Date of eye exam'
        and 'OrderValue'.
    value_name : name given to the 'OrderValue' column in the result.

    Returns
    -------
    DataFrame with columns ['MRN', value_name] and at most one row per MRN.
    If several results are equally close, the first one in sheet order is
    kept so that the later merges cannot duplicate a participant.
    """
    gap = (labs['ResultDate'] - labs['Date of eye exam']).abs()
    labs = labs.assign(Time=gap)
    smallest_gap = labs.groupby('MRN')['Time'].transform('min')
    closest = labs.loc[labs['Time'] == smallest_gap, ['MRN','OrderValue']]
    closest = closest.rename(columns={'OrderValue': value_name})
    return closest.drop_duplicates(subset='MRN').reset_index(drop=True)


# Get the value closest in time to the eye exam for each lab
jdrf_hb = _closest_lab(jdrf_hb, 'Hemoglobin_Value')
jdrf_mcv = _closest_lab(jdrf_mcv, 'MCV_Value')
jdrf_egfr = _closest_lab(jdrf_egfr, 'eGFR_Value')
# Merge with demographics
for labs in (jdrf_hb, jdrf_mcv, jdrf_egfr):
    jdrf_demo = pd.merge(jdrf_demo, labs, on='MRN', how='outer')
# Combine categorical variables: "Yes" if any of the five conditions is "Yes".
# Testing for any "Yes" (rather than summing 0/1 flags and mapping only the
# totals 0, 1 and 2) keeps participants with three or more conditions from
# ending up with a raw number in this column. Missing answers count as "No".
autoimmune_cols = ["Hypothyroidism_YesNo","Hyperthyroidism_YesNo",
                   "GravesDisease_YesNo","CeliacDisease_YesNo",
                   "AddisonsDisease_YesNo"]
has_autoimmune = jdrf_demo[autoimmune_cols].eq("Yes").any(axis=1)
jdrf_demo["Autoimmune Disease?"] = np.where(has_autoimmune, "Yes", "No")
# Retinopathy in either eye?
jdrf_demo["Retinopathy_OD"] = jdrf_demo["Retinopathy_OD"].str.strip()
jdrf_demo["Retinopathy_OS"] = jdrf_demo["Retinopathy_OS"].str.strip()
eyes = jdrf_demo[["Retinopathy_OD","Retinopathy_OS"]]
# Any recorded grade other than "No Retinopathy" counts as retinopathy
eye_has_retinopathy = eyes.notna() & eyes.ne("No Retinopathy")
both_eyes_recorded = eyes.notna().all(axis=1)
# The result is left missing unless both eyes have a recorded grade
jdrf_demo["Retinopathy?"] = pd.Series(
    np.where(eye_has_retinopathy.any(axis=1), "Yes", "No"),
    index=jdrf_demo.index,
).where(both_eyes_recorded)
# Put everything together
demo = pd.concat([jdrf_demo,kaan_demo])
# No `on=` is given, so the join uses every column the two frames share
# (only `id` when the notebook is run top to bottom on a fresh kernel).
df = pd.merge(df,demo,how='left')
# As numeric: lab values that cannot be parsed become NaN
for lab_col in ['Hemoglobin_Value','MCV_Value','eGFR_Value']:
    df[lab_col] = pd.to_numeric(df[lab_col],errors='coerce')

# Data Characteristics
- TIR was calculated using 2 weeks of CGM data prior to an office visit.
- For participants with multiple HbA1c measures, the CGM wear with the largest number of sensor readings was used. This was done to prevent auto-correlation between measures within a subject.
- CGM wears with < 70% wear time were excluded.

In [None]:
# Split into HbA1c groups. The bins are closed on the left (right=False):
# [0, 7), [7, 8), [8, 9), [9, 10), [10, inf). An HbA1c of exactly 10.0
# therefore falls in the group labelled ">10%".
a1c_edges = [0,7,8,9,10,float('inf')]
a1c_labels = ["<7%","[7.0%, 8.0%)","[8.0%, 9.0%)","[9.0%, 10.0%)",">10%"]
df['a1c_group'] = pd.cut(df['a1c'], bins=a1c_edges, labels=a1c_labels, right=False)
# Difference between day and night (day minus night) for each CGM metric
for metric in ['tir_70_140', 'tir_70_180', 'tir_over_180', 'mbg']:
    df[metric + '_diff'] = df['day_' + metric] - df['night_' + metric]

In [None]:
# Save the combined dataset, without the row index, for re-analysis in R
combined_path = wd + "Data_Clean/combined_analysis_data.csv"
df.to_csv(combined_path, index=False)

# Participant Characteristics

In [None]:
# Participant characteristics summarised within each HbA1c group.
# `categorical` names the columns to tabulate as categories.
group = ['a1c_group']
categorical = ['Gender', 'Autoimmune Disease?', 'Retinopathy?']
columns = [
    'a1c', 'Age', 'Gender', 'BMI', 'DiabetesDuration_MostRecentVisitDate',
    'Hemoglobin_Value', 'MCV_Value', 'eGFR_Value',
    'Autoimmune Disease?', 'Retinopathy?',
]
TableOne(df, columns=columns, categorical=categorical, groupby=group)

## Summary Table and Variable Distributions

In [None]:
# Age, HbA1c and the day/night CGM metrics summarised within each HbA1c group
group = ['a1c_group']
cgm_metrics = ('mbg', 'tir_70_140', 'tir_70_180', 'tir_over_180')
columns = ['age', 'a1c'] + [
    period + '_' + metric for period in ('day', 'night') for metric in cgm_metrics
]
TableOne(df, columns=columns, groupby=group)

## Mean Day and Night TIR

### By HbA1c Group

#### TIR

In [None]:
# Wide to long: one row per participant per period (day / night)
plot_df = df.melt(id_vars=['a1c','a1c_group'],value_vars=['day_tir_70_140',"night_tir_70_140"], value_name='TIR')
# Assign the relabelled column back instead of calling
# replace(..., inplace=True) on plot_df['variable']: that chained in-place
# form updates a temporary under pandas Copy-on-Write and leaves plot_df
# unchanged, and the warning about it is hidden by filterwarnings('ignore').
plot_df['variable'] = plot_df['variable'].replace({'day_tir_70_140':'Day','night_tir_70_140':'Night'})
# Plot
plot = sns.boxplot(x='a1c_group',y='TIR',hue='variable',data=plot_df)
plot.set(xlabel='HbA1c Group',ylabel='% Time in Range (70-140 mg/dL)')
plot.legend(title='');

In [None]:
# Wide to long: day vs. night TIR 70-180, one row per participant per period.
# NOTE(review): this cell previously melted all six TIR columns, relabelled
# them with keys ('day_tir', 'night_tir') that match no value, and kept the
# 70-140 axis label. It now plots the 70-180 range only, in line with the
# 70-180 difference plot and tests further down - confirm this is the
# intended figure.
plot_df = df.melt(id_vars=['a1c','a1c_group'],value_vars=['day_tir_70_180',"night_tir_70_180"], value_name='TIR')
# Assign back rather than using a chained inplace replace, which does not
# modify plot_df under pandas Copy-on-Write.
plot_df['variable'] = plot_df['variable'].replace({'day_tir_70_180':'Day','night_tir_70_180':'Night'})
# Plot
plot = sns.boxplot(x='a1c_group',y='TIR',hue='variable',data=plot_df)
plot.set(xlabel='HbA1c Group',ylabel='% Time in Range (70-180 mg/dL)')
plot.legend(title='');

In [None]:
# Wide to long: day vs. night time above 180, one row per participant per period.
# NOTE(review): this cell was an exact copy of the six-variable plot, with
# relabelling keys ('day_tir', 'night_tir') that match no value and a 70-140
# axis label. It now plots time above 180 mg/dL only, in line with the
# over-180 difference plot and tests further down - confirm this is the
# intended figure.
plot_df = df.melt(id_vars=['a1c','a1c_group'],value_vars=['day_tir_over_180',"night_tir_over_180"], value_name='TIR')
# Assign back rather than using a chained inplace replace, which does not
# modify plot_df under pandas Copy-on-Write.
plot_df['variable'] = plot_df['variable'].replace({'day_tir_over_180':'Day','night_tir_over_180':'Night'})
# Plot
plot = sns.boxplot(x='a1c_group',y='TIR',hue='variable',data=plot_df)
plot.set(xlabel='HbA1c Group',ylabel='% Time Above 180 mg/dL')
plot.legend(title='');

In [None]:
# Distribution of the day/night TIR 70-140 difference within each HbA1c group
plot = sns.boxplot(data=df, x='a1c_group', y='tir_70_140_diff')
plot.set_xlabel('HbA1c Group')
plot.set_ylabel('Day vs. Night TIR 70 - 140 Difference');

In [None]:
# Distribution of the day/night TIR 70-180 difference within each HbA1c group
plot = sns.boxplot(data=df, x='a1c_group', y='tir_70_180_diff')
plot.set_xlabel('HbA1c Group')
plot.set_ylabel('Day vs. Night TIR 70 - 180 Difference');

In [None]:
# Distribution of the day/night time-over-180 difference within each HbA1c group
plot = sns.boxplot(data=df, x='a1c_group', y='tir_over_180_diff')
plot.set_xlabel('HbA1c Group')
plot.set_ylabel('Day vs. Night TIR Over 180 Difference');

#### Mean Sensor Glucose

In [None]:
# Wide to long: one row per participant per period (day / night)
plot_df = df.melt(id_vars=['a1c','a1c_group'],value_vars=['day_mbg','night_mbg'], value_name='MSG')
# Assign the relabelled column back instead of calling
# replace(..., inplace=True) on plot_df['variable']: that chained in-place
# form updates a temporary under pandas Copy-on-Write and leaves plot_df
# unchanged, and the warning about it is hidden by filterwarnings('ignore').
plot_df['variable'] = plot_df['variable'].replace({'day_mbg':'Day','night_mbg':'Night'})
# Plot
plot = sns.boxplot(x='a1c_group',y='MSG',hue='variable',data=plot_df)
plot.set(xlabel='HbA1c Group',ylabel='Mean Sensor Glucose (mg/dL)')
plot.legend(title='');

In [None]:
# Distribution of the day/night mean sensor glucose difference within each HbA1c group
plot = sns.boxplot(data=df, x='a1c_group', y='mbg_diff')
plot.set_xlabel('HbA1c Group')
plot.set_ylabel('Day vs. Night SG Difference');

Error bars indicate 95% confidence interval around the mean.

In [None]:
# Tests
# One-way ANOVA (typ=2): day mean sensor glucose by HbA1c group
model = smf.ols(formula='day_mbg ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)


There were significant differences in daytime mean sensor glucose between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (typ=2): night mean sensor glucose by HbA1c group
model = smf.ols(formula='night_mbg ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in nighttime mean sensor glucose between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (typ=2): day TIR 70-140 by HbA1c group
model = smf.ols(formula='day_tir_70_140 ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in daytime TIR 70 - 140 between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (typ=2): day TIR 70-180 by HbA1c group
model = smf.ols(formula='day_tir_70_180 ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in daytime TIR 70 - 180 between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (typ=2): day time over 180 by HbA1c group
model = smf.ols(formula='day_tir_over_180 ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in daytime TIR over 180 between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (typ=2): night TIR 70-140 by HbA1c group
model = smf.ols(formula='night_tir_70_140 ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in nighttime TIR 70 - 140 between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (typ=2): night TIR 70-180 by HbA1c group
model = smf.ols(formula='night_tir_70_180 ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in nighttime TIR 70 - 180 between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (typ=2): night time over 180 by HbA1c group
model = smf.ols(formula='night_tir_over_180 ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in nighttime TIR over 180 between HbA1c groups (p < 0.001).

In [None]:
# One-way ANOVA (anova_lm default `typ`): day/night mean sensor glucose
# difference by HbA1c group
model = smf.ols(formula='mbg_diff ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm)

The difference between day and night MSG was not different between HbA1c groups (p = 0.48).

In [None]:
# One-way ANOVA (anova_lm default `typ`): day/night TIR 70-140 difference
# by HbA1c group
model = smf.ols(formula='tir_70_140_diff ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm)

The difference between day and night TIR 70 - 140 was not different between HbA1c groups (p = 0.96).

In [None]:
# One-way ANOVA (anova_lm default `typ`): day/night TIR 70-180 difference
# by HbA1c group
model = smf.ols(formula='tir_70_180_diff ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm)

The difference between day and night TIR 70 - 180 was not different between HbA1c groups (p = 0.35).

In [None]:
# One-way ANOVA (anova_lm default `typ`): day/night time-over-180 difference
# by HbA1c group
model = smf.ols(formula='tir_over_180_diff ~ a1c_group', data=df)
lm = model.fit()
sm.stats.anova_lm(lm)

The difference between day and night TIR over 180 was not different between HbA1c groups (p = 0.27).

## Correlations Between All Variables

### Full Cohort

In [None]:
# Pairwise correlations between HbA1c and the overall / day / night CGM
# metrics, drawn as an annotated heatmap.
# Despite its name, `corr_mat` holds the selected data columns; the
# correlation matrix itself is `c_full`.
corr_vars = ['a1c', 'mbg', 'total_tir_70_140', 'total_tir_70_180', 'total_tir_over_180']
for period in ('day', 'night'):
    corr_vars += [period + '_mbg', period + '_tir_70_140',
                  period + '_tir_70_180', period + '_tir_over_180']
corr_mat = df[corr_vars]
c_full = corr_mat.corr()
sns.heatmap(c_full, annot=True, cmap="coolwarm");

In [None]:
# Same correlation matrix as the heatmap, shown as a table
# (Pearson is the DataFrame.corr default, spelled out here)
corr_mat.corr(method='pearson')