In [9]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrices
from tableone import TableOne
# Root directory of the project; every data file read in this notebook is
# addressed as `wd + 'Data_Clean/...'`.
# NOTE(review): this is an absolute path that looks like a mounted network
# share - the notebook will only run where that mount exists. Confirm before
# sharing.
wd = '/Volumes/PEDS/RI Biostatistics Core/Shared/Shared Projects/Laura/BDC/Projects/Viral Shah/Day and Night CGM/'
# Global seaborn styling: white grid background, then the "coolwarm" palette
# as the default colour cycle.
sns.set_theme(style="whitegrid")
sns.set_palette("coolwarm")

In [10]:
# Import data
# `usecols` only *selects* columns - the resulting frame keeps the column order
# of the CSV file, not of the list - so the columns are renamed by name rather
# than by position. This guarantees TIR and mean glucose cannot be swapped.
analysis_cols = {'ID':'id','Age':'age','HbA1c':'a1c',
                 '14 Day Mean':'day_mbg','14 Day TIR':'day_tir',
                 '14 Night Mean':'night_mbg','14 Night TIR':'night_tir'}
final_cols = list(analysis_cols.values())
df = pd.read_csv(wd+'Data_Clean/analysis_data.csv',usecols=list(analysis_cols))
df = df.rename(columns=analysis_cols)[final_cols]

# Get row with most sensor values for each person in JDRF data
jdrf = pd.read_csv(wd+'Data_Clean/analysis_data_jdrf.csv')
most_readings = jdrf.groupby('id',sort=False)['sensor_readings'].transform('max')
jdrf = jdrf[jdrf['sensor_readings'] == most_readings]
# Two wears can tie for the maximum; keep only the first so that each person
# contributes exactly one row (no within-subject correlation).
jdrf = jdrf.drop_duplicates(subset='id',keep='first')

# 70% cutoff for 2 weeks of data
MIN_SENSOR_READINGS = 2822
jdrf = jdrf[jdrf['sensor_readings'] >= MIN_SENSOR_READINGS]

# Combine
df = pd.concat([df,jdrf[final_cols]],ignore_index=True)

In [11]:
# Demographics
autoimmune_cols = ["Hypothyroidism_YesNo","Hyperthyroidism_YesNo",
                   "GravesDisease_YesNo","CeliacDisease_YesNo",
                   "AddisonsDisease_YesNo"]
retinopathy_cols = ["Retinopathy_OD","Retinopathy_OS"]
kaan_demo = pd.read_csv(wd+"Data_Clean/demographics.csv",
                        usecols=["FirstName","LastName","Age","Gender",
                                 "DiabetesDuration_MostRecentVisitDate",
                                 "BMI","MostRecentA1C","Hemoglobin_Value",
                                 "MCV_Value","eGFR_Value"]
                                + retinopathy_cols + autoimmune_cols)
# Build the "first_last" id with vectorised string methods: they stay aligned
# to the frame's index and propagate missing names as NaN instead of raising.
kaan_demo["id"] = kaan_demo["FirstName"].str.lower() + "_" \
    + kaan_demo["LastName"].str.lower()
# Manually fixed some names:
# tomas_delong
# carrie_kroll
# ryan_bennett2
# joshua_shelton
# cortney _fernandez de castro
# conner_mckern
# josh_kamin
# kaelen_davis
# cathy_krendl
# kelli_kinkaid
# charlie_bevis
# susan michelle_clay
# lynell_rice-brinkworth
# ed_wiley
# Variables:
# age, sex, diabetes duration, BMI, mean A1c, mean HB, mean MCV, mean eGFR,
# % of patients with retinopathy (any type, categorical variable),
# % of patients with autoimmune disease (combine hypothyroidism,
# hyperthyroidism, Graves, Celiac and Addison-If yes, autoimmune disease present)
# Sort out data
# Combine categorical variables
# "Yes" if ANY of the conditions is "Yes". (Summing 0/1 flags and mapping only
# 0 and 1 back to labels left a raw number for patients with two or more
# conditions.) Missing answers count as "No", as before.
kaan_demo["Autoimmune Disease?"] = \
    kaan_demo[autoimmune_cols].eq("Yes").any(axis=1).map({True:"Yes",False:"No"})
# Retinopathy in either eye?
for eye in retinopathy_cols:
    kaan_demo[eye] = kaan_demo[eye].str.strip()
eyes = kaan_demo[retinopathy_cols]
# Any recorded grade other than "No Retinopathy" counts as retinopathy.
any_retinopathy = eyes.ne("No Retinopathy").any(axis=1).map({True:"Yes",False:"No"})
# The result is left missing when either eye has no recorded grade.
# NOTE(review): this means a patient with retinopathy documented in one eye
# and no record for the other is reported as missing - confirm that is
# intended.
kaan_demo["Retinopathy?"] = any_retinopathy.where(eyes.notna().all(axis=1))


In [12]:
# JDRF demographics - manually changed column names to match Kaan's data
jdrf_demo = pd.read_csv(wd+"Data_Clean/jdrf_demographics.csv",
                        usecols=["FirstName","LastName","MRN","Age","Gender",
                                 "DiabetesDuration_MostRecentVisitDate",
                                 "BMI","Retinopathy_OD","Retinopathy_OS",
                                 "Hypothyroidism_YesNo","Hyperthyroidism_YesNo",
                                 "GravesDisease_YesNo","CeliacDisease_YesNo",
                                 "AddisonsDisease_YesNo"])


def closest_lab(labs, value_name):
    """Return one row per MRN holding the lab value closest in time to the eye exam.

    Parameters
    ----------
    labs : DataFrame with columns 'MRN', 'ResultDate', 'Date of eye exam'
        and 'OrderValue'.
    value_name : str
        Name given to the 'OrderValue' column in the result.

    Returns
    -------
    DataFrame with columns ['MRN', value_name] and a unique 'MRN'.
    """
    time_gap = (labs['ResultDate'] - labs['Date of eye exam']).abs()
    # Compare each row with its own patient's smallest gap. This avoids
    # groupby.apply, whose handling of the grouping column has changed across
    # pandas versions.
    is_closest = time_gap == time_gap.groupby(labs['MRN']).transform('min')
    # Results tied for closest may carry different values; keep the first so
    # that the merge below cannot duplicate a participant.
    return (labs.loc[is_closest,['MRN','OrderValue']]
            .rename(columns={'OrderValue':value_name})
            .drop_duplicates(subset='MRN')
            .reset_index(drop=True))


# JDRF labs
jdrf_mcv = pd.read_excel(wd + "Data_Clean/jdrf_labs.xlsx",sheet_name='MCV')
jdrf_hb = pd.read_excel(wd + "Data_Clean/jdrf_labs.xlsx",sheet_name='Hemoglobin')
# Get most recent values
jdrf_hb = closest_lab(jdrf_hb,'Hemoglobin')
jdrf_mcv = closest_lab(jdrf_mcv,'MCV')
# Merge with demographics
# Left join: the demographics file defines the cohort, so labs for MRNs that
# are not in it must not create extra (all-missing) participants.
jdrf_demo = pd.merge(jdrf_demo,jdrf_hb,on='MRN',how='left')
jdrf_demo = pd.merge(jdrf_demo,jdrf_mcv,on='MRN',how='left')

# Data Characteristics
- TIR was calculated using 2 weeks of CGM data prior to an office visit.
- For participants with multiple HbA1c measures, the CGM wear with the largest number of sensor readings was used. This was done to prevent auto-correlation between measures within a subject.
- CGM wears with < 70% wear time (fewer than 2,822 sensor readings over the 2 weeks) were excluded.

In [None]:
# Split into HbA1c groups
# Bins are left-closed (right=False): the top bin is [10, inf), so its label
# has to include 10.0% itself.
a1c_bins = [0,7,8,9,10,float('inf')]
a1c_labels = ["<7%","[7.0%, 8.0%)","[8.0%, 9.0%)","[9.0%, 10.0%)","≥10%"]
df['a1c_group'] = pd.cut(df['a1c'],bins=a1c_bins,labels=a1c_labels,right=False)
# Difference between day and night
df['tir_diff'] = df['day_tir'] - df['night_tir']
df['mbg_diff'] = df['day_mbg'] - df['night_mbg']

## Summary Table and Variable Distributions

In [None]:
# Descriptive summary of the analysis variables, stratified by HbA1c group
group = ['a1c_group']
columns = [
    'age','a1c',
    'day_mbg','day_tir',
    'night_mbg','night_tir',
    'a1c_group',
    'tir_diff','mbg_diff',
]
TableOne(df, columns=columns, groupby=group)

## Mean Day and Night TIR

### By HbA1c Group

#### TIR

In [None]:
# Wide to long
plot_df = df.melt(id_vars=['a1c','a1c_group'],value_vars=['day_tir','night_tir'],
                  var_name='variable',value_name='TIR')
# Assign the relabelled column back. An in-place replace on a selected column
# works on a temporary object and leaves plot_df untouched under pandas
# Copy-on-Write, so the legend would show the raw column names.
plot_df['variable'] = plot_df['variable'].replace({'day_tir':'Day','night_tir':'Night'})
# Plot
plot = sns.barplot(x='a1c_group',y='TIR',hue='variable',data=plot_df,capsize=.1,errwidth=1)
plot.set(xlabel='HbA1c Group',ylabel='% Time in Range (70-140 mg/dL)')
plot.legend(title='');

In [None]:
# Mean day-minus-night TIR difference within each HbA1c group
plot = sns.barplot(data=df,x='a1c_group',y='tir_diff',
                   capsize=.1,errwidth=1)
plot.set(ylabel='Day vs. Night TIR Difference',xlabel='HbA1c Group');

#### Mean Sensor Glucose

In [None]:
# Wide to long
plot_df = df.melt(id_vars=['a1c','a1c_group'],value_vars=['day_mbg','night_mbg'],
                  var_name='variable',value_name='MSG')
# Assign the relabelled column back. An in-place replace on a selected column
# works on a temporary object and leaves plot_df untouched under pandas
# Copy-on-Write, so the legend would show the raw column names.
plot_df['variable'] = plot_df['variable'].replace({'day_mbg':'Day','night_mbg':'Night'})
# Plot
plot = sns.barplot(x='a1c_group',y='MSG',hue='variable',data=plot_df,capsize=.1,errwidth=1)
plot.set(xlabel='HbA1c Group',ylabel='Mean Sensor Glucose (mg/dL)')
plot.legend(title='');

In [None]:
# Mean day-minus-night sensor glucose difference within each HbA1c group
plot = sns.barplot(data=df,x='a1c_group',y='mbg_diff',
                   capsize=.1,errwidth=1)
plot.set(ylabel='Day vs. Night SG Difference',xlabel='HbA1c Group');

Error bars indicate the 95% confidence interval around the mean.

In [None]:
# Tests
# One-way ANOVA of daytime mean sensor glucose across HbA1c groups
lm = smf.ols(formula='day_mbg ~ a1c_group',data=df)
lm = lm.fit()
sm.stats.anova_lm(lm,typ=2)


In [None]:
# One-way ANOVA of night-time mean sensor glucose across HbA1c groups
lm = smf.ols(formula='night_mbg ~ a1c_group',data=df)
lm = lm.fit()
sm.stats.anova_lm(lm,typ=2)

In [None]:
# One-way ANOVA of the day-minus-night TIR difference across HbA1c groups.
# typ=2 matches the day/night MSG tests so all ANOVA tables share one layout;
# with a single factor the F test is the same as the default type I table.
lm = smf.ols('tir_diff ~ a1c_group',data=df).fit()
sm.stats.anova_lm(lm, typ=2)

In [None]:
# One-way ANOVA of the day-minus-night MSG difference across HbA1c groups.
# typ=2 matches the day/night MSG tests so all ANOVA tables share one layout;
# with a single factor the F test is the same as the default type I table.
lm = smf.ols('mbg_diff ~ a1c_group',data=df).fit()
sm.stats.anova_lm(lm, typ=2)

There were significant differences in day TIR, night TIR, day MSG, and night MSG by HbA1c group (p < 0.001 for all). However, the day-night differences in TIR and in MSG did not differ significantly between HbA1c groups (p = 0.08 and p = 0.42, respectively).

## Correlations Between All Variables

### Full Cohort

In [None]:
# Analysis variables with display names. 'HbA1c Group' is kept because the
# by-group sections below facet and group on it.
display_names = {'a1c':'HbA1c','day_mbg':'Day MSG','day_tir':'Day TIR',
                 'night_mbg':'Night MSG','night_tir':'Night TIR',
                 'a1c_group':'HbA1c Group',
                 'tir_diff':'Day vs. Night TIR Difference',
                 'mbg_diff':'Day vs. Night SG Difference'}
corr_mat = df[list(display_names)].rename(columns=display_names)
# Correlate numeric columns only: 'HbA1c Group' is categorical, and
# DataFrame.corr() no longer drops non-numeric columns silently on
# pandas >= 2.0 (it raises instead).
c_full = corr_mat.select_dtypes('number').corr()
sns.heatmap(c_full,annot=True,cmap="coolwarm");

In [None]:
# Full-cohort correlation table. The categorical 'HbA1c Group' column is
# excluded explicitly because DataFrame.corr() raises on non-numeric columns
# on pandas >= 2.0.
corr_mat.select_dtypes('number').corr()

### By HbA1c Group

In [None]:
def facet_corr_heatmap(data, color=None, **kwargs):
    """Draw the correlation heatmap for one facet's subset of `corr_mat`.

    `data` is the subset of rows passed in by FacetGrid.map_dataframe; `color`
    and any other keywords it supplies are accepted and ignored. Only numeric
    columns are correlated, because the categorical 'HbA1c Group' column
    makes DataFrame.corr() raise on pandas >= 2.0.
    """
    return sns.heatmap(data.select_dtypes('number').corr(),cmap="coolwarm")


g = sns.FacetGrid(corr_mat, col='HbA1c Group')
g.fig.set_figwidth(15)
g.fig.set_figheight(10)
g.map_dataframe(facet_corr_heatmap);

In [None]:
# Pairwise correlations computed separately within each HbA1c group
by_a1c_group = corr_mat.groupby(by='HbA1c Group')
by_a1c_group.corr()