In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tableone import TableOne
from statsmodels.api import OLS
from statsmodels.stats import proportion
from patsy import dmatrices
from scipy.stats import ttest_1samp
import os

# Project root used to build every input/output path in this notebook.
# Defaults to the original author's folder; set the ADA2022_WD environment
# variable to run the notebook from another location without editing code.
# os.path.join(..., "") guarantees a trailing separator, which matters because
# paths are built by plain string concatenation (wd + "Data_Clean/...").
wd = os.path.join(
    os.environ.get("ADA2022_WD") or "/Users/timvigers/Dropbox/Work/Viral Shah/ADA 2022/",
    "",
)

## Data
- Participants with HbA1c >= 6.5 were excluded from Kaan's CGM dataset. The remaining participants were combined with publicly available data on non-diabetics from the JAEB (https://public.jaeb.org/datasets/diabetes).
- For each participant, mean blood glucose (MBG) was calculated from CGM data, and GMI was calculated according to the standard equation (GMI = 3.31 + 0.02392 * MBG).
- GMI was compared to lab HbA1c, and a new GMI regression equation was estimated for this population. 

In [None]:
# Import Kaan's data
df = pd.read_csv(wd + "Data_Clean/analysis_data.csv",usecols=["ID","Age","14 Day Mean","HbA1c"])
# Only HbA1c < 6.5, then rename BY NAME. read_csv returns the usecols columns
# in file order, not in the order of the usecols list, so a positional
# `df.columns = [...]` could silently swap the mean-glucose and HbA1c labels.
# .rename() also returns a new frame, so the column assignments below do not
# write into a filtered slice of the raw data.
df = df[df["HbA1c"] < 6.5].rename(
    columns={"ID": "PtID", "Age": "AgeAsOfEnrollDt", "14 Day Mean": "MBG"}
)
# Round mean glucose to 2 decimals, matching the rounding applied to the JAEB means
df["MBG"] = [round(m,2) for m in df["MBG"]]
df["Cohort"] = "BDC"
# Add JAEB data, keeping only the rows whose RecordType is "CGM"
jaeb_cgm = pd.read_csv(wd + "Data_Raw/CGMND/NonDiabDeviceCGM.csv")
jaeb_cgm = jaeb_cgm.loc[jaeb_cgm["RecordType"] == "CGM"]
# One sub-frame per participant ID
dataframes = [readings for _, readings in jaeb_cgm.groupby(["PtID"])]
# Mean glucose for each participant, rounded to 2 decimals; the participant ID
# is taken from the first row of that participant's sub-frame
mean_bgs = pd.DataFrame(
    {
        "PtID": [readings["PtID"].iloc[0] for readings in dataframes],
        "MBG": [round(readings["Value"].mean(), 2) for readings in dataframes],
    }
)
# Drop the participants named in the exclusion list (per Stephanie DuBose)
exclude = pd.read_csv(wd + "Data_Raw/List of Excluded Pts.csv")
idx = ~mean_bgs["PtID"].isin(exclude["PtID"])  # True = participant is kept
mean_bgs = mean_bgs.loc[idx]
# Add HbA1c: keep the HBA1C analyte rows and expose the result as "HbA1c".
# Selecting with .loc and then .rename() yields a fresh frame, so the PtID
# conversion below does not assign into a filtered slice
# (the original pattern triggers SettingWithCopyWarning).
jaeb_a1c = pd.read_csv(wd + "Data_Raw/CGMND/NonDiabSampleResults.csv")
jaeb_a1c = jaeb_a1c.loc[jaeb_a1c["Analyte"] == "HBA1C", ["PtID", "Value"]].rename(
    columns={"Value": "HbA1c"}
)
# PtID is converted to str in every table so the merge keys share one type
jaeb_a1c["PtID"] = jaeb_a1c["PtID"].map(str)
# NOTE(review): this is an inner merge, so a participant is repeated once per
# matching HBA1C row and dropped if there is none - confirm the results file
# holds exactly one HbA1c value per PtID.
mean_bgs = mean_bgs.assign(PtID=mean_bgs["PtID"].map(str)).merge(jaeb_a1c, on="PtID")
# Age
jaeb_age = pd.read_csv(wd + "Data_Raw/CGMND/NonDiabPtRoster.csv", usecols=["PtID", "AgeAsOfEnrollDt"])
jaeb_age["PtID"] = jaeb_age["PtID"].map(str)
mean_bgs = mean_bgs.merge(jaeb_age, on="PtID")
# Same HbA1c < 6.5 cut-off as the BDC cohort, then label the cohort;
# .assign() returns a new frame instead of writing into the filtered slice
mean_bgs = mean_bgs.loc[mean_bgs["HbA1c"] < 6.5].assign(Cohort="JAEB")
# Stack the BDC and JAEB cohorts on a fresh 0..n-1 index
df = pd.concat([df, mean_bgs], ignore_index=True)
# GMI from the standard equation, and lab HbA1c minus GMI
df["GMI"] = 3.31 + 0.02392 * df["MBG"]
df["Diff"] = df["HbA1c"] - df["GMI"]
# Write the combined dataset
df.to_csv(wd + 'Data_Clean/combined_data.csv', index=False)
# Split into HbA1c < 5.7 and 5.7 <= HbA1c < 6.5, each re-indexed from 0
under_57 = df["HbA1c"] < 5.7
from_57_to_65 = (df["HbA1c"] >= 5.7) & (df["HbA1c"] < 6.5)
nondiab = df.loc[under_57].reset_index(drop=True)
prediab = df.loc[from_57_to_65].reset_index(drop=True)

In [None]:
# Summary table of HbA1c and age at enrollment, grouped by cohort
summary_columns = ["HbA1c", "AgeAsOfEnrollDt"]
TableOne(df, columns=summary_columns, groupby=["Cohort"])

# HbA1c < 5.7

## Mean Glucose and HbA1c
The red line shows the standard GMI equation (GMI = 3.31 + 0.02392 * MBG), not a regression fitted to this cohort.

In [None]:
# Scatter of mean glucose (x) against lab HbA1c (y) for the HbA1c < 5.7 group
grid = sns.JointGrid(x=nondiab["MBG"], y=nondiab["HbA1c"])
grid.plot_joint(plt.scatter)
# Red reference line through (80, 5.2236) and (175, 7.496), the values of
# 3.31 + 0.02392 * MBG at MBG = 80 and MBG = 175; drawn on the joint axes
grid.ax_joint.plot([80, 175], [5.2236, 7.496], linewidth=2, color="r");

## Differences between HbA1c and GMI

In [None]:
# Histogram of lab HbA1c minus GMI for the HbA1c < 5.7 group
ax = sns.histplot(data=nondiab, x="Diff")
ax.set_xlabel("HbA1c - GMI");

In [None]:
# Rebuild the histogram inside this cell before saving it. With the default
# inline backend the figure drawn by the previous cell is closed when that cell
# finishes, so a bare plt.savefig() here would write a new, empty figure.
fig, ax = plt.subplots()
sns.histplot(data=nondiab, x="Diff", ax=ax)
ax.set_xlabel("HbA1c - GMI")
fig.savefig(wd + "Reports/figure_under_5.7.png")
# The same plot is already displayed by the previous cell; close this copy so
# it is not rendered a second time.
plt.close(fig)

In [None]:
# One-sample t-test of the HbA1c - GMI differences against a population mean of 0
# (the mean difference itself is nondiab["Diff"].mean())
t = ttest_1samp(a=nondiab["Diff"], popmean=0)
t.pvalue

The differences between HbA1c and GMI were largely negative, suggesting that the standard GMI equation overestimates HbA1c in this cohort. On average, GMI was 0.52 percentage points higher than lab HbA1c, and the mean difference was significantly different from 0 (p < 0.0001).

## Regression Results

In [None]:
# Build the response and design matrices from the formula, then fit ordinary
# least squares of HbA1c on MBG for the HbA1c < 5.7 group
outcome, predictors = dmatrices("HbA1c ~ MBG", data=nondiab)
reg = OLS(endog=outcome, exog=predictors).fit()

In [None]:
# Display the summary of the fitted OLS model `reg` (HbA1c ~ MBG, HbA1c < 5.7 group)
reg.summary()

Based on this dataset, the regression equation for GMI would be:

GMI = 3.4551 + 0.0180 * MBG

In [None]:
# Scatter of HbA1c against MBG with a fitted linear regression line (no confidence band)
sns.lmplot(data=nondiab, x="MBG", y="HbA1c", ci=None);

# HbA1c 5.7 - 6.5
## Mean Glucose and HbA1c
The red line shows the standard GMI equation (GMI = 3.31 + 0.02392 * MBG), not a regression fitted to this cohort.

In [None]:
# Scatter of mean glucose (x) against lab HbA1c (y) for the 5.7 <= HbA1c < 6.5 group
grid = sns.JointGrid(x=prediab["MBG"], y=prediab["HbA1c"])
grid.plot_joint(plt.scatter)
# Red reference line through (80, 5.2236) and (175, 7.496), the values of
# 3.31 + 0.02392 * MBG at MBG = 80 and MBG = 175; drawn on the joint axes
grid.ax_joint.plot([80, 175], [5.2236, 7.496], linewidth=2, color="r");

## Differences between HbA1c and GMI

In [None]:
# Histogram of lab HbA1c minus GMI for the 5.7 <= HbA1c < 6.5 group
ax = sns.histplot(data=prediab, x="Diff")
ax.set_xlabel("HbA1c - GMI");

In [None]:
# Rebuild the histogram inside this cell before saving it. With the default
# inline backend the figure drawn by the previous cell is closed when that cell
# finishes, so a bare plt.savefig() here would write a new, empty figure.
fig, ax = plt.subplots()
sns.histplot(data=prediab, x="Diff", ax=ax)
ax.set_xlabel("HbA1c - GMI")
fig.savefig(wd + "Reports/figure.png")
# The same plot is already displayed by the previous cell; close this copy so
# it is not rendered a second time.
plt.close(fig)

In [None]:
# One-sample t-test of the HbA1c - GMI differences against a population mean of 0
# (the mean difference itself is prediab["Diff"].mean())
t = ttest_1samp(a=prediab["Diff"], popmean=0)
t.pvalue

The differences between HbA1c and GMI were largely negative, suggesting that the standard GMI equation overestimates HbA1c in this cohort. On average, GMI was 0.5 percentage points higher than lab HbA1c, and the mean difference was significantly different from 0 (p < 0.0001).

## Regression Results

In [None]:
# Same OLS fit of HbA1c on MBG, now for the 5.7 <= HbA1c < 6.5 group,
# followed by the model summary as the cell output
outcome, predictors = dmatrices("HbA1c ~ MBG", data=prediab)
reg = OLS(endog=outcome, exog=predictors).fit()
reg.summary()

In [None]:
# Scatter of HbA1c against MBG with a fitted linear regression line (no confidence band)
sns.lmplot(data=prediab, x="MBG", y="HbA1c", ci=None);