In [11]:
# Silence DeprecationWarning messages (emitted by ipykernel among others) for
# the remainder of the session so they do not clutter cell output.
import warnings
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [10]:
import pandas as pd
import matplotlib.pyplot as plt
from tableone import TableOne
from sklearn import linear_model
from patsy import dmatrices
import statsmodels.api as sm
import statsmodels.formula.api as smf

# New Onset Dosing in T1D

In [12]:
# Data import and cleaning
wd = '/Volumes/PEDS/RI Biostatistics Core/Shared/Shared Projects/Laura/BDC/Projects/Kimber Simmons/New Onset Dosing/Data_Cleaned/'
# '.' is read as a missing value; whitespace after delimiters is skipped
df = pd.read_csv(wd+'new_onset_doses.csv',na_values='.',skipinitialspace = True)
df = df.set_index(['MRN'])
# Remove T2D. Take an explicit copy so that the column assignments below
# modify an independent frame rather than a slice of the full data set
# (avoids pandas' SettingWithCopyWarning).
df = df[df['Diabetes Type']=="T1D"].copy()


def _age_to_months(age):
    """Return an age string as a whole number of months.

    The leading token of the string is taken as the count. If the string
    contains 'year' the count is multiplied by 12, otherwise it is used
    as-is (i.e. treated as already being in months).
    """
    if 'year' in age:
        return int(age.split()[0]) * 12
    return int(age.split()[0])


def _bicarb_to_float(value):
    """Return a bicarb value as a float.

    Values reported below a detection limit are replaced with half of that
    limit: an entry containing '<' and '5' becomes 2.5, one containing '<'
    and '3' becomes 1.5. Everything else is passed to float().
    """
    text = str(value)
    if '<' in text and '5' in text:
        return 2.5
    if '<' in text and '3' in text:
        return 1.5
    return float(value)


# Format columns
# Convert all ages to months
df['Age'] = [_age_to_months(a) for a in df['Age']]
# Replace detection limit of bicarb with half the value
df['bicarb'] = [_bicarb_to_float(b) for b in df['bicarb']]
# Convert to numeric; entries that cannot be parsed become NaN
for carb_col in ['week1_carbbreakfast','week1_carblunch','week1_carbdinner']:
    df[carb_col] = pd.to_numeric(df[carb_col],errors='coerce')
# Combine levels of race
# Keyword -> combined label, checked in this order after the multiracial test
_RACE_KEYWORDS = (
    ('white', 'White'),
    ('asian', 'Asian'),
    ('black', 'Black/African American'),
    ('native', 'Native American/Pacific Islander'),
)


def _collapse_race(raw):
    """Map a raw race entry to one of the combined race labels.

    Entries containing a newline or the phrase 'more than one' are labelled
    'Multiracial'. Otherwise the first keyword in _RACE_KEYWORDS found in the
    lower-cased text decides the label; entries matching nothing (including
    missing values) become 'Unknown/Not Reported'.
    """
    text = str(raw)
    lowered = text.lower()
    if '\n' in text or 'more than one' in lowered:
        return 'Multiracial'
    for keyword, label in _RACE_KEYWORDS:
        if keyword in lowered:
            return label
    return 'Unknown/Not Reported'


race = [_collapse_race(r) for r in df['Race']]
# Participants reported as Hispanic or Latino are labelled 'Hispanic' unless
# already 'Multiracial'. Rows are paired by position, so the Ethnicity column
# is walked once instead of being re-indexed on every iteration.
race = [
    'Hispanic' if ethnicity == 'Hispanic or Latino [1]' and label != 'Multiracial' else label
    for label, ethnicity in zip(race, df['Ethnicity'])
]
df['Race'] = race
# Public vs. private insurance
# Public – Medicaid, TriCare, CHP, Indian Health Service, Denver Health (Medicaid)
# Private – Aetna, Anthem, Cigna, UHC, Kaiser, Cofinity, Christian Care
# Self-pay
public = ['medicaid','tricare','chp','indian','denver health']
private = ['aetna','anthem','cigna','uhc','kaiser','cofinity',
           'christian','commercial','friday','samaritan','rocky mountain','liberty','assurant']


def _classify_insurance(raw):
    """Classify a raw insurance entry as Private, Public, Self-pay or unknown.

    Matching is by case-insensitive substring. Private keywords are checked
    first, so an entry matching both a private and a public keyword is
    'Private'. Entries matching nothing (including missing values) become
    'Unknown/Not Reported'.
    """
    ins = str(raw).lower()
    if any(p in ins for p in private):
        return 'Private'
    if any(p in ins for p in public):
        return 'Public'
    if 'self' in ins:
        return 'Self-pay'
    return 'Unknown/Not Reported'


insurance = [_classify_insurance(i) for i in df['Insurance']]
df['Insurance'] = insurance

## Table 1: Participant Characteristics at Visit 1

In [13]:
# Make table 1
# Variables summarised in the table, in display order
cols = [
    'Age',
    'Sex',
    'puberty_yn',
    'Race',
    'Insurance',
    'Initial_A1c',
    'Hospitalization',
    'DKA',
    'pH',
    'bicarb',
    'Diabetes Type',
    'bmi_onset',
    'Wt_onset',
]
# Build the summary over the cleaned data; the bare name on the last line
# renders the table as the cell output
t1 = TableOne(
    df,
    columns=cols,
    display_all=True,
)
t1

Unnamed: 0,Unnamed: 1,Missing,Overall
n,,,336
"Age, mean (SD)",,0.0,117.4 (52.7)
"Sex, n (%)",Female,0.0,157 (46.7)
"Sex, n (%)",Male,,179 (53.3)
"puberty_yn, n (%)",no,0.0,182 (54.2)
"puberty_yn, n (%)",yes,,154 (45.8)
"Race, n (%)",Asian,0.0,4 (1.2)
"Race, n (%)",Black/African American,,12 (3.6)
"Race, n (%)",Hispanic,,45 (13.4)
"Race, n (%)",Multiracial,,17 (5.1)


# Methods
All analyses were performed using Python version 3.9.5. Variable selection was performed using elastic net regression with 10-fold cross-validation, as implemented in the scikit-learn package, version 0.24.2. The candidate predictors were Age, Sex, puberty_yn, Race, Ethnicity, Insurance, Initial_A1c, Hospitalization, DKA, pH, bicarb, bmi_onset, Wt_onset, week1_carbbreakfast, week1_carblunch, week1_carbdinner, cgm_started, wt_1mo, and BMI_1mo, with TDD_kg as the outcome.

The elastic net model selected the Age, week1_carbbreakfast, week1_carblunch, week1_carbdinner, and wt_1mo variables. Next, a linear mixed model containing these selected variables, with a random effect for provider at onset, was fit using the statsmodels library, version 0.12.2.

In [14]:
# Get model matrices
# Outcome on the left of '~', every candidate predictor on the right
en_formula = (
    'TDD_kg ~ Age+Sex+puberty_yn+Race+Ethnicity+Insurance+'
    'Initial_A1c+Hospitalization+DKA+pH+bicarb+bmi_onset+Wt_onset+'
    'week1_carbbreakfast+week1_carblunch+week1_carbdinner+'
    'cgm_started+wt_1mo+BMI_1mo'
)
y, X = dmatrices(en_formula, data=df)
# EN cross validation (10 folds, fixed seed so the fold split is repeatable)
regr = linear_model.ElasticNetCV(
    cv=10,
    random_state=1017,
)
# The outcome matrix is flattened to 1-D before being passed to fit
cv = regr.fit(X, y.ravel())
# Print terms where coefficient is not 0 (uncomment to list them)
#[X.design_info.column_names[i] for i in range(0,cv.coef_.shape[0]) if cv.coef_[i] != 0]

In [16]:
# Warnings raised while building and fitting the model are suppressed only
# inside this block; the previous filter state is restored on exit.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    # Fit mixed model with random effect for provider
    # Rows with missing values in the model variables are dropped
    md = smf.mixedlm(
        "TDD_kg ~ Age+week1_carbbreakfast+week1_carblunch+week1_carbdinner+wt_1mo",
        data=df,
        groups=df["Provider_onset"],
        missing='drop',
    )
    mdf = md.fit()
    # Plain-text summary of the fitted model
    print(mdf.summary())

             Mixed Linear Model Regression Results
Model:                MixedLM    Dependent Variable:    TDD_kg 
No. Observations:     249        Method:                REML   
No. Groups:           21         Scale:                 0.0402 
Min. group size:      1          Log-Likelihood:        13.6299
Max. group size:      34         Converged:             Yes    
Mean group size:      11.9                                     
---------------------------------------------------------------
                    Coef.  Std.Err.    z    P>|z| [0.025 0.975]
---------------------------------------------------------------
Intercept            1.905    0.059  32.339 0.000  1.789  2.020
Age                 -0.001    0.000  -2.715 0.007 -0.002 -0.000
week1_carbbreakfast -0.017    0.003  -6.763 0.000 -0.022 -0.012
week1_carblunch     -0.001    0.003  -0.289 0.773 -0.007  0.005
week1_carbdinner    -0.004    0.003  -1.248 0.212 -0.010  0.002
wt_1mo              -0.011    0.001 -11.066 0.000 -0.