# Analysis of the effect of Metformin on COVID-19 mortality

### Load and clean data.table

In [28]:
# Silence all warnings for the rest of the session.
# NOTE(review): this also hides warnings raised by the model fits further
# down; consider re-enabling warnings while debugging.
options(warn = -1)

# Project locations. `fname` and `funcsfile` are resolved relative to
# `homedir`, so the working directory has to be set before they are used.
homedir   <- "C:/Users/breng/Dropbox/COVID19 metformin"
fname     <- "./data/2020.5.13 English DM COVID19 Spreadsheet.csv"
funcsfile <- "COVID_functions.r"

# NOTE(review): the absolute, user-specific path makes the notebook
# non-portable; it must be edited before running on another machine.
setwd(homedir)
# Load the project helper functions called by later cells.
source(funcsfile)

suppressMessages({
    library(Matching)
    library(rbounds)
    library(data.table)
    library(ggplot2)
    library(ggpubr)
    library(dplyr)
    library(gridExtra)
    library(utils)
})

### Load and clean dataset

In [29]:
# Read the spreadsheet and reorder its columns by position (the positions are
# tied to the layout of the source CSV).
dt <- fread(fname)[, c(1:12, 14, 16, 18:32, 13, 15, 17, 33:52)]
colnames(dt)[3] <- "ID"
# Normalise the header: lower case, spaces replaced by underscores.
names(dt) <- gsub(" ", "_", tolower(names(dt)))

#### Recode outcome as numeric: "D" becomes 0, any other value becomes 1.
dt[, outcome := as.numeric(outcome != "D")]

#### Coerce measurement columns to numeric, stripping "%" signs where present.
dt[, weight := as.numeric(weight)]
dt[, o2_saturation := as.numeric(gsub("%", "", o2_saturation))]
dt[, hba1c := as.numeric(gsub("%", "", hba1c))]

#### Columns where any non-empty entry is recoded to "Y" and an empty one to "N".
presence_cols <- c(
  "secretagogues_b", "secretagogues_a",
  "glycosidase_inhibitors_b", "glycosidase_inhibitors_a",
  "dpp4_inhibitor_b", "dpp4_inhibitor_a",
  "tzd_b", "tzd_a",
  "meds_acei_arb", "statins", "life_style_modification",
  "cad_meds", "procalcitonin"
)
dt[, (presence_cols) := lapply(.SD, function(v) ifelse(v != "", "Y", "N")),
   .SDcols = presence_cols]

#### Columns where an entry containing a digit, or equal to "Y", becomes "Y";
#### everything else becomes "N".
history_cols <- c("smoking_history", "hypertension", "cad_years")
dt[, (history_cols) := lapply(
  .SD,
  function(v) ifelse(grepl("[0-9]", v) | v == "Y", "Y", "N")
), .SDcols = history_cols]

#### Clean missing values based on the information provided by the individuals
#### who procured the dataset: blank cells become "N", "N/A" becomes NA.
dt[dt == "" | dt == " "] <- "N"
dt[dt == "N/A"] <- NA

#### Drop these columns, which are empty.
remove <- c("glp_1_a", "glp_1_b", "osa", "sglt_2_inhibitor_a", "sglt_2_inhibitor_b")
dt <- dt[, !..remove]
# colnames(dt)
# str(dt)

## Section I: Explore data attributes 

### Obtain frequency counts in binomial categorical columns

In [3]:
# Binary (Y/N) columns whose value counts are tabulated below.
bincols <- c(
  "secretagogues_b", "secretagogues_a",
  "glycosidase_inhibitors_b", "glycosidase_inhibitors_a",
  "dpp4_inhibitor_b", "dpp4_inhibitor_a",
  "metformin_b", "metformin_a",
  "tzd_b", "tzd_a",
  "meds_acei_arb", "smoking_history", "hypertension", "cad_years",
  "statins", "steroid_use",
  "life_style_modification", "cad_meds"
)
# One table() per column, transposed so that each column becomes a row.
t(sapply(dt[, bincols, with = FALSE], table))

Unnamed: 0,N,Y
secretagogues_b,109,22
secretagogues_a,100,31
glycosidase_inhibitors_b,92,39
glycosidase_inhibitors_a,74,57
dpp4_inhibitor_b,123,8
dpp4_inhibitor_a,120,11
metformin_b,94,37
metformin_a,94,37
tzd_b,129,2
tzd_a,128,3


In [4]:
# Columns whose value frequencies are inspected.
contcols <- c("outcome", "new_number", "id", "mrn")
# Return the first two rows of the frequency table that freqsdt() builds for
# column `b` of the table named "dt".
f <- function(b) {
  head(freqsdt("dt", b), n = 2)
}
lapply(contcols, f)

outcome,frequency,percent
<dbl>,<int>,<dbl>
1,108,82.44275
0,23,17.55725

new_number,frequency,percent
<int>,<int>,<dbl>
1,1,0.7633588
2,1,0.7633588

id,frequency,percent
<int>,<int>,<dbl>
729524,1,0.7633588
884781,1,0.7633588

mrn,frequency,percent
<int>,<int>,<dbl>
363855,1,0.7633588
364834,1,0.7633588


### Assess distribution of numeric columns

In [5]:
# Column positions passed to summary() for the distribution overview.
cols <- c(6, 9:12, 17, 19:24, 26:27)
summary(dt[, ..cols])

      age        lenth_of_hospital_stay     weight           height     
 Min.   :34.00   Min.   : 1.00          Min.   : 40.00   Min.   :152.0  
 1st Qu.:60.00   1st Qu.: 6.50          1st Qu.: 60.00   1st Qu.:160.0  
 Median :66.00   Median :13.00          Median : 67.00   Median :165.5  
 Mean   :66.85   Mean   :14.93          Mean   : 66.03   Mean   :165.2  
 3rd Qu.:76.00   3rd Qu.:21.50          3rd Qu.: 71.25   3rd Qu.:170.0  
 Max.   :92.00   Max.   :67.00          Max.   :120.00   Max.   :180.0  
                                        NA's   :27       NA's   :29     
      bmi        o2_saturation         chol             tg      
 Min.   :15.63   Min.   : 45.00   Min.   :1.710   Min.   :0.47  
 1st Qu.:22.30   1st Qu.: 94.00   1st Qu.:3.160   1st Qu.:0.83  
 Median :24.28   Median : 96.00   Median :3.700   Median :1.09  
 Mean   :24.23   Mean   : 93.88   Mean   :3.839   Mean   :1.32  
 3rd Qu.:25.84   3rd Qu.: 98.00   3rd Qu.:4.405   3rd Qu.:1.52  
 Max.   :38.30   Max.   :1

#### These analyses demonstrate that the sample size is limited relative to the number of variables being assessed. Therefore, this dataset cannot be partitioned into testing and training datasets, and thus model validation cannot be conducted to rule out spurious correlations or to perform predictive modeling. We also conclude that metformin therapy was neither discontinued nor added for any patient upon hospital admission. 

# Section II: Explore statistical relationships between variables
## Analyze the significance of all univariate models on outcome (survival)

In [22]:
# Show the structure of the columns (selected by position) used for modelling.
str(dt[, c(5:6, 9:17, 19:24, 26:27, 29:45, 47), with = FALSE])

Classes 'data.table' and 'data.frame':	131 obs. of  37 variables:
 $ sex                     : chr  "M" "M" "M" "M" ...
 $ age                     : int  85 63 75 57 85 68 77 55 57 78 ...
 $ lenth_of_hospital_stay  : int  1 13 1 2 4 3 29 13 48 3 ...
 $ weight                  : num  60 60 NA NA NA 68 65 70 75 NA ...
 $ height                  : int  165 166 NA NA NA 160 170 160 166 NA ...
 $ bmi                     : num  22 21.8 NA NA NA 26.6 22.5 27.3 27.2 NA ...
 $ cad_meds                : chr  "Y" "N" "Y" "N" ...
 $ meds_acei_arb           : chr  "Y" "N" "Y" "N" ...
 $ statins                 : chr  "N" "N" "N" "N" ...
 $ smoking_history         : chr  "N" "N" "N" "N" ...
 $ o2_saturation           : num  NA 98 45 NA NA NA NA NA NA NA ...
 $ chol                    : num  NA NA NA 6.42 2.88 3.58 2.14 4.27 NA NA ...
 $ tg                      : num  NA NA NA 1.11 0.73 1.51 0.77 2.84 NA NA ...
 $ hdl_c                   : num  NA NA NA 0.64 0.86 1.11 0.36 0.99 NA NA ...
 $ ldl_c    

In [23]:
# Modelling subset, selected by position. The formulas in the printed results
# use `outcome` as the response; it is presumably the trailing column (47).
dt_sub <- dt[, c(5:6, 9:17, 19:24, 26:27, 29:45, 47)]
# Key = position of every column except the last one.
# The previous `1:length(colnames(dt_sub))-1` parsed as `(1:n) - 1`, i.e.
# 0:(n - 1), because `:` binds tighter than `-`; that added a meaningless
# index 0. Build 1:(n - 1) explicitly instead.
k <- seq_len(ncol(dt_sub) - 1)
# One model per key; per the printed results, "F" lists every model and "T"
# only the significant ones.
univariantglmR(dt_sub, key = k, significant = "F")
univariantglmR(dt_sub, key = k, significant = "T")



Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
0.21148161,0.4605546,0.459189,0.6460985,sexM,outcome ~ sex,Not-Significant
-0.04506966,0.02145035,-2.10111495,0.03563088,age,outcome ~ age,Significant
0.19766489,0.04986864,3.96371102,7.379357e-05,lenth_of_hospital_stay,outcome ~ lenth_of_hospital_stay,Significant
-0.03439789,0.02569332,-1.33878733,0.1806399,weight,outcome ~ weight,Not-Significant
0.01038553,0.03992568,0.26012164,0.7947699,height,outcome ~ height,Not-Significant
-0.14006494,0.07856238,-1.78285001,0.07461072,bmi,outcome ~ bmi,Not-Significant
-0.50274352,0.5379195,-0.93460738,0.3499907,cad_medsY,outcome ~ cad_meds,Not-Significant
-0.41078427,0.462779,-0.88764679,0.3747308,meds_acei_arbY,outcome ~ meds_acei_arb,Not-Significant
-0.18232156,0.690712,-0.26396175,0.7918094,statinsY,outcome ~ statins,Not-Significant
16.08644219,1495.296,0.01075803,0.9914165,smoking_historyY,outcome ~ smoking_history,Not-Significant




Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
-0.04506966,0.021450352,-2.101115,0.03563088,age,outcome ~ age,Significant
0.19766489,0.049868642,3.963711,7.379357e-05,lenth_of_hospital_stay,outcome ~ lenth_of_hospital_stay,Significant
0.33304203,0.096715478,3.443524,0.0005741864,o2_saturation,outcome ~ o2_saturation,Significant
-0.15147311,0.055096292,-2.749243,0.005973304,glucose,outcome ~ glucose,Significant
-0.01255921,0.003747838,-3.351055,0.0008050433,crp,outcome ~ crp,Significant
-0.11199732,0.034837119,-3.214885,0.00130497,d_dimmer,outcome ~ d_dimmer,Significant
1.61626388,0.768024142,2.104444,0.03533973,metformin_b,outcome ~ metformin_b,Significant
1.20682587,0.54096528,2.230875,0.02568942,glycosidase_inhibitors_aY,outcome ~ glycosidase_inhibitors_a,Significant
-2.94694211,0.656922711,-4.48598,7.257957e-06,steroid_useY,outcome ~ steroid_use,Significant


#### These data indicate that age, length of hospital stay, O2 saturation, glucose, crp, d_dimer, metformin, glycosidase inhibitors, and steroid use were all predictors of death or survival outcome in diabetic patients admitted to the hospital with COVID-19.

## Identify any variables that might confound metformin administration as a predictor of outcome.

### 1) Use metformin as the dependent variable and run univariate logistic regressions to identify potential confounders of the effect of metformin therapy on mortality.

In [7]:
# Same predictor subset as before, but with metformin_b moved into a new
# trailing `outcome` column so that it becomes the dependent variable.
dt_sub1 <- dt[, c(5:6, 9:17, 19:24, 26:27, 29:45)][, outcome := metformin_b][, metformin_b := NULL]
dt_sub1[, outcome := ifelse(outcome == "Y", 1, 0)] # transform metformin_b into a binary numeric column
# Key = position of every column except the trailing `outcome`.
# The previous `1:length(colnames(dt_sub1))-1` parsed as `(1:n) - 1`, i.e.
# 0:(n - 1), because `:` binds tighter than `-`; that added a meaningless
# index 0. Build 1:(n - 1) explicitly instead.
k <- seq_len(ncol(dt_sub1) - 1)
univariantglmR(dt_sub1, key = k, significant = "F")
univariantglmR(dt_sub1, key = k, significant = "T")



Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
0.1694182,0.3939052,0.43009882,0.66712376,sexM,outcome ~ sex,Not-Significant
-0.02402227,0.01707019,-1.40726467,0.15934892,age,outcome ~ age,Not-Significant
0.01926493,0.01686854,1.14206246,0.25342806,lenth_of_hospital_stay,outcome ~ lenth_of_hospital_stay,Not-Significant
4.779949e-05,0.02125745,0.0022486,0.99820588,weight,outcome ~ weight,Not-Significant
-0.01904235,0.03194909,-0.59602172,0.55116071,height,outcome ~ height,Not-Significant
0.007461977,0.06446596,0.11575065,0.90785016,bmi,outcome ~ bmi,Not-Significant
-0.01492565,0.4948773,-0.0301603,0.97593921,cad_medsY,outcome ~ cad_meds,Not-Significant
0.1819006,0.3884768,0.46824057,0.63961256,meds_acei_arbY,outcome ~ meds_acei_arb,Not-Significant
-0.08926787,0.6190797,-0.14419446,0.88534692,statinsY,outcome ~ statins,Not-Significant
0.01699758,0.86012,0.01976187,0.98423334,smoking_historyY,outcome ~ smoking_history,Not-Significant




Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
0.4867592,0.1793752,2.713637,0.0066549,hba1c,outcome ~ hba1c,Significant
-0.9877497,0.4505566,-2.192288,0.02835875,steroid_useY,outcome ~ steroid_use,Significant


### 2) Conduct multivariable analyses predicting survival or death outcome (dependent variable) using metformin in combination with a second independent variable

In [8]:
# Position within dt_sub of the column that is paired with every other
# predictor (the formulas in the printed results show it to be metformin_b).
check_column <- 25
# All two-column index pairs over the predictor columns, keeping only the
# pairs that contain check_column.
combs <- data.table(t(combn(ncol(dt_sub) - 1, 2)))[V1 == check_column | V2 == check_column]
# Keys take the form "<index1>_<index2>".
k <- paste(combs$V1, combs$V2, sep = "_")
# All models, without the metformin_bY coefficient rows.
glmcompileR(DT = dt_sub, key = k, significant = "F")[!names == "metformin_bY"]
# Significant coefficients, first with and then without the metformin_bY rows.
glmcompileR(DT = dt_sub, key = k, significant = "T")
glmcompileR(DT = dt_sub, key = k, significant = "T")[!names == "metformin_bY"]



Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
-0.04073725,0.02185636,-1.863862691,0.06234099,age,outcome ~ age + metformin_b,Not-Significant
0.19060181,0.05004187,3.808846968,0.0001396163,lenth_of_hospital_stay,outcome ~ lenth_of_hospital_stay + metformin_b,Significant
-0.04146158,0.02749836,-1.507783393,0.13161,weight,outcome ~ weight + metformin_b,Not-Significant
0.01429862,0.04006634,0.356873524,0.7211865,height,outcome ~ height + metformin_b,Not-Significant
-0.15162613,0.0805883,-1.881490748,0.05990519,bmi,outcome ~ bmi + metformin_b,Not-Significant
-0.52303119,0.5523401,-0.946936884,0.3436709,cad_medsY,outcome ~ cad_meds + metformin_b,Not-Significant
-0.47407354,0.4732287,-1.001785316,0.3164473,meds_acei_arbY,outcome ~ meds_acei_arb + metformin_b,Not-Significant
-0.16843007,0.7059575,-0.238583866,0.8114283,statinsY,outcome ~ statins + metformin_b,Not-Significant
16.06325069,1446.083,0.011108111,0.9911372,smoking_historyY,outcome ~ smoking_history + metformin_b,Not-Significant
0.33075579,0.09675719,3.418410617,0.00062988,o2_saturation,outcome ~ o2_saturation + metformin_b,Significant




Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
1.5182811,0.774056619,1.96146,0.04982539,metformin_bY,outcome ~ age + metformin_b,Significant
0.19060181,0.050041867,3.808847,0.0001396163,lenth_of_hospital_stay,outcome ~ lenth_of_hospital_stay + metformin_b,Significant
1.62378914,0.769567823,2.110001,0.03485824,metformin_bY,outcome ~ cad_meds + metformin_b,Significant
1.64712315,0.770384101,2.138054,0.03251233,metformin_bY,outcome ~ meds_acei_arb + metformin_b,Significant
1.61521744,0.768103228,2.102865,0.03547757,metformin_bY,outcome ~ statins + metformin_b,Significant
1.62837511,0.769823323,2.115258,0.03440795,metformin_bY,outcome ~ smoking_history + metformin_b,Significant
0.33075579,0.096757186,3.418411,0.00062988,o2_saturation,outcome ~ o2_saturation + metformin_b,Significant
-0.17569175,0.061258015,-2.868061,0.004129953,glucose,outcome ~ glucose + metformin_b,Significant
2.27839065,1.150973334,1.979534,0.04775594,metformin_bY,outcome ~ glucose + metformin_b,Significant
-0.01554955,0.004247696,-3.660703,0.0002515239,crp,outcome ~ crp + metformin_b,Significant




Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
0.19060181,0.050041867,3.808847,0.0001396163,lenth_of_hospital_stay,outcome ~ lenth_of_hospital_stay + metformin_b,Significant
0.33075579,0.096757186,3.418411,0.00062988,o2_saturation,outcome ~ o2_saturation + metformin_b,Significant
-0.17569175,0.061258015,-2.868061,0.004129953,glucose,outcome ~ glucose + metformin_b,Significant
-0.01554955,0.004247696,-3.660703,0.0002515239,crp,outcome ~ crp + metformin_b,Significant
-0.11366028,0.036425623,-3.120339,0.001806432,d_dimmer,outcome ~ d_dimmer + metformin_b,Significant
1.10753133,0.549060256,2.01714,0.04368092,glycosidase_inhibitors_aY,outcome ~ metformin_b + glycosidase_inhibitors_a,Significant
-2.83176935,0.66196393,-4.27783,1.887238e-05,steroid_useY,outcome ~ metformin_b + steroid_use,Significant


#### In summary, these analyses suggest that hba1c, steroid_use, length of hospital stay, O2 saturation, glucose, crp, d_dimmer, and glycosidase inhibitors may confound metformin therapy on predicting survival outcome. 

#### Of these variables, O2 saturation, glucose and crp are physiological measurements that metformin may influence. For example, metformin lowers blood glucose and crp. Also, according to our hypothesis, metformin administration may increase O2 saturation. This hypothesis is supported by the observation that metformin remains a significant independent variable that predicts survival in combination with glucose or crp. 

#### To verify if this is the case, we will analyze if metformin therapy can predict other medications.

In [None]:
# Take a real copy: a plain `dt_sub2 <- dt` only creates a second name for the
# same data.table, so the `:=` recoding applied to dt_sub2 afterwards would
# also overwrite the Y/N columns of dt that later cells still rely on.
dt_sub2 <- copy(dt)

In [None]:
#### Change Y/N binary columns into 1/0 numeric columns, then keep and reorder
#### (by position) the columns used in the next cell.
# NOTE(review): `:=` updates dt_sub2 by reference; if dt_sub2 is merely another
# name for dt (plain `<-` assignment), dt is recoded as well.
yn_cols <- c(
  "secretagogues_b", "secretagogues_a",
  "glycosidase_inhibitors_b", "glycosidase_inhibitors_a",
  "dpp4_inhibitor_b", "dpp4_inhibitor_a",
  "tzd_b", "tzd_a",
  "meds_acei_arb", "statins", "life_style_modification",
  "cad_meds", "smoking_history", "hypertension", "cad_years", "procalcitonin",
  "hyperlipidemia", "insulin_b", "insulin_a", "steroid_use"
)
dt_sub2 <- dt_sub2[
  , (yn_cols) := lapply(.SD, function(v) ifelse(v == "Y", 1, 0)),
  .SDcols = yn_cols
][, c(13:16, 29:44)][, c(1:9, 11:15, 17:20, 10, 16)]


In [8]:
# dt_sub2 <- dt_sub2[, c(13:16, 29:44)][, c(1:9, 11:15, 17:20, 10, 16)]
# Keys: every column position except the final two.
k <- seq(1, ncol(dt_sub2) - 2)
k
# Column 19 is used as the independent variable; the printed formulas show
# this to be metformin_b.
dependentglmR(DT = dt_sub2, key = k, independent_col = 19, significant = "F")



Estimate,Std. Error,z value,Pr(>|z|),names,formula,Significance
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
0.18190059,0.3884769,0.4682404,0.63961269,metformin_bY,meds_acei_arb ~ metformin_b,Not-Significant
-0.08926786,0.6190628,-0.1441984,0.88534381,metformin_bY,statins ~ metformin_b,Not-Significant
0.01699758,0.8600968,0.0197624,0.98423291,metformin_bY,smoking_history ~ metformin_b,Not-Significant
-0.0822381,0.4920928,-0.1671191,0.86727635,metformin_bY,cad_years ~ metformin_b,Not-Significant
0.19633229,0.3980268,0.493264,0.62182607,metformin_bY,hypertension ~ metformin_b,Not-Significant
-0.41892426,0.6825314,-0.6137802,0.53936058,metformin_bY,hyperlipidemia ~ metformin_b,Not-Significant
-0.99884776,0.5311454,-1.8805544,0.06003256,metformin_bY,life_style_modification ~ metformin_b,Not-Significant
0.37561214,0.467585,0.8033025,0.42179993,metformin_bY,insulin_b ~ metformin_b,Not-Significant
0.69451986,0.4858949,1.4293624,0.15290011,metformin_bY,secretagogues_b ~ metformin_b,Not-Significant
0.94908055,1.4277678,0.6647303,0.50622299,metformin_bY,tzd_b ~ metformin_b,Not-Significant


#### In the code above, the algorithm did not converge on 16 of the 18 logistic regression models. 

### 3) Detect any differences in data attributes between patients taking metformin and those who are not.

In [18]:
# Split the records by metformin use before admission.
met   <- dt[metformin_b == "Y"]
nomet <- dt[metformin_b == "N"]

In [14]:
# Continuous variables: compared with the Mann-Whitney U non-parametric test.
# Select the continuous columns by position and make the two integer columns
# numeric.
metcon <- met[, c(9:12, 17, 19:24, 26:27)][
  , c("lenth_of_hospital_stay", "height") := lapply(.SD, as.numeric),
  .SDcols = c("lenth_of_hospital_stay", "height")
]
nometcon <- nomet[, c(9:12, 17, 19:24, 26:27)][
  , c("lenth_of_hospital_stay", "height") := lapply(.SD, as.numeric),
  .SDcols = c("lenth_of_hospital_stay", "height")
]
# Columns present in both subsets.
keys <- intersect(colnames(metcon), colnames(nometcon))
# NOTE(review): metcon/nometcon only supply the column names; the full
# met/nomet tables are what is passed on. Confirm this is intended.
continuous_compare(DT1 = met, DT2 = nomet, compair = "metformin-no_metformin", key = keys)

comparison,parameter,Mann_Whitney_U_p_value,significant
<chr>,<chr>,<dbl>,<chr>
metformin-no_metformin,lenth_of_hospital_stay,0.086480349,False
metformin-no_metformin,weight,0.389536242,False
metformin-no_metformin,height,0.545738247,False
metformin-no_metformin,bmi,0.956579031,False
metformin-no_metformin,o2_saturation,0.489625398,False
metformin-no_metformin,chol,0.154980981,False
metformin-no_metformin,tg,0.482703885,False
metformin-no_metformin,hdl_c,0.177163972,False
metformin-no_metformin,ldl_c,0.494004269,False
metformin-no_metformin,glucose,0.828731844,False


In [30]:
# Categorical variables: compared with the chi-squared test.
metdis   <- met[, c(5, 13:16, 25, 29:45), with = FALSE]
nometdis <- nomet[, c(5, 13:16, 25, 29:45), with = FALSE]
# Columns present in both subsets.
keys <- intersect(colnames(metdis), colnames(nometdis))
# keys
categorical_compare(
  DT = dt,
  compair = "metformin-no_metformin",
  compair_column = "metformin_b",
  key = keys
)

comparison,parameter,Chai_squared_p_value,significant
<chr>,<chr>,<dbl>,<chr>
metformin-no_metformin,sex,0.8145363,False
metformin-no_metformin,cad_meds,1.0,False
metformin-no_metformin,meds_acei_arb,0.7838725,False
metformin-no_metformin,statins,1.0,False
metformin-no_metformin,smoking_history,1.0,False
metformin-no_metformin,procalcitonin,0.5353149,False
metformin-no_metformin,cad_years,1.0,False
metformin-no_metformin,hypertension,0.7668795,False
metformin-no_metformin,hyperlipidemia,0.7612328,False
metformin-no_metformin,life_style_modification,0.08762389,False


### Metformin therapy after admission 

In [31]:
# Split the records by metformin use after admission.
met   <- dt[metformin_a == "Y"]
nomet <- dt[metformin_a == "N"]

In [32]:
# Continuous variables: compared with the Mann-Whitney U non-parametric test.
# Select the continuous columns by position and make the two integer columns
# numeric.
metcon <- met[, c(9:12, 17, 19:24, 26:27)][
  , c("lenth_of_hospital_stay", "height") := lapply(.SD, as.numeric),
  .SDcols = c("lenth_of_hospital_stay", "height")
]
nometcon <- nomet[, c(9:12, 17, 19:24, 26:27)][
  , c("lenth_of_hospital_stay", "height") := lapply(.SD, as.numeric),
  .SDcols = c("lenth_of_hospital_stay", "height")
]
# Columns present in both subsets.
keys <- intersect(colnames(metcon), colnames(nometcon))
# NOTE(review): metcon/nometcon only supply the column names; the full
# met/nomet tables are what is passed on. Confirm this is intended.
continuous_compare(DT1 = met, DT2 = nomet, compair = "metformin-no_metformin", key = keys)

comparison,parameter,Mann_Whitney_U_p_value,significant
<chr>,<chr>,<dbl>,<chr>
metformin-no_metformin,lenth_of_hospital_stay,0.086480349,False
metformin-no_metformin,weight,0.389536242,False
metformin-no_metformin,height,0.545738247,False
metformin-no_metformin,bmi,0.956579031,False
metformin-no_metformin,o2_saturation,0.489625398,False
metformin-no_metformin,chol,0.154980981,False
metformin-no_metformin,tg,0.482703885,False
metformin-no_metformin,hdl_c,0.177163972,False
metformin-no_metformin,ldl_c,0.494004269,False
metformin-no_metformin,glucose,0.828731844,False


In [33]:
# Categorical variables: compared with the chi-squared test.
metdis <- met[, c(5, 13:16, 25, 29:45)]
nometdis <- nomet[, c(5, 13:16, 25, 29:45)]
# Columns present in both subsets.
keys <- intersect(colnames(metdis), colnames(nometdis)) # keys
# This section analyses metformin use AFTER admission (met/nomet above are
# split on metformin_a), so the grouping column must be metformin_a. The
# previous call reused 'metformin_b' from the before-admission section.
categorical_compare(DT = dt, compair = "metformin-no_metformin", compair_column = "metformin_a", key = keys)

comparison,parameter,Chai_squared_p_value,significant
<chr>,<chr>,<dbl>,<chr>
metformin-no_metformin,sex,0.8145363,False
metformin-no_metformin,cad_meds,1.0,False
metformin-no_metformin,meds_acei_arb,0.7838725,False
metformin-no_metformin,statins,1.0,False
metformin-no_metformin,smoking_history,1.0,False
metformin-no_metformin,procalcitonin,0.5353149,False
metformin-no_metformin,cad_years,1.0,False
metformin-no_metformin,hypertension,0.7668795,False
metformin-no_metformin,hyperlipidemia,0.7612328,False
metformin-no_metformin,life_style_modification,0.08762389,False


#### Taken together, the analyses of this section indicate that steroids and glycosidase inhibitors appear to be the predominant confounding medications/variables in determining whether metformin improves outcome in people hospitalized with COVID-19.
#### In the next section, we will control for glycosidase inhibitors and/or steroid use to assess the independent effect of metformin on improving survival in people hospitalized with COVID-19. 

# Section III: Control for confounding variables when assessing the effect of metformin on survival/mortality as an outcome.

## Conduct propensity score matching on variables that confound metformin treatment on predicting outcome (survival)

#### Eliminate any confounding variables with missing values (NA).

In [74]:
# Candidate confounders whose most frequent value is inspected below.
contcols <- c("hba1c", "steroid_use", "glycosidase_inhibitors_a")
# Return the first row of the frequency table that freqsdt() builds for
# column `b` of the table named "dt".
f <- function(b) {
  head(freqsdt("dt", b), n = 1)
}
lapply(contcols, f)

hba1c,frequency,percent
<dbl>,<int>,<dbl>
,71,54.19847

steroid_use,frequency,percent
<chr>,<int>,<dbl>
N,83,63.35878

glycosidase_inhibitors_a,frequency,percent
<chr>,<int>,<dbl>
N,74,56.48855


#### Transform metformin therapy before admission into a binary numeric column.

In [12]:
# Recode metformin_b ("Y"/"N") as numeric 1/0.
# `:=` modifies dt by reference, so dt2 and dt refer to the same recoded table;
# the matching cells read the numeric column through both names.
# An already-recoded 1 is also accepted so that the cell can be re-run safely:
# testing only for "Y" would turn every value into 0 on a second execution.
dt2 <- dt[, c("metformin_b") := .(ifelse(metformin_b == "Y" | metformin_b == 1, 1, 0))]

## Propensity score match on steroid use

In [13]:
# Propensity model: pre-admission metformin use as a function of steroid use.
glm.fit <- glm(metformin_b ~ steroid_use, family = binomial, data = dt2)
# summary(glm.fit)
# Match on the fitted propensity score. M is the number of matches found for
# each treated observation (1 = one-to-one matching), not an iteration count.
rr1 <- Match(
  Y = dt$outcome,
  Tr = dt$metformin_b,
  X = glm.fit$fitted.values,
  M = 1
)
summary(rr1)
# Covariate balance before and after matching.
mb <- MatchBalance(metformin_b ~ steroid_use, data = dt, match.out = rr1, nboots = 1)


Estimate...  0.077678 
AI SE......  0.053012 
T-stat.....  1.4653 
p.val......  0.14284 

Original number of observations..............  131 
Original number of treated obs...............  37 
Matched number of observations...............  37 
Matched number of observations  (unweighted).  1886 


***** (V1) steroid_useY *****
                       Before Matching 	 	 After Matching
mean treatment........    0.21622 	 	    0.21622 
mean control..........    0.42553 	 	    0.21622 
std mean diff.........    -50.155 	 	          0 

mean raw eQQ diff.....    0.21622 	 	          0 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          0 

mean eCDF diff........    0.10466 	 	          0 
med  eCDF diff........    0.10466 	 	          0 
max  eCDF diff........    0.20932 	 	          0 

var ratio (Tr/Co).....    0.70492 	 	          1 
T-test p-value........   0.016789 	 	          1 



## Propensity score match on glycosidase_inhibitors_a

In [15]:
# Propensity model: pre-admission metformin use as a function of
# glycosidase-inhibitor use after admission.
glm.fit <- glm(metformin_b ~ glycosidase_inhibitors_a, data = dt2, family = binomial)
# summary(glm.fit)
# Match on the fitted propensity score. M is the number of matches found for
# each treated observation, not an iteration count. The previous M = 0 is not
# a valid number of matches; the printed balance table shows it behaved as
# one-to-one matching, so M = 1 is now requested explicitly.
rr1 <- Match(Y = dt$outcome, Tr = dt$metformin_b, X = glm.fit$fitted.values, M = 1)
summary(rr1)
# Covariate balance before and after matching.
mb <- MatchBalance(metformin_b ~ glycosidase_inhibitors_a, match.out = rr1, nboots = 1, data = dt)


Estimate...  0.14141 
AI SE......  0.052368 
T-stat.....  2.7004 
p.val......  0.0069255 

Original number of observations..............  131 
Original number of treated obs...............  37 
Matched number of observations...............  37 
Matched number of observations  (unweighted).  1709 


***** (V1) glycosidase_inhibitors_aY *****
                       Before Matching 	 	 After Matching
mean treatment........    0.54054 	 	    0.54054 
mean control..........    0.39362 	 	    0.54054 
std mean diff.........     29.081 	 	          0 

mean raw eQQ diff.....    0.16216 	 	          0 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          0 

mean eCDF diff........   0.073462 	 	          0 
med  eCDF diff........   0.073462 	 	          0 
max  eCDF diff........    0.14692 	 	          0 

var ratio (Tr/Co).....     1.0581 	 	          1 
T-test p-value........     0.1359 	 	          1 



### Bootstrap the data to assess whether the effect would be significant if the number of records were larger and the data were representative of the general population.

In [17]:
# Propensity model: pre-admission metformin use as a function of steroid use.
glm.fit <- glm(metformin_b ~ steroid_use, family = binomial, data = dt2)
# summary(glm.fit)
# M is the number of matches found for each treated observation, not an
# iteration or bootstrap count.
# NOTE(review): the printed summary reports 37 treated out of 131 observations,
# so M = 100 is larger than the 94 available controls; confirm this is intended.
rr1 <- Match(
  Y = dt$outcome,
  Tr = dt$metformin_b,
  X = glm.fit$fitted.values,
  M = 100
)
summary(rr1)
# Covariate balance before and after matching (nboots = bootstrap samples used
# by the balance tests).
mb <- MatchBalance(metformin_b ~ steroid_use, data = dt, match.out = rr1, nboots = 10)


Estimate...  0.16935 
AI SE......  0.050443 
T-stat.....  3.3573 
p.val......  0.00078717 

Original number of observations..............  131 
Original number of treated obs...............  37 
Matched number of observations...............  37 
Matched number of observations  (unweighted).  3478 


***** (V1) steroid_useY *****
                       Before Matching 	 	 After Matching
mean treatment........    0.21622 	 	    0.21622 
mean control..........    0.42553 	 	    0.42553 
std mean diff.........    -50.155 	 	    -50.155 

mean raw eQQ diff.....    0.21622 	 	    0.20932 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........    0.10466 	 	    0.10466 
med  eCDF diff........    0.10466 	 	    0.10466 
max  eCDF diff........    0.20932 	 	    0.20932 

var ratio (Tr/Co).....    0.70492 	 	    0.69324 
T-test p-value........   0.016789 	 	   0.055505 



In [18]:
# Propensity model: pre-admission metformin use as a function of
# glycosidase-inhibitor use after admission.
glm.fit <- glm(metformin_b ~ glycosidase_inhibitors_a, family = binomial, data = dt2)
# summary(glm.fit)
# M is the number of matches found for each treated observation, not an
# iteration or bootstrap count.
# NOTE(review): the printed summary reports 37 treated out of 131 observations,
# so M = 100 is larger than the 94 available controls; confirm this is intended.
rr1 <- Match(
  Y = dt$outcome,
  Tr = dt$metformin_b,
  X = glm.fit$fitted.values,
  M = 100
)
summary(rr1)
# Covariate balance before and after matching (nboots = bootstrap samples used
# by the balance tests).
mb <- MatchBalance(metformin_b ~ glycosidase_inhibitors_a, data = dt, match.out = rr1, nboots = 10)


Estimate...  0.16935 
AI SE......  0.050443 
T-stat.....  3.3573 
p.val......  0.00078717 

Original number of observations..............  131 
Original number of treated obs...............  37 
Matched number of observations...............  37 
Matched number of observations  (unweighted).  3478 


***** (V1) glycosidase_inhibitors_aY *****
                       Before Matching 	 	 After Matching
mean treatment........    0.54054 	 	    0.54054 
mean control..........    0.39362 	 	    0.39362 
std mean diff.........     29.081 	 	     29.081 

mean raw eQQ diff.....    0.16216 	 	    0.14692 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........   0.073462 	 	   0.073462 
med  eCDF diff........   0.073462 	 	   0.073462 
max  eCDF diff........    0.14692 	 	    0.14692 

var ratio (Tr/Co).....     1.0581 	 	     1.0405 
T-test p-value........     0.1359 	 	    0.20853 



#### The results above suggest that the effect of metformin on survival/death is independent of glycosidase inhibitors but that steroid use is a significant confounding variable

### Assess frequency counts to gain a better understanding of the accuracy of the results

In [92]:
# Joint frequency tables for steroid use, the second treatment column, and
# outcome; the trailing [] forces the returned table to print.
freqsdt("dt", "steroid_use,metformin_b,outcome")[]
freqsdt("dt", "steroid_use,glycosidase_inhibitors_a,outcome")[]

steroid_use,metformin_b,outcome,frequency,percent
<chr>,<dbl>,<dbl>,<int>,<dbl>
N,0,1,52,39.6946565
N,1,1,28,21.3740458
Y,0,1,21,16.0305344
Y,0,0,19,14.5038168
Y,1,1,7,5.3435115
N,0,0,2,1.5267176
N,1,0,1,0.7633588
Y,1,0,1,0.7633588


steroid_use,glycosidase_inhibitors_a,outcome,frequency,percent
<chr>,<chr>,<dbl>,<int>,<dbl>
N,N,1,42,32.0610687
N,Y,1,38,29.0076336
Y,N,0,16,12.2137405
Y,Y,1,14,10.6870229
Y,N,1,14,10.6870229
Y,Y,0,4,3.0534351
N,N,0,2,1.5267176
N,Y,0,1,0.7633588


#### In assessing the numbers, there is not enough data to make an accurate determination between the effect of steroid use as a true confounding variable to metformin therapy on predicting outcome. However, steroid administration may be a significant confounding variable to the effect of metformin on reducing hospital morbidity and mortality. 

## Assess metformin therapy after admission 

In [34]:
# Recode metformin_a ("Y"/"N") as numeric 1/0.
# `:=` modifies dt by reference, so dt2 and dt refer to the same recoded table;
# the matching cells read the numeric column through both names.
# An already-recoded 1 is also accepted so that the cell can be re-run safely:
# testing only for "Y" would turn every value into 0 on a second execution.
dt2 <- dt[, c("metformin_a") := .(ifelse(metformin_a == "Y" | metformin_a == 1, 1, 0))]

## Propensity score match on steroid use

In [35]:
# Propensity model: post-admission metformin use as a function of steroid use.
glm.fit <- glm(metformin_a ~ steroid_use, family = binomial, data = dt2)
# summary(glm.fit)
# Match on the fitted propensity score. M is the number of matches found for
# each treated observation (1 = one-to-one matching), not an iteration count.
rr1 <- Match(
  Y = dt$outcome,
  Tr = dt$metformin_a,
  X = glm.fit$fitted.values,
  M = 1
)
summary(rr1)
# Covariate balance before and after matching.
mb <- MatchBalance(metformin_a ~ steroid_use, data = dt, match.out = rr1, nboots = 1)


Estimate...  0.077678 
AI SE......  0.053012 
T-stat.....  1.4653 
p.val......  0.14284 

Original number of observations..............  131 
Original number of treated obs...............  37 
Matched number of observations...............  37 
Matched number of observations  (unweighted).  1886 


***** (V1) steroid_useY *****
                       Before Matching 	 	 After Matching
mean treatment........    0.21622 	 	    0.21622 
mean control..........    0.42553 	 	    0.21622 
std mean diff.........    -50.155 	 	          0 

mean raw eQQ diff.....    0.21622 	 	          0 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          0 

mean eCDF diff........    0.10466 	 	          0 
med  eCDF diff........    0.10466 	 	          0 
max  eCDF diff........    0.20932 	 	          0 

var ratio (Tr/Co).....    0.70492 	 	          1 
T-test p-value........   0.016789 	 	          1 



## Propensity score match on glycosidase_inhibitors_a

In [36]:
# Propensity model: probability of receiving metformin after admission as a
# function of post-admission glycosidase inhibitor use.
glm.fit <- glm(metformin_a ~ glycosidase_inhibitors_a, data = dt2,
               family = binomial)
# summary(glm.fit)
# Match treated to untreated patients on the fitted propensity score.
# M is the number of controls matched to each treated patient, not a number
# of iterations. The cell previously passed M = 0, which is not a valid match
# count: Match() falls back to 1 with a warning that options(warn = -1)
# hides. M = 1 states the matching that was actually performed.
rr1 <- Match(Y = dt$outcome, Tr = dt$metformin_a,
             X = glm.fit$fitted.values, M = 1)
summary(rr1)
# Covariate balance before vs. after matching; nboots is the number of
# bootstrap samples MatchBalance() runs for its bootstrap p-values.
mb <- MatchBalance(metformin_a ~ glycosidase_inhibitors_a, match.out = rr1,
                   nboots = 1, data = dt)


Estimate...  0.14141 
AI SE......  0.052368 
T-stat.....  2.7004 
p.val......  0.0069255 

Original number of observations..............  131 
Original number of treated obs...............  37 
Matched number of observations...............  37 
Matched number of observations  (unweighted).  1709 


***** (V1) glycosidase_inhibitors_aY *****
                       Before Matching 	 	 After Matching
mean treatment........    0.54054 	 	    0.54054 
mean control..........    0.39362 	 	    0.54054 
std mean diff.........     29.081 	 	          0 

mean raw eQQ diff.....    0.16216 	 	          0 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          0 

mean eCDF diff........   0.073462 	 	          0 
med  eCDF diff........   0.073462 	 	          0 
max  eCDF diff........    0.14692 	 	          0 

var ratio (Tr/Co).....     1.0581 	 	          1 
T-test p-value........     0.1359 	 	          1 



### Bootstrap the data to determine whether, if the number of records were increased and the data were representative of the general population, the results would be statistically significant.

In [37]:
# Propensity model: probability of receiving metformin after admission as a
# function of steroid use.
glm.fit <- glm(metformin_a ~ steroid_use, data = dt2, family = binomial)
# summary(glm.fit)
# NOTE(review): M is the number of controls matched to each treated patient,
# not a number of bootstrap iterations or simulated records. With M = 100 and
# only 94 untreated patients (131 - 37), every control is matched to every
# treated patient (37 * 94 = 3478 unweighted matches), so the "after matching"
# balance equals the unmatched balance and the estimate is not adjusted for
# steroid use. M = 100 is kept so the reported numbers do not change; confirm
# the intent before interpreting this estimate.
rr1 <- Match(Y = dt$outcome, Tr = dt$metformin_a,
             X = glm.fit$fitted.values, M = 100)
summary(rr1)
# nboots is the number of bootstrap samples MatchBalance() runs for its
# bootstrap p-values; it does not resample the matching itself.
mb <- MatchBalance(metformin_a ~ steroid_use, match.out = rr1,
                   nboots = 10, data = dt)


Estimate...  0.16935 
AI SE......  0.050443 
T-stat.....  3.3573 
p.val......  0.00078717 

Original number of observations..............  131 
Original number of treated obs...............  37 
Matched number of observations...............  37 
Matched number of observations  (unweighted).  3478 


***** (V1) steroid_useY *****
                       Before Matching 	 	 After Matching
mean treatment........    0.21622 	 	    0.21622 
mean control..........    0.42553 	 	    0.42553 
std mean diff.........    -50.155 	 	    -50.155 

mean raw eQQ diff.....    0.21622 	 	    0.20932 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........    0.10466 	 	    0.10466 
med  eCDF diff........    0.10466 	 	    0.10466 
max  eCDF diff........    0.20932 	 	    0.20932 

var ratio (Tr/Co).....    0.70492 	 	    0.69324 
T-test p-value........   0.016789 	 	   0.055505 



## Use a matched pairs design to explore the effects of metformin monotherapy on outcome (survival/death)

In [19]:
# Keep the before-admission (_b) and after-admission (_a) medication flags
# plus steroid use and outcome. Selecting columns returns a new table, so the
# recoding below does not modify `dt`.
dt_sub <- dt[, c("insulin_b", "metformin_b", "secretagogues_b",
                 "tzd_b", "glycosidase_inhibitors_b", "dpp4_inhibitor_b",
                 "insulin_a", "metformin_a", "secretagogues_a",
                 "tzd_a", "glycosidase_inhibitors_a", "dpp4_inhibitor_a",
                 "steroid_use", "outcome"), with = FALSE]
# Store pre-admission metformin as "Y"/"N" like the other medication flags.
dt_sub[, metformin_b := ifelse(metformin_b == 1, "Y", "N")]
# metformin_a is recoded to 1/0 in the post-admission matching section. When
# the notebook is run top to bottom that has already happened by this point,
# so convert it back to "Y"/"N" here to keep both metformin columns
# consistent regardless of cell execution order.
if (is.numeric(dt_sub$metformin_a)) {
  dt_sub[, metformin_a := ifelse(metformin_a == 1, "Y", "N")]
}
str(dt_sub)

Classes 'data.table' and 'data.frame':	131 obs. of  14 variables:
 $ insulin_b               : chr  "N" "N" "Y" "N" ...
 $ metformin_b             : chr  "Y" "Y" "N" "N" ...
 $ secretagogues_b         : chr  "N" "N" "N" "N" ...
 $ tzd_b                   : chr  "N" "N" "N" "N" ...
 $ glycosidase_inhibitors_b: chr  "Y" "N" "N" "N" ...
 $ dpp4_inhibitor_b        : chr  "N" "N" "N" "N" ...
 $ insulin_a               : chr  "N" "N" "N" "N" ...
 $ metformin_a             : chr  "Y" "Y" "N" "N" ...
 $ secretagogues_a         : chr  "N" "N" "N" "N" ...
 $ tzd_a                   : chr  "N" "N" "N" "N" ...
 $ glycosidase_inhibitors_a: chr  "Y" "N" "N" "N" ...
 $ dpp4_inhibitor_a        : chr  "N" "N" "N" "N" ...
 $ steroid_use             : chr  "N" "N" "Y" "N" ...
 $ outcome                 : num  1 1 0 1 0 0 1 1 1 1 ...
 - attr(*, ".internal.selfref")=<externalptr> 


In [103]:
#### metformin monotherapy before and after
met <- dt_sub[(!dt_sub$`insulin_a` == "Y") &
                (!dt_sub$`secretagogues_a` == "Y") &
                (!dt_sub$`tzd_a` == "Y") &
                (!dt_sub$`glycosidase_inhibitors_a` == "Y") &
                (!dt_sub$`dpp4_inhibitor_a` == "Y") &
                (!dt_sub$steroid_use == "Y") &
                (!dt_sub$`insulin_b` == "Y") &
                (!dt_sub$`secretagogues_b` == "Y") &
                (!dt_sub$`tzd_b` == "Y") &
                (!dt_sub$`glycosidase_inhibitors_b` == "Y") &
                (!dt_sub$`dpp4_inhibitor_b` == "Y") &
                (!dt_sub$steroid_use == "Y"),]
met
glm.fit <- glm(outcome ~ `metformin_b`, data = met, family = binomial); summary(glm.fit)
glm.fit <- glm(outcome ~ `metformin_a`, data = met, family = binomial); summary(glm.fit)


insulin_b,metformin_b,secretagogues_b,tzd_b,glycosidase_inhibitors_b,dpp4_inhibitor_b,insulin_a,metformin_a,secretagogues_a,tzd_a,glycosidase_inhibitors_a,dpp4_inhibitor_a,steroid_use,outcome
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
N,Y,N,N,N,N,N,Y,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,Y,N,N,N,N,N,Y,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,Y,N,N,N,N,N,Y,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,0
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1



Call:
glm(formula = outcome ~ metformin_b, family = binomial, data = met)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.93484   0.00008   0.57802   0.57802   0.57802  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)  
(Intercept)     1.7047     0.7687   2.218   0.0266 *
metformin_bY   17.8613  4390.3075   0.004   0.9968  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 12.787  on 18  degrees of freedom
Residual deviance: 11.162  on 17  degrees of freedom
AIC: 15.162

Number of Fisher Scoring iterations: 18



Call:
glm(formula = outcome ~ metformin_a, family = binomial, data = met)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.93484   0.00008   0.57802   0.57802   0.57802  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)  
(Intercept)     1.7047     0.7687   2.218   0.0266 *
metformin_aY   17.8613  4390.3075   0.004   0.9968  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 12.787  on 18  degrees of freedom
Residual deviance: 11.162  on 17  degrees of freedom
AIC: 15.162

Number of Fisher Scoring iterations: 18


In [104]:
# Frequency/percent table of outcome by pre-admission metformin use within
# the `met` subset. freqsdt() takes the table name as a string.
freqsdt("met", "metformin_b,outcome")[]

metformin_b,outcome,frequency,percent
<chr>,<dbl>,<int>,<dbl>
N,1,11,57.89474
Y,1,6,31.57895
N,0,2,10.52632


In [20]:
#### metformin monotherapy before and after
met <- dt_sub[(!dt_sub$`insulin_a` == "Y") &
                (!dt_sub$`secretagogues_a` == "Y") &
                (!dt_sub$`tzd_a` == "Y") &
                (!dt_sub$`glycosidase_inhibitors_a` == "Y") &
                (!dt_sub$`dpp4_inhibitor_a` == "Y") &
                (!dt_sub$steroid_use == "Y") &
                (!dt_sub$`insulin_b` == "Y") &
                (!dt_sub$`secretagogues_b` == "Y") &
                (!dt_sub$`tzd_b` == "Y") &
                (!dt_sub$`glycosidase_inhibitors_b` == "Y") &
                (!dt_sub$`dpp4_inhibitor_b` == "Y") &
                (!dt_sub$steroid_use == "Y"),]
met
glm.fit <- glm(outcome ~ `metformin_b`, data = met, family = binomial); summary(glm.fit)
glm.fit <- glm(outcome ~ `metformin_a`, data = met, family = binomial); summary(glm.fit)


insulin_b,metformin_b,secretagogues_b,tzd_b,glycosidase_inhibitors_b,dpp4_inhibitor_b,insulin_a,metformin_a,secretagogues_a,tzd_a,glycosidase_inhibitors_a,dpp4_inhibitor_a,steroid_use,outcome
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
N,Y,N,N,N,N,N,Y,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,Y,N,N,N,N,N,Y,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,Y,N,N,N,N,N,Y,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,0
N,N,N,N,N,N,N,N,N,N,N,N,N,1
N,N,N,N,N,N,N,N,N,N,N,N,N,1



Call:
glm(formula = outcome ~ metformin_b, family = binomial, data = met)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.93484   0.00008   0.57802   0.57802   0.57802  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)  
(Intercept)     1.7047     0.7687   2.218   0.0266 *
metformin_bY   17.8613  4390.3075   0.004   0.9968  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 12.787  on 18  degrees of freedom
Residual deviance: 11.162  on 17  degrees of freedom
AIC: 15.162

Number of Fisher Scoring iterations: 18



Call:
glm(formula = outcome ~ metformin_a, family = binomial, data = met)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.93484   0.00008   0.57802   0.57802   0.57802  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)  
(Intercept)     1.7047     0.7687   2.218   0.0266 *
metformin_aY   17.8613  4390.3075   0.004   0.9968  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 12.787  on 18  degrees of freedom
Residual deviance: 11.162  on 17  degrees of freedom
AIC: 15.162

Number of Fisher Scoring iterations: 18
