In [1]:
library(simex)

In [2]:
# Read the analysis data set from the CSV export into a data frame.
merged_df <- read.csv("combined_mimic_smoking_status_0417.csv")
# Show the first rows as a quick sanity check of the import.
head(merged_df)

In [3]:
# Model specification for the naive outcome regression, kept as a string so
# that later cells can pass it through as.formula(fml).
# Response: mort_28_day. Right-hand side: echo (the column that is later
# forced to 0 / 1 for the counterfactual predictions), the adjustment
# covariates, and SMOKING_STATUS (the misclassified variable).
# The terms are listed in a character vector and joined with " + " rather
# than written as one string literal continued with backslash-newline
# escapes, so the formula text carries no embedded line breaks or padding.
fml_terms <- c(
  "echo", "first_careunit", "age", "gender", "weight", "saps", "sofa",
  "elix_score", "vent", "vaso", "icu_adm_weekday", "icu_adm_hour",
  "icd_chf", "icd_afib", "icd_renal", "icd_liver", "icd_copd", "icd_cad",
  "icd_stroke", "icd_malignancy",
  "vs_heart_rate_first", "vs_map_first", "vs_temp_first",
  "lab_hemoglobin_first", "lab_platelet_first", "lab_wbc_first",
  "lab_ph_first", "lab_chloride_first", "lab_sodium_first", "lab_bun_first",
  "lab_bicarbonate_first", "lab_pco2_first", "lab_creatinine_first",
  "lab_potassium_first", "lab_po2_first", "lab_lactate_first",
  "sedative", "vs_cvp_flag", "lab_creatinine_kinase_flag", "lab_bnp_flag",
  "lab_troponin_flag", "SMOKING_STATUS"
)
fml <- paste("mort_28_day ~", paste(fml_terms, collapse = " + "))
fml

In [4]:
# Treat SMOKING_STATUS as categorical; its factor levels are reused below to
# label the misclassification matrix.
merged_df[["SMOKING_STATUS"]] <- as.factor(merged_df[["SMOKING_STATUS"]])

In [68]:
# Naive logistic regression: fits the formula in `fml` on merged_df without
# any correction for misclassification. na.exclude keeps rows with missing
# values out of the fit.
glm_model <- glm(
  as.formula(fml),
  family = binomial,
  data = merged_df,
  na.action = na.exclude
)

In [69]:
# Display the summary (call and coefficient table) of the naive model.
summary(glm_model)


Call:
glm(formula = as.formula(fml), family = binomial, data = merged_df, 
    na.action = na.exclude)

Coefficients:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 3.5542731  7.4974785   0.474 0.635455    
echo                       -0.1629170  0.1284231  -1.269 0.204585    
first_careunitSICU         -0.1714718  0.1725299  -0.994 0.320288    
age                         0.0196017  0.0049069   3.995 6.48e-05 ***
genderM                     0.2627325  0.1292458   2.033 0.042071 *  
weight                     -0.0076758  0.0028308  -2.712 0.006697 ** 
saps                        0.0941357  0.0169318   5.560 2.70e-08 ***
sofa                        0.2292723  0.0260908   8.787  < 2e-16 ***
elix_score                  0.0016398  0.0382643   0.043 0.965819    
vent                        0.2919105  0.2076957   1.405 0.159881    
vaso                        0.0096973  0.1592693   0.061 0.951450    
icu_adm_weekdaymonday       0.2885723  0.

In [70]:
# Misclassification matrix for SMOKING_STATUS, written one column at a time.
# Each column is a set of counts divided by its total, so it sums to 1.
# NOTE(review): simex expects entry [i, j] to be P(observed = i | true = j);
# confirm the counts were tabulated in that orientation.
matrix_error <- cbind(
  c(8, 0, 2, 1) / 11,
  c(4, 4, 3, 0) / 11,
  c(1, 0, 14, 1) / 16,
  c(1, 0, 1, 61) / 63
)
# Let simex turn the empirical matrix into one it accepts for mcsimex; the
# printed result shows that this changes the second column.
matrix_error <- build.mc.matrix(matrix_error)
# Label rows and columns with the factor levels of the misclassified variable.
smoking_levels <- levels(merged_df$SMOKING_STATUS)
dimnames(matrix_error) <- list(smoking_levels, smoking_levels)
matrix_error

Unnamed: 0,1,2,3,4
1,0.7272727,0.3508917,0.0625,0.015873
2,0.0,0.3545172,0.0,0.0
3,0.1818182,0.2634643,0.875,0.015873
4,0.0909091,0.0311268,0.0625,0.968254


In [71]:
# MC-SIMEX correction of the naive logistic model for misclassification in
# SMOKING_STATUS, using matrix_error as the misclassification matrix.
# The simulation step re-misclassifies the variable at random, so the RNG is
# seeded first; without a seed the corrected coefficients, and the predictions
# and risk ratio derived from them, change from run to run.
set.seed(417)
tte_smoking_mcsimex <- mcsimex(
  glm_model,
  SIMEXvariable = "SMOKING_STATUS",
  mc.matrix = matrix_error,
  asymptotic = FALSE  # skip the asymptotic variance; jackknife is reported
)

In [72]:
# Display the MC-SIMEX fit: the naive model call, the misclassification
# matrix used, and the corrected coefficients with jackknife standard errors.
summary(tte_smoking_mcsimex)

Call:
mcsimex(model = glm_model, SIMEXvariable = "SMOKING_STATUS", 
    mc.matrix = matrix_error, asymptotic = FALSE)

Naive model: 
glm(formula = as.formula(fml), family = binomial, data = merged_df, 
    na.action = na.exclude)

Simex variable : SMOKING_STATUS 
Misclassification matrix: 
          1         2      3        4
1 0.7272727 0.3508917 0.0625 0.015873
2 0.0000000 0.3545172 0.0000 0.000000
3 0.1818182 0.2634643 0.8750 0.015873
4 0.0909091 0.0311268 0.0625 0.968254

Number of iterations:  100 

Residuals: 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-0.926516 -0.208407 -0.065317  0.006416  0.177691  0.984092 

Coefficients: 

Jackknife variance: 
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 3.3393653  7.6097071   0.439 0.660834    
echo                       -0.1560066  0.1299939  -1.200 0.230244    
first_careunitSICU         -0.1536026  0.1745758  -0.880 0.379043    
age                         0.0203226

In [10]:
# plot(tte_smoking_mcsimex)

### Calculating Risk Ratio using MC-SIMEX

In [80]:
# Fresh copy of the source data for the scenario in which echo is set to 0
# (done in the following cells).
merged_df_0 <- read.csv("combined_mimic_smoking_status_0417.csv")

In [81]:
# Build the echo column for the "echo = 0" scenario: every value above 0 is
# overwritten with 0. The result stays a one-column data frame.
echo_column <- merged_df_0["echo"]
is_positive <- echo_column > 0
data <- replace(echo_column, is_positive, 0)
# print(data)

In [82]:
# Write the modified echo values back into the scenario data set.
merged_df_0[["echo"]] <- data[["echo"]]
# The model was fitted with SMOKING_STATUS as a factor, so convert it here too.
merged_df_0[["SMOKING_STATUS"]] <- as.factor(merged_df_0[["SMOKING_STATUS"]])

In [83]:
# Predicted outcome probabilities from the MC-SIMEX-corrected model for the
# data set in which echo was set to 0.
predictions_0 <- predict(
  tte_smoking_mcsimex,
  newdata = merged_df_0,
  type = "response"
)

In [85]:
# Sum of the predicted probabilities under echo = 0; rows whose prediction is
# NA are skipped. TRUE is spelled out because `T` is an ordinary variable that
# can be reassigned, which would silently change na.rm.
p0_total <- sum(predictions_0, na.rm = TRUE)
print(p0_total)

[1] 662.0483


In [88]:
# Fresh copy of the source data for the scenario in which echo is set to 1
# (done in the following cells).
merged_df_1 <- read.csv("combined_mimic_smoking_status_0417.csv")

In [90]:
# Build the echo column for the "echo = 1" scenario: every value greater than
# -1 is overwritten with 1. The result stays a one-column data frame.
echo_column <- merged_df_1["echo"]
is_above_minus_one <- echo_column > -1
data <- replace(echo_column, is_above_minus_one, 1)

In [91]:
# Write the modified echo values back into the scenario data set.
merged_df_1[["echo"]] <- data[["echo"]]
# The model was fitted with SMOKING_STATUS as a factor, so convert it here too.
merged_df_1[["SMOKING_STATUS"]] <- as.factor(merged_df_1[["SMOKING_STATUS"]])

In [92]:
# Predicted outcome probabilities from the MC-SIMEX-corrected model for the
# data set in which echo was set to 1.
predictions_1 <- predict(
  tte_smoking_mcsimex,
  newdata = merged_df_1,
  type = "response"
)

In [93]:
# Sum of the predicted probabilities under echo = 1; rows whose prediction is
# NA are skipped. TRUE is spelled out because `T` is an ordinary variable that
# can be reassigned, which would silently change na.rm.
p1_total <- sum(predictions_1, na.rm = TRUE)
print(p1_total)

[1] 617.9846


In [94]:
# Risk ratio: summed predicted probability with echo set to 1, divided by the
# summed predicted probability with echo set to 0.
rr <- p1_total / p0_total
print(rr)

[1] 0.9334433
