In [1]:
# Session setup for the decile-calibration notebook.
# NOTE(review): every path used in the visible cells is absolute, so this
# setwd() does not appear to be needed by them -- confirm before removing.
setwd("/data/Projects/ACC_NCDR/NCDR/BJMDATA/ACTION/NCHData/")
# Project helper script; presumably defines sortDeciles(), which the main loop
# calls -- verify against calcDeciles.R.
source(file = "/data/Projects/ACC_NCDR/NCDR/BJMDATA/ACTION/CODE/calcDeciles.R")

# NOTE(review): SpecsVerification, ggpubr and PRROC are attached here but are
# not referenced in the visible cells; they may be used elsewhere.
library(SpecsVerification)
#library(pROC)
# ggplot2 supplies ggplot(), geom_bar(), the scale_* functions and ggsave().
library(ggplot2)
library(ggpubr)
library(PRROC)

Loading required package: magrittr


In [2]:
# Fold numbers iterated by the main loop; each one is pasted into a
# "fold_<n>" directory name.
folds.all <- as.list(as.numeric(1:5))
# Model identifiers iterated by the main loop; each one is pasted into the
# input and output file names.
modes <- as.list(c('mcnamara', 'expanded'))

In [3]:
# Decile calibration tables and bar plots for every fold x model combination.
#
# For each (fold, mode) pair this code:
#   1. loads `preds.df` from trainCpreds_fig_<mode>.Rdata and attaches the LR
#      predictions read from lr_preds_trainC_<mode>.csv as `preds.df$lr`;
#   2. runs sortDeciles() on each model's own predictions (results are kept in
#      `<model>.deciles` but are not used further inside this loop);
#   3. bins rows by deciles of the LR prediction and, per decile, records the
#      mean LR prediction, the mean prediction of a comparison model, the
#      observed DCStatus rate and t-based confidence limits (all x 100);
#   4. writes one CSV and one TIFF bar plot per comparison model.

decile.probs <- seq(0, 1, 0.1)

# Lower/upper limits of the default (95%) one-sample t interval for mean(x).
# t.test() raises an error for fewer than two observations or constant data;
# that case is reported as a warning and yields NA limits instead of aborting
# the whole fold x mode run.
tConfLimits <- function(x) {
  tryCatch(
    as.numeric(t.test(x)$conf.int),
    error = function(e) {
      warning("t.test failed for a decile (", conditionMessage(e),
              "); confidence limits set to NA", call. = FALSE)
      c(NA_real_, NA_real_)
    }
  )
}

# First element of sortDeciles() for one model, using that model's own decile
# break points. The original call passed the outcome vector as a stray second
# positional argument to quantile(), where it was bound to `names`; only the
# prediction vector belongs there.
modelDeciles <- function(pred, outcome) {
  breaks <- quantile(pred, probs = decile.probs, na.rm = TRUE)
  sortDeciles(pred, outcome, breaks)[[1]]
}

# Keep only the predicted/observed rate columns and give them short names.
ratesOnly <- function(deciles.output) {
  rates <- deciles.output[, c('predicted rates', 'observed rates')]
  colnames(rates) <- c('predicted', 'observed')
  rates
}

# Long-format per-decile summary comparing LR with one other model.
#   lr          LR predictions; rows are binned on these values
#   model.pred  predictions of the comparison model (same row order)
#   observed    observed outcome (same row order)
#   breaks      increasing break points delimiting the bins
#   model.label label for the comparison model's mean rows
#   ci.label    suffix used in the LowerCI_/UpperCI_ labels
# Returns a data frame with columns decile, values, model and Groups
# (a factor whose level order follows the label order below).
buildDecileTable <- function(lr, model.pred, observed, breaks,
                             model.label, ci.label) {
  labels <- c("LR", model.label, "Observed",
              "LowerCI_LR", paste0("LowerCI_", ci.label),
              "UpperCI_LR", paste0("UpperCI_", ci.label))
  n.bins <- length(breaks) - 1
  # Bins are [lower, upper) except the last, which is closed on the right so
  # that rows equal to the maximum LR prediction land in the top decile. The
  # original half-open test dropped those rows from every decile.
  bin <- findInterval(lr, unname(breaks), rightmost.closed = TRUE)

  rows <- lapply(seq_len(n.bins), function(k) {
    idx <- which(bin == k)  # computed once per decile; which() drops NA
    lr.ci <- tConfLimits(lr[idx])
    model.ci <- tConfLimits(model.pred[idx])
    data.frame(
      decile = rep(as.numeric(k), length(labels)),
      values = c(mean(lr[idx]), mean(model.pred[idx]), mean(observed[idx]),
                 lr.ci[1], model.ci[1], lr.ci[2], model.ci[2]) * 100,
      model = labels,
      stringsAsFactors = FALSE
    )
  })
  decile.df <- do.call(rbind, rows)
  decile.df$Groups <- factor(decile.df$model, levels = labels)
  decile.df
}

# Dodged bar plot of the rows whose `model` is in `shown.models`, saved as a
# 6 x 6 inch, 300 dpi, LZW-compressed TIFF. Returns the plot invisibly.
saveDecilePlot <- function(decile.df, shown.models, file.name) {
  plot.df <- decile.df[which(decile.df$model %in% shown.models), ]
  # `decile` and `values` are numeric, so continuous scales with explicit
  # breaks are used; numeric limits on a discrete scale are rejected by
  # current ggplot2.
  decile.plot <- ggplot(plot.df, aes(fill = Groups, y = values, x = decile,
                                     color = Groups)) +
    geom_bar(position = "dodge", stat = "identity") +
    scale_x_continuous("LR risk deciles", breaks = seq(1, 10, 1)) +
    scale_y_continuous("Mortality risk (%)", breaks = c(0, 1, 2, 5, 10, 20, 30))
  # ggsave() opens and closes its own device. The earlier tiff()/dev.off()
  # pair never drew anything (no autoprint inside a for loop), so its
  # size/compression settings are passed to ggsave() directly.
  ggsave(filename = file.name, plot = decile.plot, width = 6, height = 6,
         units = "in", dpi = 300, compression = "lzw")
  invisible(decile.plot)
}

fig_dir <- paste0("/data/Projects/ACC_NCDR/NCDR/BJMDATA/ACTION/NCHData/multiple_imputed/figs/")
if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive = TRUE)
}

for (fold in folds.all) {
  for (mode in modes) {
    data_dir <- paste0("/data/Projects/ACC_NCDR/NCDR/BJMDATA/ACTION/NCHData/multiple_imputed/fold_", fold)
    cat(paste0('Loading data for ', mode, ' Model Fold ', fold, '\n'))

    # load() restores objects by name; the code below relies on `preds.df`.
    load(file = paste0(data_dir, '/trainCpreds_fig_', mode, '.Rdata'))
    lr_predsC.df <- read.csv(file = paste0(data_dir, '/lr_preds_trainC_', mode, '.csv'), header = TRUE)
    if (!exists("preds.df")) {
      stop("preds.df is not available after loading fold ", fold,
           ", mode ", mode, call. = FALSE)
    }
    # The LR predictions are attached by position, so the two tables must
    # have the same number of rows (otherwise values would be recycled).
    if (is.null(lr_predsC.df$lr) || nrow(lr_predsC.df) != nrow(preds.df)) {
      stop("LR predictions do not align with preds.df for fold ", fold,
           ", mode ", mode, call. = FALSE)
    }
    preds.df$lr <- lr_predsC.df$lr

    # Per-model deciles on each model's own predictions.
    lr.deciles.output <- modelDeciles(preds.df$lr, preds.df$DCStatus)
    glm.deciles.output <- modelDeciles(preds.df$glm, preds.df$DCStatus)
    xgb.deciles.output <- modelDeciles(preds.df$xgb, preds.df$DCStatus)
    nn.deciles.output <- modelDeciles(preds.df$nn, preds.df$DCStatus)
    meta.deciles.output <- modelDeciles(preds.df$meta, preds.df$DCStatus)

    lr.deciles <- ratesOnly(lr.deciles.output)
    glm.deciles <- ratesOnly(glm.deciles.output)
    xgb.deciles <- ratesOnly(xgb.deciles.output)
    nn.deciles <- ratesOnly(nn.deciles.output)
    meta.deciles <- ratesOnly(meta.deciles.output)

    # All three comparison tables are binned on the same LR deciles, so the
    # break points are computed once.
    lr_quant <- quantile(preds.df$lr, probs = decile.probs, na.rm = TRUE)

    glm.df <- buildDecileTable(preds.df$lr, preds.df$glm, preds.df$DCStatus,
                               lr_quant, model.label = "Lasso", ci.label = "GLM")
    xgb.df <- buildDecileTable(preds.df$lr, preds.df$xgb, preds.df$DCStatus,
                               lr_quant, model.label = "XGB", ci.label = "XGB")
    meta.df <- buildDecileTable(preds.df$lr, preds.df$meta, preds.df$DCStatus,
                                lr_quant, model.label = "meta", ci.label = "meta")

    write.table(glm.df,  row.names = FALSE, file = paste0(data_dir, '/', mode, '_vars_glm_deciles.csv'), sep = ',')
    write.table(xgb.df,  row.names = FALSE, file = paste0(data_dir, '/', mode, '_vars_xgb_deciles.csv'), sep = ',')
    write.table(meta.df, row.names = FALSE, file = paste0(data_dir, '/', mode, '_vars_meta_deciles.csv'), sep = ',')

    # Only the three mean series are plotted; the CI rows stay in the CSVs.
    saveDecilePlot(glm.df, c('LR', 'Lasso', 'Observed'),
                   paste0(fig_dir, mode, fold, '_glm_decile_plot.tiff'))
    saveDecilePlot(xgb.df, c('LR', 'XGB', 'Observed'),
                   paste0(fig_dir, mode, fold, '_xgb_decile_plot.tiff'))
    saveDecilePlot(meta.df, c('LR', 'meta', 'Observed'),
                   paste0(fig_dir, mode, fold, '_meta_decile_plot.tiff'))

    cat('Decile plots for', mode, 'fold', fold, 'plotted and saved.\n')
  }
}

Loading data for mcnamara Model Fold 1


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for mcnamara fold 1 plotted and saved.
Loading data for expanded Model Fold 1


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for expanded fold 1 plotted and saved.
Loading data for mcnamara Model Fold 2


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for mcnamara fold 2 plotted and saved.
Loading data for expanded Model Fold 2


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for expanded fold 2 plotted and saved.
Loading data for mcnamara Model Fold 3


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for mcnamara fold 3 plotted and saved.
Loading data for expanded Model Fold 3


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for expanded fold 3 plotted and saved.
Loading data for mcnamara Model Fold 4


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for mcnamara fold 4 plotted and saved.
Loading data for expanded Model Fold 4


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for expanded fold 4 plotted and saved.
Loading data for mcnamara Model Fold 5


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for mcnamara fold 5 plotted and saved.
Loading data for expanded Model Fold 5


Saving 6 x 6 in image
Saving 6 x 6 in image
Saving 6 x 6 in image


Decile plots for expanded fold 5 plotted and saved.
