In [1]:
import os, numpy, warnings
import pandas as pd

In [2]:
# Tell rpy2 which R installation to use; this is set before the rpy2
# extension is loaded in the next cell.
# NOTE(review): the absolute path is specific to one user's conda environment
# and will need adjusting on any other machine.
os.environ['R_HOME'] = '/home/gdpoore/anaconda3/envs/tcgaAnalysisPythonR/lib/R'
# Silence every Python-side warning for the rest of the session.
warnings.filterwarnings('ignore')
# Render inline figures in the 'retina' format.
%config InlineBackend.figure_format = 'retina'

In [3]:
# (Re)load the rpy2 IPython extension; the %%R cell magic used by the cells
# below comes from it.
%reload_ext rpy2.ipython

In [4]:
%%R

# Packages used by the analysis cells below.
# library() is used rather than require() so that a missing package stops the
# cell with an error instead of returning FALSE and failing later on. The load
# order of the original list is kept (the duplicate edgeR entry is dropped) so
# that function masking between packages is unchanged.
library(ggplot2)
library(snm)
library(limma)
library(edgeR)
library(dplyr)
library(pvca)
library(lme4)
library(ggsci)
library(cowplot)
library(doMC)
library(splitstackshape)

# Register the doMC parallel back end with every core reported by
# detectCores().
numCores <- detectCores()
registerDoMC(cores = numCores)

In [5]:
%%R
# Restore previously saved R objects from two .RData files located in the
# current working directory; load() places them in the calling environment.
# NOTE(review): presumably these files provide metadataPSMatchedDPQCFiltered
# and snmShogundataPSUniqueDecontamDPQC, which later cells use without
# defining them - confirm. If both files contain an object with the same name,
# the second load() wins, so the order of these two lines may matter.
load("snmCfdnaShogunAndMetadata_Dec2_Final.RData")
load("snmKrakenAndMetadataFiltered_Dec2_Final.RData")

In [6]:
%%R
# Load dependencies for the machine-learning functions defined below.
# library() replaces require() so that a missing package is a hard error, and
# each package is listed once (first-occurrence order of the original list is
# preserved so that masking between packages does not change).
library(devtools)
library(doMC)            # parallel back end
library(tibble)          # data-frame helpers (rownames_to_column)
library(gbm)             # gradient boosting machines
library(splitstackshape) # stratified() sampling
library(reshape2)
library(ggpubr)
library(caret)           # model building and resampling
library(pROC)            # AUC calculations
library(purrr)           # functional programming with map()
library(dplyr)           # data manipulation
library(cowplot)         # plotting
library(PRROC)           # ROC and precision-recall curves
library(MLmetrics)       # multi-class metrics

# GBM tuning grids: 3 interaction depths x 3 tree counts (50, 100, 150) at a
# fixed shrinkage. The two grids differ only in the minimum number of
# observations per terminal node (5 vs 1).
defaultGBMGrid <- expand.grid(interaction.depth = seq(1, 3),
                              n.trees = floor((1:3) * 50),
                              shrinkage = 0.1,
                              n.minobsinnode = 5)
customGBMGrid <- expand.grid(interaction.depth = seq(1, 3),
                             n.trees = floor((1:3) * 50),
                             shrinkage = 0.1,
                             n.minobsinnode = 1)

# Resampling settings read as globals by ml2DTs(): number of CV folds and
# number of repeats of the k-fold procedure.
numKFold <- 4
numResampleIter <- 1

# One-vs-rest GBM classifier evaluated on a single 70/30 train/test split.
#
# NOTE: ml2DTs is defined a second time further down in this same cell; that
# later definition is the one in effect once the cell has run.
#
# Samples whose disease_type_consol is PRAD, SKCM or NSCLC are kept, relabelled
# as `classOfInterest` vs "Other", sub-sampled per class with stratified(),
# split 70/30, and a GBM is tuned with repeated k-fold CV (up-sampling,
# optimising ROC). ROC and precision-recall curves for the held-out samples are
# plotted on the active graphics device.
#
# Arguments:
#   snmData         Feature table whose row names are sample IDs matching the
#                   row names of metadataPSMatchedDPQCFiltered.
#   classOfInterest Value of disease_type_consol treated as the positive class.
#                   The default is not one of the three retained disease types,
#                   so callers need to pass e.g. "PRAD", "SKCM" or "NSCLC".
#   cutPoint        Probability threshold for calling a sample positive in the
#                   returned confusion matrix.
#   samplingSize    Per-class `size` handed to stratified().
#   caretTuneGrid   Tuning grid for train(). When the argument is not supplied
#                   the function keeps its historical behaviour and uses
#                   customGBMGrid; an explicitly supplied grid is now honoured
#                   (it used to be ignored).
#
# Returns: a caret confusionMatrix object with "TRUE" (= classOfInterest) as
#          the positive class.
#
# Reads the globals metadataPSMatchedDPQCFiltered, customGBMGrid, numKFold and
# numResampleIter.
ml2DTs <- function(snmData,
                   classOfInterest = "Lung Adenocarcinoma",
                   cutPoint = 0.5,
                   samplingSize = 20,
                   caretTuneGrid = defaultGBMGrid) {

  # Same clean-up that is applied to the class labels before training, so the
  # positive-class name always matches the model's probability column.
  cleanLabel <- function(x) gsub('([[:punct:]])|\\s+', '', x)

  # Preserve the grid that was always used when no grid is passed explicitly.
  tuneGridToUse <- if (missing(caretTuneGrid)) customGBMGrid else caretTuneGrid

  keepRows <- metadataPSMatchedDPQCFiltered$disease_type_consol %in%
    c("PRAD", "SKCM", "NSCLC")
  metaTmp1 <- droplevels(metadataPSMatchedDPQCFiltered[keepRows, ])

  # Without at least one positive sample the outcome has a single level and
  # train() fails with a much less helpful message.
  if (!any(metaTmp1$disease_type_consol == classOfInterest, na.rm = TRUE)) {
    stop("classOfInterest '", classOfInterest,
         "' is not present among the PRAD/SKCM/NSCLC samples", call. = FALSE)
  }

  tmp <- metaTmp1
  tmp$disease_type_consol <- factor(ifelse(metaTmp1$disease_type_consol == classOfInterest,
                                           yes = classOfInterest, no = "Other"))
  metadataSimSampled <- as.data.frame(stratified(tmp,
                                                 group = "disease_type_consol",
                                                 size = samplingSize,
                                                 keep.rownames = TRUE,
                                                 replace = FALSE,
                                                 bothSets = FALSE))
  # stratified(keep.rownames = TRUE) stores the original row names in `rn`.
  rownames(metadataSimSampled) <- metadataSimSampled$rn
  mlDataY <- metadataSimSampled
  mlDataX <- snmData[rownames(mlDataY), ]

  set.seed(42)
  index <- createDataPartition(mlDataY$disease_type_consol, p = 0.7, list = FALSE)
  trainX <- mlDataX[index, ]
  trainY <- mlDataY[index, ]$disease_type_consol
  testX <- mlDataX[-index, ]
  testY <- mlDataY[-index, ]$disease_type_consol

  # caret needs class labels that are valid R names when classProbs = TRUE.
  refactoredTrainY <- factor(cleanLabel(trainY))
  refactoredTestY <- factor(cleanLabel(testY))

  set.seed(42)
  ctrl <- trainControl(method = "repeatedcv",
                       number = numKFold,
                       repeats = numResampleIter,
                       sampling = "up",
                       summaryFunction = twoClassSummary,
                       classProbs = TRUE,
                       verboseIter = TRUE,
                       savePredictions = TRUE,
                       allowParallel = TRUE)

  mlModel <- train(x = trainX,
                   y = refactoredTrainY,
                   method = "gbm",
                   preProcess = c("scale", "center"),
                   trControl = ctrl,
                   verbose = TRUE,
                   metric = "ROC",
                   tuneGrid = tuneGridToUse)

  positiveClass <- cleanLabel(classOfInterest)
  negativeClass <- "Other"

  # Predict once and reuse the probabilities for the curves and the matrix.
  predProbs <- as.numeric(predict(mlModel, newdata = testX, type = "prob")[, positiveClass])
  fg <- predProbs[refactoredTestY == positiveClass]
  bg <- predProbs[refactoredTestY == negativeClass]

  prroc_roc <- roc.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE)
  prroc_pr <- pr.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE,
                       rand.compute = TRUE)

  plot(prroc_roc)
  plot(prroc_pr)

  # Fixing both factor levels keeps the table 2 x 2 even when every prediction
  # falls on one side of the cut point; positive = "TRUE" reports sensitivity
  # etc. for the class of interest rather than for "Other".
  calledPositive <- factor(predProbs >= cutPoint, levels = c(FALSE, TRUE))
  trulyPositive <- factor(refactoredTestY == positiveClass, levels = c(FALSE, TRUE))
  confusionMatrix(table(Prediction = calledPositive, Reference = trulyPositive),
                  positive = "TRUE")
}

#-----------------------------------------#
# Machine learning
#-----------------------------------------#


# mlHvsC <- function(snmData){
# Load dependencies


# Healthy-vs-cancer GBM classifier evaluated on a single 70/30 split.
#
# Uses every sample in the global metadataPSMatchedDPQCFiltered, with its HvsC
# column as the outcome ("Cancer" is the positive class, "Control" the
# negative one). A GBM is tuned with 4-fold CV (up-sampling, optimising ROC),
# ROC and precision-recall curves for the held-out samples are plotted on the
# active graphics device, and the held-out confusion matrix is printed.
#
# Arguments:
#   snmData  Feature table whose row names are sample IDs matching the row
#            names of metadataPSMatchedDPQCFiltered.
#
# Returns: the value of print() on the caret confusionMatrix object.
#
# Side effect: (re)registers the doMC back end with all detected cores.
mlHvsC <- function(snmData) {

  registerDoMC(cores = detectCores())

  # Local tuning grid and resampling settings; these deliberately do not
  # depend on the globals of the same purpose. The unused n.minobsinnode = 1
  # grid that used to be built here has been dropped.
  caretTuneGrid <- expand.grid(interaction.depth = seq(1, 3),
                               n.trees = floor((1:3) * 50),
                               shrinkage = 0.1,
                               n.minobsinnode = 5)
  numKFold <- 4
  numResampleIter <- 1

  mlDataY <- metadataPSMatchedDPQCFiltered
  mlDataX <- snmData[rownames(mlDataY), ]

  set.seed(42)
  index <- createDataPartition(mlDataY$HvsC, p = 0.7, list = FALSE)
  trainX <- mlDataX[index, ]
  trainY <- mlDataY[index, ]$HvsC
  testX <- mlDataX[-index, ]
  testY <- mlDataY[-index, ]$HvsC

  # caret needs class labels that are valid R names when classProbs = TRUE.
  refactoredTrainY <- factor(gsub('([[:punct:]])|\\s+', '', trainY))
  refactoredTestY <- factor(gsub('([[:punct:]])|\\s+', '', testY))

  set.seed(42)
  ctrl <- trainControl(method = "repeatedcv",
                       number = numKFold,
                       repeats = numResampleIter,
                       sampling = "up",
                       summaryFunction = twoClassSummary,
                       classProbs = TRUE,
                       verboseIter = TRUE,
                       savePredictions = TRUE,
                       allowParallel = TRUE)

  mlModel <- train(x = trainX,
                   y = refactoredTrainY,
                   method = "gbm",
                   preProcess = c("scale", "center"),
                   trControl = ctrl,
                   verbose = TRUE,
                   metric = "ROC",
                   tuneGrid = caretTuneGrid)

  positiveClass <- "Cancer"
  negativeClass <- "Control"
  predProbs <- as.numeric(predict(mlModel, newdata = testX, type = "prob")[, positiveClass])
  fg <- predProbs[refactoredTestY == positiveClass]
  bg <- predProbs[refactoredTestY == negativeClass]

  prroc_roc <- roc.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE)
  prroc_pr <- pr.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE,
                       rand.compute = TRUE)

  plot(prroc_roc)
  plot(prroc_pr)

  predClass <- predict(mlModel, newdata = testX)
  print(confusionMatrix(data = predClass, reference = refactoredTestY,
                        positive = positiveClass))
}

#-----------------------------------------------------

# Leave-one-out cross-validated GBM classification of disease types.
#
# Samples whose disease_type_consol is in `DTs` are taken from the global
# metadataPSMatchedDPQCFiltered and sub-sampled per class with stratified().
# For every sampled row a GBM is tuned on all other rows (4-fold CV with
# up-sampling) and used to predict the held-out row. The pooled predictions
# are summarised with multiClassSummary(), ROC / precision-recall curves for
# the first class against the second class are written as PNG and CSV, and the
# confusion matrix is printed and written to a text file.
#
# Arguments:
#   snmData        Feature table whose row names are sample IDs matching the
#                  row names of metadataPSMatchedDPQCFiltered.
#   samplingSize   Per-class `size` handed to stratified().
#   DTs            Character vector of disease_type_consol values to include.
#                  Unless HvsCFlag is TRUE these are also the classes, and
#                  DTs[1] / DTs[2] are the foreground / background of the
#                  curves.
#   caretTuneGrid  Tuning grid for train().
#   filenameString Prefix of every output file (default: DTs joined by "__").
#   HvsCFlag       If TRUE, classify on the HvsC column instead of the disease
#                  type; the classes are then the levels of HvsC and the curves
#                  use its first level as foreground and second as background.
#
# Returns (invisibly) a list with
#   confusionMatrix  caret confusionMatrix of predicted vs observed classes,
#   summaryStats     data frame of multiClassSummary() statistics,
#   predictions      data frame of obs, pred and per-class probabilities,
#   varImp           list of non-zero variable importances, one data frame per
#                    held-out sample.
# (Previously nothing useful was returned, so assignments of the result held
# NULL.)
#
# Files written to the working directory: <prefix>__SHOGUN__ROC.png,
# <prefix>__SHOGUN__PR.png, <prefix>__SHOGUN__Data__ROC.csv,
# <prefix>__SHOGUN__Data__PR.csv and <prefix>__SHOGUN__CM.txt.
loocvDTs <- function(snmData, samplingSize = 15, DTs, caretTuneGrid = defaultGBMGrid,
                     filenameString = paste(DTs, collapse = "__"), HvsCFlag = FALSE) {

  # One clean-up rule for every label, so that `classes`, the factor levels
  # used for training and the probability column names always agree.
  cleanLabel <- function(x) gsub('([[:punct:]])|\\s+', '', x)

  # Draw one curve into a PNG and always close the device, even on error.
  saveCurvePlot <- function(path, curve) {
    png(filename = path, width = 6, height = 4, units = 'in', res = 300)
    on.exit(dev.off(), add = TRUE)
    plot(curve)
  }

  selectedRows <- metadataPSMatchedDPQCFiltered$disease_type_consol %in% DTs
  metaData <- droplevels(metadataPSMatchedDPQCFiltered[selectedRows, ])

  if (HvsCFlag) {
    metaData$disease_type_consol <- metaData$HvsC
    # factor() makes this work whether HvsC is stored as factor or character.
    classes <- cleanLabel(levels(factor(metaData$disease_type_consol)))
    # After relabelling, the DTs names are no longer the class names, so the
    # curves have to be built from the actual classes.
    curveLabels <- classes[1:2]
  } else {
    classes <- cleanLabel(DTs)
    curveLabels <- DTs[1:2]
  }
  if (length(classes) < 2) {
    stop("loocvDTs needs at least two classes", call. = FALSE)
  }

  filenameROC <- paste0(filenameString, "__SHOGUN__ROC.png")
  filenamePR <- paste0(filenameString, "__SHOGUN__PR.png")
  filenameROCData <- paste0(filenameString, "__SHOGUN__Data__ROC.csv")
  filenamePRData <- paste0(filenameString, "__SHOGUN__Data__PR.csv")
  filenameSink <- paste0(filenameString, "__SHOGUN__CM.txt")

  numKFold <- 4
  numResampleIter <- 1
  iterSize <- 1
  multiClassSummaryStats <- vector("list", iterSize)

  for (jj in seq_len(iterSize)) {
    metadataSimSampled <- as.data.frame(stratified(metaData,
                                                   group = "disease_type_consol",
                                                   size = samplingSize,
                                                   keep.rownames = TRUE,
                                                   replace = FALSE,
                                                   bothSets = FALSE))
    # stratified(keep.rownames = TRUE) stores the original row names in `rn`.
    rownames(metadataSimSampled) <- metadataSimSampled$rn
    mlDataY <- metadataSimSampled
    mlDataX <- snmData[rownames(mlDataY), , drop = FALSE]
    # Sanity check (the original comparison was evaluated but never enforced).
    stopifnot(nrow(mlDataY) == nrow(mlDataX))

    nSamples <- nrow(mlDataY)
    predProbs <- vector("list", nSamples)
    obsClass <- character(nSamples)
    predClass <- character(nSamples)
    varImpBestModelDF2OrderedNonzeroList <- vector("list", nSamples)

    for (ii in seq_len(nSamples)) {
      print(sprintf("Iteration: %d/%d", ii, nSamples))

      # Hold out row ii, train on everything else.
      trainX <- mlDataX[-ii, , drop = FALSE]
      trainY <- mlDataY$disease_type_consol[-ii]
      testX <- mlDataX[ii, , drop = FALSE]
      testY <- mlDataY$disease_type_consol[ii]

      refactoredTrainY <- factor(cleanLabel(trainY))
      obsClass[ii] <- cleanLabel(as.character(testY))

      set.seed(42)
      ctrl <- trainControl(method = "repeatedcv",
                           number = numKFold,
                           repeats = numResampleIter,
                           sampling = "up",
                           summaryFunction = multiClassSummary,
                           classProbs = TRUE,
                           verboseIter = FALSE,
                           savePredictions = TRUE,
                           allowParallel = TRUE)

      # multiClassSummary reports the ROC AUC under the name "AUC" (see the
      # logLoss / AUC / prAUC columns in the run output); it has no "ROC"
      # column, so asking for "ROC" made train() fall back to another metric
      # instead of tuning on the area under the ROC curve.
      mlModel <- train(x = trainX,
                       y = refactoredTrainY,
                       method = "gbm",
                       preProcess = c("scale", "center"),
                       trControl = ctrl,
                       verbose = FALSE,
                       metric = "AUC",
                       tuneGrid = caretTuneGrid)

      predProbs[[ii]] <- predict(mlModel, newdata = testX, type = "prob")
      predClass[ii] <- as.character(predict(mlModel, newdata = testX, type = "raw"))

      # Keep the features with non-zero importance, most important first.
      varImpBestModelDF <- as.data.frame(varImp(mlModel$finalModel, scale = FALSE))
      varImpBestModelDF2 <- rownames_to_column(varImpBestModelDF, "Taxa")
      varImpBestModelDF2Ordered <- varImpBestModelDF2[order(-varImpBestModelDF2$Overall), ]
      colnames(varImpBestModelDF2Ordered)[2] <- "varImp"
      varImpBestModelDF2OrderedNonzeroList[[ii]] <-
        varImpBestModelDF2Ordered[varImpBestModelDF2Ordered$varImp != 0, ]

      rm(mlModel)
    }

    loocvPreds <- cbind(obs = factor(obsClass, levels = classes),
                        pred = factor(predClass, levels = classes),
                        do.call(rbind, predProbs))

    multiClassSummaryStats[[jj]] <- multiClassSummary(loocvPreds, lev = classes)
    print(multiClassSummaryStats[[jj]])

    # Probability of the first class, split by whether the sample truly is the
    # first class (foreground) or the second class (background).
    firstClassProbs <- loocvPreds[, classes[1]]
    fg <- firstClassProbs[which(loocvPreds$obs == classes[1])]
    bg <- firstClassProbs[which(loocvPreds$obs == classes[2])]

    prroc_roc <- roc.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE)
    prroc_pr <- pr.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE,
                         rand.compute = TRUE)

    saveCurvePlot(filenameROC, prroc_roc)
    saveCurvePlot(filenamePR, prroc_pr)

    rocCurveData <- cbind(as.data.frame(prroc_roc$curve),
                          DT1 = curveLabels[1], DT2 = curveLabels[2])
    prCurveData <- cbind(as.data.frame(prroc_pr$curve),
                         DT1 = curveLabels[1], DT2 = curveLabels[2])

    write.table(prCurveData, sep = ",", file = filenamePRData, col.names = FALSE)
    write.table(rocCurveData, sep = ",", file = filenameROCData, col.names = FALSE)
  }

  # confusionMatrix() expects the predictions as `data` and the truth as
  # `reference`; the two were previously passed the other way round.
  loocvCM <- confusionMatrix(data = loocvPreds$pred, reference = loocvPreds$obs)
  print(loocvCM)
  multiClassSummaryStatsDist <- data.frame(do.call(rbind, multiClassSummaryStats))

  # Write the matrix to disk once, without leaving a sink() open if printing
  # fails.
  writeLines(capture.output(print(loocvCM)), con = filenameSink)

  invisible(list(confusionMatrix = loocvCM,
                 summaryStats = multiClassSummaryStatsDist,
                 predictions = loocvPreds,
                 varImp = varImpBestModelDF2OrderedNonzeroList))
}

#-----------------------------------------------------

# One-vs-rest GBM classifier evaluated on a single 70/30 train/test split.
#
# NOTE: an earlier definition of ml2DTs appears above in this same cell; this
# later definition is the one in effect once the cell has run.
#
# Samples whose disease_type_consol is PRAD, SKCM or NSCLC are kept, relabelled
# as `classOfInterest` vs "Other", sub-sampled per class with stratified(),
# split 70/30, and a GBM is tuned with repeated k-fold CV (up-sampling,
# optimising ROC). ROC and precision-recall curves for the held-out samples are
# plotted on the active graphics device.
#
# Arguments:
#   snmData         Feature table whose row names are sample IDs matching the
#                   row names of metadataPSMatchedDPQCFiltered.
#   classOfInterest Value of disease_type_consol treated as the positive class.
#                   The default is not one of the three retained disease types,
#                   so callers need to pass e.g. "PRAD", "SKCM" or "NSCLC".
#   cutPoint        Probability threshold for calling a sample positive in the
#                   returned confusion matrix.
#   samplingSize    Per-class `size` handed to stratified().
#   caretTuneGrid   Tuning grid for train(). When the argument is not supplied
#                   the function keeps its historical behaviour and uses
#                   customGBMGrid; an explicitly supplied grid is now honoured
#                   (it used to be ignored).
#
# Returns: a caret confusionMatrix object with "TRUE" (= classOfInterest) as
#          the positive class.
#
# Reads the globals metadataPSMatchedDPQCFiltered, customGBMGrid, numKFold and
# numResampleIter.
ml2DTs <- function(snmData,
                   classOfInterest = "Lung Adenocarcinoma",
                   cutPoint = 0.5,
                   samplingSize = 20,
                   caretTuneGrid = defaultGBMGrid) {

  # Same clean-up that is applied to the class labels before training, so the
  # positive-class name always matches the model's probability column.
  cleanLabel <- function(x) gsub('([[:punct:]])|\\s+', '', x)

  # Preserve the grid that was always used when no grid is passed explicitly.
  tuneGridToUse <- if (missing(caretTuneGrid)) customGBMGrid else caretTuneGrid

  keepRows <- metadataPSMatchedDPQCFiltered$disease_type_consol %in%
    c("PRAD", "SKCM", "NSCLC")
  metaTmp1 <- droplevels(metadataPSMatchedDPQCFiltered[keepRows, ])

  # Without at least one positive sample the outcome has a single level and
  # train() fails with a much less helpful message.
  if (!any(metaTmp1$disease_type_consol == classOfInterest, na.rm = TRUE)) {
    stop("classOfInterest '", classOfInterest,
         "' is not present among the PRAD/SKCM/NSCLC samples", call. = FALSE)
  }

  tmp <- metaTmp1
  tmp$disease_type_consol <- factor(ifelse(metaTmp1$disease_type_consol == classOfInterest,
                                           yes = classOfInterest, no = "Other"))
  metadataSimSampled <- as.data.frame(stratified(tmp,
                                                 group = "disease_type_consol",
                                                 size = samplingSize,
                                                 keep.rownames = TRUE,
                                                 replace = FALSE,
                                                 bothSets = FALSE))
  # stratified(keep.rownames = TRUE) stores the original row names in `rn`.
  rownames(metadataSimSampled) <- metadataSimSampled$rn
  mlDataY <- metadataSimSampled
  mlDataX <- snmData[rownames(mlDataY), ]

  set.seed(42)
  index <- createDataPartition(mlDataY$disease_type_consol, p = 0.7, list = FALSE)
  trainX <- mlDataX[index, ]
  trainY <- mlDataY[index, ]$disease_type_consol
  testX <- mlDataX[-index, ]
  testY <- mlDataY[-index, ]$disease_type_consol

  # caret needs class labels that are valid R names when classProbs = TRUE.
  refactoredTrainY <- factor(cleanLabel(trainY))
  refactoredTestY <- factor(cleanLabel(testY))

  set.seed(42)
  ctrl <- trainControl(method = "repeatedcv",
                       number = numKFold,
                       repeats = numResampleIter,
                       sampling = "up",
                       summaryFunction = twoClassSummary,
                       classProbs = TRUE,
                       verboseIter = TRUE,
                       savePredictions = TRUE,
                       allowParallel = TRUE)

  mlModel <- train(x = trainX,
                   y = refactoredTrainY,
                   method = "gbm",
                   preProcess = c("scale", "center"),
                   trControl = ctrl,
                   verbose = TRUE,
                   metric = "ROC",
                   tuneGrid = tuneGridToUse)

  positiveClass <- cleanLabel(classOfInterest)
  negativeClass <- "Other"

  # Predict once and reuse the probabilities for the curves and the matrix.
  predProbs <- as.numeric(predict(mlModel, newdata = testX, type = "prob")[, positiveClass])
  fg <- predProbs[refactoredTestY == positiveClass]
  bg <- predProbs[refactoredTestY == negativeClass]

  prroc_roc <- roc.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE)
  prroc_pr <- pr.curve(scores.class0 = fg, scores.class1 = bg, curve = TRUE,
                       rand.compute = TRUE)

  plot(prroc_roc)
  plot(prroc_pr)

  # Fixing both factor levels keeps the table 2 x 2 even when every prediction
  # falls on one side of the cut point; positive = "TRUE" reports sensitivity
  # etc. for the class of interest rather than for "Other".
  calledPositive <- factor(predProbs >= cutPoint, levels = c(FALSE, TRUE))
  trulyPositive <- factor(refactoredTestY == positiveClass, levels = c(FALSE, TRUE))
  confusionMatrix(table(Prediction = calledPositive, Reference = trulyPositive),
                  positive = "TRUE")
}


In [8]:
%%R
# Healthy-vs-cancer LOOCV over the Control, SKCM, PRAD and NSCLC samples.
# HvsCFlag = TRUE makes loocvDTs classify on the HvsC column rather than on
# the individual disease types.
hVsC4 <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 100,
  DTs = c("Control", "SKCM", "PRAD", "NSCLC"),
  caretTuneGrid = defaultGBMGrid,
  HvsCFlag = TRUE
)

[1] "Iteration: 1/169"
[1] "Iteration: 2/169"
[1] "Iteration: 3/169"
[1] "Iteration: 4/169"
[1] "Iteration: 5/169"
[1] "Iteration: 6/169"
[1] "Iteration: 7/169"
[1] "Iteration: 8/169"
[1] "Iteration: 9/169"
[1] "Iteration: 10/169"
[1] "Iteration: 11/169"
[1] "Iteration: 12/169"
[1] "Iteration: 13/169"
[1] "Iteration: 14/169"
[1] "Iteration: 15/169"
[1] "Iteration: 16/169"
[1] "Iteration: 17/169"
[1] "Iteration: 18/169"
[1] "Iteration: 19/169"
[1] "Iteration: 20/169"
[1] "Iteration: 21/169"
[1] "Iteration: 22/169"
[1] "Iteration: 23/169"
[1] "Iteration: 24/169"
[1] "Iteration: 25/169"
[1] "Iteration: 26/169"
[1] "Iteration: 27/169"
[1] "Iteration: 28/169"
[1] "Iteration: 29/169"
[1] "Iteration: 30/169"
[1] "Iteration: 31/169"
[1] "Iteration: 32/169"
[1] "Iteration: 33/169"
[1] "Iteration: 34/169"
[1] "Iteration: 35/169"
[1] "Iteration: 36/169"
[1] "Iteration: 37/169"
[1] "Iteration: 38/169"
[1] "Iteration: 39/169"
[1] "Iteration: 40/169"
[1] "Iteration: 41/169"
[1] "Iteration: 42/169"
[

In [7]:
%%R
# Pairwise LOOCV: PRAD vs Control, requesting 69 samples per class.
hVsC_PRAD <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 69,
  DTs = c("PRAD", "Control"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/128"
[1] "Iteration: 2/128"
[1] "Iteration: 3/128"
[1] "Iteration: 4/128"
[1] "Iteration: 5/128"
[1] "Iteration: 6/128"
[1] "Iteration: 7/128"
[1] "Iteration: 8/128"
[1] "Iteration: 9/128"
[1] "Iteration: 10/128"
[1] "Iteration: 11/128"
[1] "Iteration: 12/128"
[1] "Iteration: 13/128"
[1] "Iteration: 14/128"
[1] "Iteration: 15/128"
[1] "Iteration: 16/128"
[1] "Iteration: 17/128"
[1] "Iteration: 18/128"
[1] "Iteration: 19/128"
[1] "Iteration: 20/128"
[1] "Iteration: 21/128"
[1] "Iteration: 22/128"
[1] "Iteration: 23/128"
[1] "Iteration: 24/128"
[1] "Iteration: 25/128"
[1] "Iteration: 26/128"
[1] "Iteration: 27/128"
[1] "Iteration: 28/128"
[1] "Iteration: 29/128"
[1] "Iteration: 30/128"
[1] "Iteration: 31/128"
[1] "Iteration: 32/128"
[1] "Iteration: 33/128"
[1] "Iteration: 34/128"
[1] "Iteration: 35/128"
[1] "Iteration: 36/128"
[1] "Iteration: 37/128"
[1] "Iteration: 38/128"
[1] "Iteration: 39/128"
[1] "Iteration: 40/128"
[1] "Iteration: 41/128"
[1] "Iteration: 42/128"
[

In [8]:
%%R
# Pairwise LOOCV: NSCLC vs Control, requesting 69 samples per class.
hVsC_NSCLC <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 69,
  DTs = c("NSCLC", "Control"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/94"
[1] "Iteration: 2/94"
[1] "Iteration: 3/94"
[1] "Iteration: 4/94"
[1] "Iteration: 5/94"
[1] "Iteration: 6/94"
[1] "Iteration: 7/94"
[1] "Iteration: 8/94"
[1] "Iteration: 9/94"
[1] "Iteration: 10/94"
[1] "Iteration: 11/94"
[1] "Iteration: 12/94"
[1] "Iteration: 13/94"
[1] "Iteration: 14/94"
[1] "Iteration: 15/94"
[1] "Iteration: 16/94"
[1] "Iteration: 17/94"
[1] "Iteration: 18/94"
[1] "Iteration: 19/94"
[1] "Iteration: 20/94"
[1] "Iteration: 21/94"
[1] "Iteration: 22/94"
[1] "Iteration: 23/94"
[1] "Iteration: 24/94"
[1] "Iteration: 25/94"
[1] "Iteration: 26/94"
[1] "Iteration: 27/94"
[1] "Iteration: 28/94"
[1] "Iteration: 29/94"
[1] "Iteration: 30/94"
[1] "Iteration: 31/94"
[1] "Iteration: 32/94"
[1] "Iteration: 33/94"
[1] "Iteration: 34/94"
[1] "Iteration: 35/94"
[1] "Iteration: 36/94"
[1] "Iteration: 37/94"
[1] "Iteration: 38/94"
[1] "Iteration: 39/94"
[1] "Iteration: 40/94"
[1] "Iteration: 41/94"
[1] "Iteration: 42/94"
[1] "Iteration: 43/94"
[1] "Iteration: 44/9

In [9]:
%%R
# Pairwise LOOCV: SKCM vs Control, requesting 69 samples per class.
hVsC_SKCM <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 69,
  DTs = c("SKCM", "Control"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/85"
[1] "Iteration: 2/85"
[1] "Iteration: 3/85"
[1] "Iteration: 4/85"
[1] "Iteration: 5/85"
[1] "Iteration: 6/85"
[1] "Iteration: 7/85"
[1] "Iteration: 8/85"
[1] "Iteration: 9/85"
[1] "Iteration: 10/85"
[1] "Iteration: 11/85"
[1] "Iteration: 12/85"
[1] "Iteration: 13/85"
[1] "Iteration: 14/85"
[1] "Iteration: 15/85"
[1] "Iteration: 16/85"
[1] "Iteration: 17/85"
[1] "Iteration: 18/85"
[1] "Iteration: 19/85"
[1] "Iteration: 20/85"
[1] "Iteration: 21/85"
[1] "Iteration: 22/85"
[1] "Iteration: 23/85"
[1] "Iteration: 24/85"
[1] "Iteration: 25/85"
[1] "Iteration: 26/85"
[1] "Iteration: 27/85"
[1] "Iteration: 28/85"
[1] "Iteration: 29/85"
[1] "Iteration: 30/85"
[1] "Iteration: 31/85"
[1] "Iteration: 32/85"
[1] "Iteration: 33/85"
[1] "Iteration: 34/85"
[1] "Iteration: 35/85"
[1] "Iteration: 36/85"
[1] "Iteration: 37/85"
[1] "Iteration: 38/85"
[1] "Iteration: 39/85"
[1] "Iteration: 40/85"
[1] "Iteration: 41/85"
[1] "Iteration: 42/85"
[1] "Iteration: 43/85"
[1] "Iteration: 44/8

In [12]:
%%R
# Pairwise LOOCV between two cancer types: PRAD vs NSCLC, requesting 59
# samples per class.
prad_nsclc <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 59,
  DTs = c("PRAD", "NSCLC"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/84"
[1] "Iteration: 2/84"
[1] "Iteration: 3/84"
[1] "Iteration: 4/84"
[1] "Iteration: 5/84"
[1] "Iteration: 6/84"
[1] "Iteration: 7/84"
[1] "Iteration: 8/84"
[1] "Iteration: 9/84"
[1] "Iteration: 10/84"
[1] "Iteration: 11/84"
[1] "Iteration: 12/84"
[1] "Iteration: 13/84"
[1] "Iteration: 14/84"
[1] "Iteration: 15/84"
[1] "Iteration: 16/84"
[1] "Iteration: 17/84"
[1] "Iteration: 18/84"
[1] "Iteration: 19/84"
[1] "Iteration: 20/84"
[1] "Iteration: 21/84"
[1] "Iteration: 22/84"
[1] "Iteration: 23/84"
[1] "Iteration: 24/84"
[1] "Iteration: 25/84"
[1] "Iteration: 26/84"
[1] "Iteration: 27/84"
[1] "Iteration: 28/84"
[1] "Iteration: 29/84"
[1] "Iteration: 30/84"
[1] "Iteration: 31/84"
[1] "Iteration: 32/84"
[1] "Iteration: 33/84"
[1] "Iteration: 34/84"
[1] "Iteration: 35/84"
[1] "Iteration: 36/84"
[1] "Iteration: 37/84"
[1] "Iteration: 38/84"
[1] "Iteration: 39/84"
[1] "Iteration: 40/84"
[1] "Iteration: 41/84"
[1] "Iteration: 42/84"
[1] "Iteration: 43/84"
[1] "Iteration: 44/8

In [13]:
%%R
# Pairwise LOOCV between two cancer types: NSCLC vs SKCM, requesting 25
# samples per class.
nsclc_skcm <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 25,
  DTs = c("NSCLC", "SKCM"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/41"
[1] "Iteration: 2/41"
[1] "Iteration: 3/41"
[1] "Iteration: 4/41"
[1] "Iteration: 5/41"
[1] "Iteration: 6/41"
[1] "Iteration: 7/41"
[1] "Iteration: 8/41"
[1] "Iteration: 9/41"
[1] "Iteration: 10/41"
[1] "Iteration: 11/41"
[1] "Iteration: 12/41"
[1] "Iteration: 13/41"
[1] "Iteration: 14/41"
[1] "Iteration: 15/41"
[1] "Iteration: 16/41"
[1] "Iteration: 17/41"
[1] "Iteration: 18/41"
[1] "Iteration: 19/41"
[1] "Iteration: 20/41"
[1] "Iteration: 21/41"
[1] "Iteration: 22/41"
[1] "Iteration: 23/41"
[1] "Iteration: 24/41"
[1] "Iteration: 25/41"
[1] "Iteration: 26/41"
[1] "Iteration: 27/41"
[1] "Iteration: 28/41"
[1] "Iteration: 29/41"
[1] "Iteration: 30/41"
[1] "Iteration: 31/41"
[1] "Iteration: 32/41"
[1] "Iteration: 33/41"
[1] "Iteration: 34/41"
[1] "Iteration: 35/41"
[1] "Iteration: 36/41"
[1] "Iteration: 37/41"
[1] "Iteration: 38/41"
[1] "Iteration: 39/41"
[1] "Iteration: 40/41"
[1] "Iteration: 41/41"
          logLoss               AUC             prAUC          Acc

In [14]:
%%R
# Pairwise LOOCV between two cancer types: PRAD vs SKCM, requesting 59
# samples per class.
prad_skcm <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 59,
  DTs = c("PRAD", "SKCM"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/75"
[1] "Iteration: 2/75"
[1] "Iteration: 3/75"
[1] "Iteration: 4/75"
[1] "Iteration: 5/75"
[1] "Iteration: 6/75"
[1] "Iteration: 7/75"
[1] "Iteration: 8/75"
[1] "Iteration: 9/75"
[1] "Iteration: 10/75"
[1] "Iteration: 11/75"
[1] "Iteration: 12/75"
[1] "Iteration: 13/75"
[1] "Iteration: 14/75"
[1] "Iteration: 15/75"
[1] "Iteration: 16/75"
[1] "Iteration: 17/75"
[1] "Iteration: 18/75"
[1] "Iteration: 19/75"
[1] "Iteration: 20/75"
[1] "Iteration: 21/75"
[1] "Iteration: 22/75"
[1] "Iteration: 23/75"
[1] "Iteration: 24/75"
[1] "Iteration: 25/75"
[1] "Iteration: 26/75"
[1] "Iteration: 27/75"
[1] "Iteration: 28/75"
[1] "Iteration: 29/75"
[1] "Iteration: 30/75"
[1] "Iteration: 31/75"
[1] "Iteration: 32/75"
[1] "Iteration: 33/75"
[1] "Iteration: 34/75"
[1] "Iteration: 35/75"
[1] "Iteration: 36/75"
[1] "Iteration: 37/75"
[1] "Iteration: 38/75"
[1] "Iteration: 39/75"
[1] "Iteration: 40/75"
[1] "Iteration: 41/75"
[1] "Iteration: 42/75"
[1] "Iteration: 43/75"
[1] "Iteration: 44/7

In [15]:
%%R
# Three-class LOOCV among the cancer types PRAD, NSCLC and SKCM, requesting 59
# samples per class.
DT3 <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 59,
  DTs = c("PRAD", "NSCLC", "SKCM"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/100"
[1] "Iteration: 2/100"
[1] "Iteration: 3/100"
[1] "Iteration: 4/100"
[1] "Iteration: 5/100"
[1] "Iteration: 6/100"
[1] "Iteration: 7/100"
[1] "Iteration: 8/100"
[1] "Iteration: 9/100"
[1] "Iteration: 10/100"
[1] "Iteration: 11/100"
[1] "Iteration: 12/100"
[1] "Iteration: 13/100"
[1] "Iteration: 14/100"
[1] "Iteration: 15/100"
[1] "Iteration: 16/100"
[1] "Iteration: 17/100"
[1] "Iteration: 18/100"
[1] "Iteration: 19/100"
[1] "Iteration: 20/100"
[1] "Iteration: 21/100"
[1] "Iteration: 22/100"
[1] "Iteration: 23/100"
[1] "Iteration: 24/100"
[1] "Iteration: 25/100"
[1] "Iteration: 26/100"
[1] "Iteration: 27/100"
[1] "Iteration: 28/100"
[1] "Iteration: 29/100"
[1] "Iteration: 30/100"
[1] "Iteration: 31/100"
[1] "Iteration: 32/100"
[1] "Iteration: 33/100"
[1] "Iteration: 34/100"
[1] "Iteration: 35/100"
[1] "Iteration: 36/100"
[1] "Iteration: 37/100"
[1] "Iteration: 38/100"
[1] "Iteration: 39/100"
[1] "Iteration: 40/100"
[1] "Iteration: 41/100"
[1] "Iteration: 42/100"
[

In [16]:
%%R
# Four-class LOOCV: Control plus the cancer types PRAD, NSCLC and SKCM,
# requesting 69 samples per class.
DTH4 <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 69,
  DTs = c("Control", "PRAD", "NSCLC", "SKCM"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/169"
[1] "Iteration: 2/169"
[1] "Iteration: 3/169"
[1] "Iteration: 4/169"
[1] "Iteration: 5/169"
[1] "Iteration: 6/169"
[1] "Iteration: 7/169"
[1] "Iteration: 8/169"
[1] "Iteration: 9/169"
[1] "Iteration: 10/169"
[1] "Iteration: 11/169"
[1] "Iteration: 12/169"
[1] "Iteration: 13/169"
[1] "Iteration: 14/169"
[1] "Iteration: 15/169"
[1] "Iteration: 16/169"
[1] "Iteration: 17/169"
[1] "Iteration: 18/169"
[1] "Iteration: 19/169"
[1] "Iteration: 20/169"
[1] "Iteration: 21/169"
[1] "Iteration: 22/169"
[1] "Iteration: 23/169"
[1] "Iteration: 24/169"
[1] "Iteration: 25/169"
[1] "Iteration: 26/169"
[1] "Iteration: 27/169"
[1] "Iteration: 28/169"
[1] "Iteration: 29/169"
[1] "Iteration: 30/169"
[1] "Iteration: 31/169"
[1] "Iteration: 32/169"
[1] "Iteration: 33/169"
[1] "Iteration: 34/169"
[1] "Iteration: 35/169"
[1] "Iteration: 36/169"
[1] "Iteration: 37/169"
[1] "Iteration: 38/169"
[1] "Iteration: 39/169"
[1] "Iteration: 40/169"
[1] "Iteration: 41/169"
[1] "Iteration: 42/169"
[

In [17]:
%%R
# Three-class LOOCV: Control, PRAD and NSCLC, requesting 69 samples per class.
DTH3 <- loocvDTs(
  snmData = snmShogundataPSUniqueDecontamDPQC,
  samplingSize = 69,
  DTs = c("Control", "PRAD", "NSCLC"),
  caretTuneGrid = defaultGBMGrid
)

[1] "Iteration: 1/153"
[1] "Iteration: 2/153"
[1] "Iteration: 3/153"
[1] "Iteration: 4/153"
[1] "Iteration: 5/153"
[1] "Iteration: 6/153"
[1] "Iteration: 7/153"
[1] "Iteration: 8/153"
[1] "Iteration: 9/153"
[1] "Iteration: 10/153"
[1] "Iteration: 11/153"
[1] "Iteration: 12/153"
[1] "Iteration: 13/153"
[1] "Iteration: 14/153"
[1] "Iteration: 15/153"
[1] "Iteration: 16/153"
[1] "Iteration: 17/153"
[1] "Iteration: 18/153"
[1] "Iteration: 19/153"
[1] "Iteration: 20/153"
[1] "Iteration: 21/153"
[1] "Iteration: 22/153"
[1] "Iteration: 23/153"
[1] "Iteration: 24/153"
[1] "Iteration: 25/153"
[1] "Iteration: 26/153"
[1] "Iteration: 27/153"
[1] "Iteration: 28/153"
[1] "Iteration: 29/153"
[1] "Iteration: 30/153"
[1] "Iteration: 31/153"
[1] "Iteration: 32/153"
[1] "Iteration: 33/153"
[1] "Iteration: 34/153"
[1] "Iteration: 35/153"
[1] "Iteration: 36/153"
[1] "Iteration: 37/153"
[1] "Iteration: 38/153"
[1] "Iteration: 39/153"
[1] "Iteration: 40/153"
[1] "Iteration: 41/153"
[1] "Iteration: 42/153"
[