In [16]:
# Install the required packages if they are not already available.
# The custom library is searched as well as the default library paths, because
# the packages are installed into (and later loaded from) that custom library;
# checking only the default paths would trigger a reinstall on every run.
r_lib <- "/mind_data/jeej/Rlib" # replace with Rlib location

for (pkg in c("survival", "cmprsk", "survivalROC")) {
  pkg_available <- requireNamespace(
    pkg,
    lib.loc = c(r_lib, .libPaths()),
    quietly = TRUE
  )
  if (!pkg_available) {
    install.packages(pkg, lib = r_lib)
  }
}

also installing the dependencies 'progress', 'cli', 'cpp11', 'hms', 'vroom', 'tzdb', 'purrr', 'stringr', 'downloader', 'igraph', 'readr', 'tidyr', 'visNetwork', 'data.tree', 'DiagrammeR'




# Fine-Gray competing-risks analyses

In [1]:
library(cmprsk,lib.loc='/mind_data/jeej/Rlib') # replace with Rlib location
library(data.table)
library(dplyr)
library(survivalROC,lib.loc='/mind_data/jeej/Rlib')

Loading required package: survival


Attaching package: 'dplyr'


The following objects are masked from 'package:data.table':

    between, first, last


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union




In [6]:
# load primary data

# Read one cohort CSV and turn its "True"/"False" text columns into logicals.
#
# path: location of the CSV file (read with a header row).
# Returns a data.frame in which every column consisting solely of the strings
# "True" and "False" is replaced by the matching TRUE/FALSE values; all other
# columns are returned exactly as read.csv() produced them.
# NOTE(review): read.csv() may already parse pure True/False columns as
# logical, in which case no column matches below -- confirm whether this
# conversion step is still needed.
read_cohort <- function(path) {
  cohort <- read.csv(path, header = TRUE)
  # vapply() always yields a logical vector, even for a table without columns.
  is_flag <- vapply(
    cohort,
    function(column) all(column %in% c("True", "False")),
    logical(1)
  )
  for (flag_name in names(cohort)[is_flag]) {
    cohort[[flag_name]] <- cohort[[flag_name]] == "True"
  }
  cohort
}

# discovery cohort (MSK-ACCESS)
vte <- read_cohort("../data/discovery.csv")

# validation cohort (MSK-ACCESS)
vte2 <- read_cohort("../data/validation.csv")

# resbio = ctDx generalizability cohort
resbio_VTE <- read_cohort("../data/generalizability_msk.csv")

sydney <- read_cohort("../data/generalizability_sydney.csv")

# Stack the two generalizability cohorts; fill = TRUE pads columns that exist
# in only one of the tables with NA.
resbio_all <- rbindlist(list(resbio_VTE, sydney), fill = TRUE)

In [6]:
# Example: association of ctDNA with VTE in the discovery cohort, fitted with
# competing-risks regression (crr): follow-up time = stop, event/status code =
# CAT_DEATH_ENDPT, single covariate = X.ctDNA.
z <- crr(
  ftime = vte$stop,
  fstatus = vte$CAT_DEATH_ENDPT,
  cov1 = vte$X.ctDNA
)
print(z)

# Table of exp(coef), exp(-coef) and the 2.5% / 97.5% confidence bounds.
summary(z)$conf.int

convergence:  TRUE 
coefficients:
vte$X.ctDNA1 
      0.9105 
standard errors:
[1] 0.1136
two-sided p-values:
vte$X.ctDNA1 
     1.1e-15 


Unnamed: 0,exp(coef),exp(-coef),2.5%,97.5%
vte$X.ctDNA1,2.485543,0.4023265,1.98942,3.105391


In [7]:
# Example of the >6mo analysis: the same competing-risks model, fitted only on
# rows of the combined generalizability cohort whose `stop` exceeds 180.
# The row filter is written out inside each argument on purpose: crr labels an
# unnamed covariate with the text of the cov1 expression.
z <- crr(
  ftime = resbio_all[resbio_all$stop > 180, ]$stop,
  fstatus = resbio_all[resbio_all$stop > 180, ]$CAT_DEATH_ENDPT,
  cov1 = resbio_all[resbio_all$stop > 180, ]$X.ctDNA
)
print(z)

# Table of exp(coef), exp(-coef) and the 2.5% / 97.5% confidence bounds.
summary(z)$conf.int

convergence:  TRUE 
coefficients:
resbio_all[resbio_all$stop > 180, ]$X.ctDNA1 
                                      0.4832 
standard errors:
[1] 0.3461
two-sided p-values:
resbio_all[resbio_all$stop > 180, ]$X.ctDNA1 
                                        0.16 


Unnamed: 0,exp(coef),exp(-coef),2.5%,97.5%
"resbio_all[resbio_all$stop > 180, ]$X.ctDNA1",1.621246,0.6168097,0.8227671,3.194631


In [19]:
# Example multivariate analysis: ctDNA together with Khorana score, number of
# organ sites, log10 cfDNA concentration and chemotherapy as covariates.
z <- crr(
  ftime = vte$stop,
  fstatus = vte$CAT_DEATH_ENDPT,
  cov1 = vte[c(
    "X.ctDNA",
    "KHORANA.SCORE",
    "N.organ.sites",
    "log10.cfDNA.concentration.",
    "chemotherapy"
  )]
)

# Build the hazard-ratio table once, show it, then export the same table.
hr_table <- summary(z)$conf.int
hr_table
write.csv(hr_table, "multivariate_HR_example.csv", row.names = TRUE)

Unnamed: 0,exp(coef),exp(-coef),2.5%,97.5%
X.ctDNA,1.655933,0.603889,1.29902,2.11091
KHORANA.SCORE,1.133262,0.8824081,1.033132,1.243098
N.organ.sites,1.121745,0.8914684,1.065183,1.18131
log10.cfDNA.concentration.,1.614733,0.6192973,1.358886,1.918751
chemotherapy,1.705112,0.5864716,1.4044,2.070214


In [18]:
# example testing whether anticoagulation is associated with VTE in ctDNA+ patients

# Cancer-type indicator columns used as adjustment covariates.
cois <- list("Non.Small.Cell.Lung.Cancer", "Breast.Cancer", "Pancreatic.Cancer",
            "Melanoma", "Prostate.Cancer", "Bladder.Cancer",
            "Esophagogastric.Cancer", "Hepatobiliary.Cancer", "Colorectal.Cancer")

# Keep only the ctDNA-positive rows. which(as.logical(...)) selects exactly the
# rows whose flag is TRUE: a missing flag no longer produces an all-NA row, and
# a flag stored as 0/1 or as text is not misread as row positions or row names.
temp <- vte[which(as.logical(vte$X.ctDNA)), ]

# Competing-risks regression of prior anticoagulant exposure, adjusted for
# lt_start, AGE and the cancer-type indicators.
z <- crr(
  ftime = temp$stop,
  fstatus = temp$CAT_DEATH_ENDPT,
  cov1 = temp[c(
    "previous.enoxaparin.dalteparin.xaban.warfarin.fondaparinux.dabigatran",
    "lt_start", "AGE", unlist(cois)
  )]
)

# Table of exp(coef), exp(-coef) and the 2.5% / 97.5% confidence bounds.
summary(z)$conf.int

Unnamed: 0,exp(coef),exp(-coef),2.5%,97.5%
previous.enoxaparin.dalteparin.xaban.warfarin.fondaparinux.dabigatran,0.494842,2.0208469,0.30281598,0.8086384
lt_start,0.9999016,1.0000984,0.99980298,1.0000003
AGE,1.0005251,0.9994752,0.9936241,1.007474
Non.Small.Cell.Lung.Cancer,1.9419929,0.5149349,1.2562199,3.0021307
Breast.Cancer,0.793578,1.2601156,0.46466741,1.3553048
Pancreatic.Cancer,2.2059238,0.4533248,1.32354923,3.6765538
Melanoma,0.8495063,1.1771542,0.35081042,2.0571254
Prostate.Cancer,1.2199999,0.8196722,0.64236218,2.3170724
Bladder.Cancer,1.5133203,0.6607986,0.83180179,2.7532261
Esophagogastric.Cancer,1.2020071,0.8319419,0.59983356,2.4087032


# RSF risk scores for dynamic AUC

In [14]:
# note requires risk score output to run (from run_rsf_vte.py)

# load validation dataset if not done yet
vte2 <- read.csv("../data/validation.csv", header = TRUE)
# Columns holding only the strings "True"/"False" are converted to logicals.
logical_columns <- vapply(
  vte2,
  function(x) all(x %in% c("True", "False")),
  logical(1)
)
for (flag_name in names(logical_columns)[logical_columns]) {
  vte2[[flag_name]] <- vte2[[flag_name]] == "True"
}

# load risk scores (from running run_rsf_vte.py)
vte2_riskscore <- read.csv("vte_riskscores_validation.csv", header = TRUE)

# Risk-score columns to evaluate, one per model.
score_columns <- c("Khorana.Score.chemotherapy", "LB.")

# Scores are paired with outcomes purely by row position, so fail early if the
# two tables cannot line up or an expected score column is absent.
# NOTE(review): this assumes both files list patients in the same order --
# confirm against run_rsf_vte.py.
stopifnot(
  nrow(vte2_riskscore) == nrow(vte2),
  all(score_columns %in% names(vte2_riskscore))
)

# generate ROC values for the two models
for (score_column in score_columns) {
  # status is TRUE only where CAT_DEATH_ENDPT equals 1; every other code is
  # passed as a non-event. [[ ]] hands survivalROC a plain vector rather than
  # a one-column data.frame.
  ROC.1 <- survivalROC(
    Stime = vte2$stop,
    status = vte2$CAT_DEATH_ENDPT == 1,
    marker = vte2_riskscore[[score_column]],
    predict.time = 180,
    lambda = 0.05
  )

  # NOTE(review): paste() separates its pieces with spaces, so the files are
  # named like "validation_ LB. _dROC.csv". The names are kept unchanged here
  # because downstream code may read them; switch to paste0() if the spaces
  # are unintended.
  write.csv(ROC.1, file = paste("validation_", score_column, "_dROC.csv"))
}