# NBL-survival
For response to reviewers.
Is there any difference in survival between extra- and intrachromosomally amplified neuroblastomas?

## Dependencies
Dependencies:  
r-survival.yml  
Run `preprocess-rodriguezfos-data.ipynb`

## Results
ecDNA-amplified tumors have (barely) significantly worse 5-year overall survival than intrachromosomally amplified tumors, both in the
Kaplan-Meier analysis (pairwise log-rank test, BH-adjusted p = 0.042) and in the Cox model controlling for sex, age, and amplification status ONLY (p = 0.048). Including MYCN amplification as a covariate, using either the Rodriguez annotations or the AmpliconClassifier annotations, renders the effect of ecDNA nonsignificant. Figures are saved to `./out`.

## TODO
- Association test comparing incidence of ecDNA vs chromosomal amp for MYCN vs other loci?
- Follow up with Elias and Anton about discrepant ecDNA classifications.

In [None]:
# Request English R messages for this session.
Sys.setenv(LANGUAGE = "en") # set language to "ja" if you prefer

# Packages are attached in this order on purpose: later packages mask
# same-named functions from earlier ones, so do not reorder casually.
library(tidyverse)
library(readxl)
library(dplyr)
library(stringr)
library(naniar) # for the replace-with-NA helper
library(survival)
library(survminer)
library(RColorBrewer)
library(janitor)
library(gt)
library(gtsummary)
library(ggsurvfit)
library(extrafont)
library(svglite)

# Import fonts matching "Arial" into extrafont without the interactive prompt,
# then load them; the plotting helpers in this notebook request
# base_family = "Arial".
extrafont::font_import(pattern="Arial",prompt=FALSE)
extrafont::loadfonts()

# Record R and package versions in the notebook output.
sessionInfo()

In [None]:
# From survival.ipynb, load data
dir.create('out', showWarnings = FALSE)

# Clean a merged clinical/amplicon table and derive the columns used by the
# survival analyses.
#
# Args:
#   combinedsurv: data frame with (at least) the columns amplicon_class,
#     OS_status, OS_months, cancer_type, MYCN_amp and MYCN_amp_AC.
#     amplicon_class and OS_status are compared against string values, so
#     they are expected to be character columns.
#
# Returns:
#   `combinedsurv` without rows lacking amplicon_class, OS_status or a numeric
#   OS_months, plus:
#     OS_months_5y  follow-up time capped at 60 months
#     OS_status_5y  1 = status other than "Alive" recorded at <= 60 months,
#                   0 = censored
#     ecDNA_status  factor, reference level "ecDNA-"
#     amplified     factor of TRUE/FALSE, reference level "TRUE" when present
#   amplicon_class ("intrachromosomal" recoded to "chromosomal"), cancer_type,
#   MYCN_amp and MYCN_amp_AC are returned as factors.
#
# Side effects:
#   Emits a message with the number of rows dropped.
preprocess_survival_data <- function(combinedsurv) {
  censor_months <- 60 # 5 years of follow-up
  n_before <- nrow(combinedsurv)

  combinedsurv <- combinedsurv %>%
    # Coerce before filtering: values that cannot be parsed as numbers become
    # NA here and are removed by the filter instead of leaking into the models.
    mutate(OS_months = as.numeric(OS_months)) %>%
    filter(complete.cases(amplicon_class, OS_status, OS_months)) %>%
    # Administrative censoring at `censor_months`; an event recorded exactly
    # at the cut-off still counts as an event.
    mutate(
      OS_months_5y = pmin(OS_months, censor_months),
      OS_status_5y = if_else(
        OS_months <= censor_months & OS_status != "Alive", 1, 0
      )
    ) %>%
    # Amplification groupings (ecDNA_status is derived before the recode, but
    # the recode does not touch the "ecDNA" class).
    mutate(
      ecDNA_status = if_else(amplicon_class == "ecDNA", "ecDNA+", "ecDNA-"),
      amplicon_class = if_else(
        amplicon_class == "intrachromosomal", "chromosomal", amplicon_class
      ),
      amplified = amplicon_class %in% c("ecDNA", "chromosomal")
    ) %>%
    # Convert grouping columns to factors
    mutate(
      ecDNA_status = factor(ecDNA_status),
      amplicon_class = factor(amplicon_class),
      cancer_type = factor(cancer_type),
      amplified = factor(amplified),
      MYCN_amp = factor(MYCN_amp),
      MYCN_amp_AC = factor(MYCN_amp_AC)
    )

  combinedsurv$ecDNA_status <- relevel(combinedsurv$ecDNA_status, ref = "ecDNA-")
  # `ref` must be the level *name* as a string: a logical TRUE is not matched
  # against the level "TRUE". Amplified tumours are the baseline, so the
  # `amplified` term contrasts non-amplified against amplified tumours; the
  # ecDNA_status estimate does not depend on this choice. Skip the call when
  # no amplified rows remain, because relevel() errors on a missing level.
  if ("TRUE" %in% levels(combinedsurv$amplified)) {
    combinedsurv$amplified <- relevel(combinedsurv$amplified, ref = "TRUE")
  }
  #combinedsurv$MYCN_amp <- relevel(combinedsurv$MYCN_amp, ref = "FALSE")

  message(
    "Dropped ", n_before - nrow(combinedsurv),
    " entries with missing amplicon class or survival data"
  )
  combinedsurv
}

In [None]:
# Read a tab-separated survival table from `path` (column-type messages
# suppressed) and return it after preprocess_survival_data().
load_survival_data <- function(path) {
  raw_table <- read_tsv(path, show_col_types = FALSE)
  return(preprocess_survival_data(raw_table))
}

In [None]:
# Load and preprocess the neuroblastoma survival table.
nbl_survival_data <- "out/processed_nbl_survival_data.tsv"
data <- load_survival_data(nbl_survival_data)

In [None]:
# Preview the first rows of the preprocessed table.
head(data)

In [None]:
# Print the proportional-hazards check for a fitted Cox model and draw its
# forest plot.
#
# Args:
#   coxobj:  fitted model handed to cox.zph() and ggforest() (the notebook
#            passes coxph() fits).
#   data:    data the model was fitted on; converted with as.data.frame()
#            before being given to ggforest().
#   outfile: file stem or NULL. When not NULL the plot is written to
#            out/<outfile>.png and out/<outfile>.svg.
#   width, height: size of the saved files, in inches.
#
# Returns:
#   The forest plot object.
cox_plot <- function(coxobj, data, outfile = NULL, width = 3, height = 6) {
  # Printed so the test result is visible in the notebook output.
  zph <- cox.zph(coxobj)
  print(zph)

  plt <- ggforest(coxobj, data = as.data.frame(data)) +
    theme_classic(base_size = 7, base_family = "Arial") +
    theme(
      axis.text = element_text(size = 7, colour = "black"),
      plot.title = element_text(size = 7)
    )

  if (!is.null(outfile)) {
    # Hand the plot to ggsave() explicitly; its default is last_plot(), which
    # is global state and need not be the plot built above.
    for (ext in c("png", "svg")) {
      ggsave(
        filename = paste0(outfile, ".", ext),
        plot = plt,
        device = ext,
        path = "out",
        width = width,
        height = height,
        units = "in"
      )
    }
  }
  return(plt)
}

# Draw Kaplan-Meier curves with a numbers-at-risk table for a fitted survival
# object.
#
# Args:
#   survObj: fitted object accepted by ggsurvfit() (the notebook passes a
#            survfit2() fit); length(survObj$n) is used as the number of
#            strata.
#   outfile: file stem or NULL. When not NULL the plot is written to
#            out/<outfile>.png and out/<outfile>.svg at 3 x 3.5 inches.
#
# Returns:
#   The plot object.
km_plot <- function(survObj, outfile = NULL) {
  n_strata <- length(survObj$n)

  # Fixed colours and legend labels exist only for the 2- and 3-strata
  # comparisons; any other number of strata keeps the default scales.
  # NOTE(review): labels are applied by position, so they assume the strata
  # are ordered exactly as listed here - verify against the fitted object.
  legend_spec <- switch(
    as.character(n_strata),
    "2" = list(
      colors = c("blue", "red"),
      labels = c("ecDNA-", "ecDNA+")
    ),
    "3" = list(
      colors = c("magenta", "red", "dodgerblue"),
      labels = c("chromosomal", "ecDNA", "no amplification")
    ),
    NULL
  )

  plt <- survObj %>%
    ggsurvfit(linewidth = 0.5) +
    labs(
      x = "Follow-up time (Months)",
      y = "Overall Survival"
    )

  if (!is.null(legend_spec)) {
    plt <- plt +
      scale_color_manual(
        values = legend_spec$colors,
        labels = legend_spec$labels
      ) +
      scale_fill_manual(
        values = legend_spec$colors,
        labels = legend_spec$labels
      )
  }

  plt <- plt +
    scale_y_continuous(limits = c(0, 1)) +
    add_censor_mark(size = 0.5, alpha = 1) +
    add_risktable(
      risktable_stats = "n.risk",
      size = 2,
      theme = theme_risktable_default(
        axis.text.y.size = 7,
        plot.title.size = 7
      )
    ) +
    add_risktable_strata_symbol(size = 4) +
    theme_classic(base_size = 7, base_family = "Arial") +
    theme(
      axis.text = element_text(size = 7, colour = "black"),
      legend.position = "bottom"
    )

  # Confidence bands are only drawn for up to three strata.
  if (n_strata <= 3) {
    plt <- plt + add_confidence_interval()
  }

  if (!is.null(outfile)) {
    # Hand the plot to ggsave() explicitly; its default is last_plot(), which
    # is global state and need not be the plot built above.
    for (ext in c("png", "svg")) {
      ggsave(
        filename = paste0(outfile, ".", ext),
        plot = plt,
        device = ext,
        path = "out",
        width = 3,
        height = 3.5,
        units = "in"
      )
    }
  }
  return(plt)
}

In [None]:
# KM
# Kaplan-Meier fit by amplicon class on the 60-month-censored outcome; the
# figure is written to out/km_nbl_5year.{png,svg}.
formula <- Surv(OS_months_5y, OS_status_5y) ~ amplicon_class
km <- survfit2(formula = formula, data = data)
plt <- km_plot(km, outfile = "km_nbl_5year")
# Pairwise log-rank tests (rho = 0) with Benjamini-Hochberg adjustment
logrank <- pairwise_survdiff(
  formula = formula,
  data = data,
  p.adjust.method = "BH",
  rho = 0
)
logrank

In [None]:
# Cox model
# Cox model: ecDNA status adjusted for amplification, sex and age at diagnosis
m4 <- coxph(
  Surv(OS_months_5y, OS_status_5y) ~
    ecDNA_status + amplified + sex + age_at_diagnosis,
  data = data
)
cox_plot(m4, data, outfile = "cox_forest_nbl", width = 6, height = 6)

In [None]:
# Cox model including MYCN amp, using rodriguez annotations
# Same model plus MYCN amplification from the MYCN_amp column; plot not saved
m5 <- coxph(
  Surv(OS_months_5y, OS_status_5y) ~
    ecDNA_status + amplified + MYCN_amp + sex + age_at_diagnosis,
  data = data
)
cox_plot(m5, data, outfile = NULL, width = 6, height = 6)

In [None]:
# Cox model including MYCN amp, using AmpliconClassifier annotations
# Same model plus MYCN amplification from the MYCN_amp_AC column
m6 <- coxph(
  Surv(OS_months_5y, OS_status_5y) ~
    ecDNA_status + amplified + MYCN_amp_AC + sex + age_at_diagnosis,
  data = data
)
cox_plot(m6, data, outfile = "cox_forest_nbl_mycn", width = 6, height = 6)