In [1]:
wd <- dirname(dirname(getwd()))
source(paste0(wd,"/mission_control/treasure_map.R"))
source(paste0(wd,"/mission_control/helpers/validation/settings.R"))
library(tidyverse)

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.2     ✔ dplyr   1.0.6
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.1
“package ‘forcats’ was built under R version 3.6.3”── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


### 0 - Load INSPIRE
- Mutational and clinical data

In [2]:
# Restore the objects stored in the INSPIRE PanCanMut RData workspace.
# NOTE(review): `pt.dat` and `suv.dat`, used in later cells, are presumably
# created by this call -- confirm against the contents of the RData file.
load(
  file = paste0(E_DIR, "inspire/inspire-genomics/data/RData/PanCanMut.data.RData")
)

- Transcriptomics

In [3]:
# Read the TPM expression table. Later cells move its row names into a `gene`
# column, so rows are treated as genes and columns as samples.
rna <- read.table(
  file = paste0(
    E_DIR,
    "inspire/inspire-genomics/data/Source Data/SourceData_Fig4/gene-expression-matrix-TPM-final.tsv"
  )
)

### 1 - Curate Clinical Data

In [4]:
# Copy the response columns whose names are not syntactic R identifiers to
# short aliases, so they can be referenced as bare names in the dplyr call
# that builds the clinical table.
pt.dat[["bor"]] <- pt.dat[, "PR/CR"]
pt.dat[["clinical_recist"]] <- pt.dat[, "Best overall response"]
pt.dat[["benefit"]] <- pt.dat[, "Clinical Benefit"]

In [5]:
# Clinical table: one row per row of `pt.dat`, with the row names exposed as
# `patient_id` and the source columns renamed to lower-case names.
clinical <- pt.dat %>%
  rownames_to_column("patient_id") %>%
  transmute(
    patient_id,
    gender = SEX,
    age = AGE,
    # COHORT fills both tissue columns.
    tissue = COHORT,
    tissue_full = COHORT,
    pretreat = PRIOR_SYSTEMIC_THERAPY,
    bor,
    pfs_event = PFS_EVENT,
    pfs_days = PFS,
    os_event = OS_EVENT,
    os_days = OS,
    clinical_recist,
    benefit,
    # Constant columns: no biopsy location is recorded here, and every row is
    # labelled with mechanism "pd".
    biopsy_location = NA,
    mechanism = "pd"
  )

### 2 - Curate Mutational Data

In [6]:
# Mutational table: raw TMB (`ns.per.Mb`) and its log(x + 1) transform, keyed
# by `patient_id`, plus the survival/response columns carried by `suv.dat`.
mut <- transmute(
  suv.dat,
  patient_id = pt,
  tmb_raw = ns.per.Mb,
  tmb = log(ns.per.Mb + 1),
  OS,
  PFS,
  OS_EVENT,
  PFS_EVENT,
  response
)

In [7]:
# Attach mutational data to every clinical row; rows of `clinical` without a
# match in `mut` are kept with NA in the mutational columns.
clin_mut <- left_join(clinical, mut, by = "patient_id")

### 3 - Load and Curate RNA

#### Save for further analysis
- Used for outside correlation analysis

In [8]:
# Build a samples x genes table of log(TPM) for the external correlation
# analysis and save it under REF_DIR.
#
# The matrix is transposed while it is still numeric. Transposing a data frame
# that also holds the gene-name column yields a character matrix: the numbers
# are rounded when formatted as text, and where data.frame() converts strings
# to factors (the default before R 4.0.0) as.numeric() then returns factor
# level codes instead of expression values.
rna_send <- log(as.data.frame(t(as.matrix(rna))))

# Same syntactic, de-duplicated column names that data.frame() generates with
# its default check.names = TRUE, so the saved column names are unchanged.
names(rna_send) <- make.names(rownames(rna), unique = TRUE)

# The saved table carries default (1..n) row names, not sample labels.
rownames(rna_send) <- NULL

saveRDS(rna_send, paste0(REF_DIR, "rna_validation_inspire.Rds"))

#### Main analysis

In [9]:
# Expose the row names of the expression table as a `gene` column, then keep
# only the genes that appear somewhere in `gene_sets`.
express <- rownames_to_column(rna, "gene")
express <- filter(express, gene %in% unlist(gene_sets))

In [10]:
# Reshape the filtered expression table to samples x genes, take log(TPM), and
# keep the sample labels in a `patient_id` column.
#
# Only the numeric sample columns are transposed. Transposing `express` with
# its `gene` column produces a character matrix: values are rounded when
# formatted as text, and where data.frame() converts strings to factors (the
# default before R 4.0.0) as.numeric() returns factor level codes rather than
# expression values. `drop = FALSE` keeps the matrix shape even when there is
# a single sample column.
sample_cols <- setdiff(names(express), "gene")
tpm <- t(as.matrix(express[, sample_cols, drop = FALSE]))
patients <- rownames(tpm)

rna <- log(as.data.frame(tpm))
# Same syntactic, de-duplicated names that data.frame() generates with its
# default check.names = TRUE, so downstream column references are unchanged.
colnames(rna) <- make.names(express$gene, unique = TRUE)
rownames(rna) <- NULL
rna$patient_id <- patients

### 3 - Combine

In [11]:
# Derive the join key from the sample label: split on ".", keep the 2nd and
# 3rd pieces and glue them together with "-".
label_parts <- strsplit(rna$patient_id, "\\.")
rna$id <- unlist(
  lapply(label_parts, function(parts) paste(parts[2:3], collapse = "-"))
)

# Rows that share an id are collapsed to the mean of each expression column.
rna_mns <- rna %>%
  select(-patient_id) %>%
  group_by(id) %>%
  summarise_all(mean)

In [12]:
# Build the same key on the clinical side: split `patient_id` on "-", keep
# the 2nd and 3rd pieces and re-join them with "-".
id_parts <- strsplit(clin_mut$patient_id, "-")
clin_mut$id <- unlist(
  lapply(id_parts, function(parts) paste(parts[2:3], collapse = "-"))
)

# Keep only ids present in both tables, then drop every column whose name
# contains "patient" (`id` remains as the identifier).
together <- inner_join(clin_mut, rna_mns, by = "id")
together <- select(together, -contains("patient"))

#### Compute Gene Sets

In [13]:
# Display the top-level entries of `gene_sets`; the next cell reads its
# `clusters`, `clusters5` and `sets1` entries.
names(gene_sets)

In [14]:
# Score columns to add, each mapped to the gene list it averages over.
# Order matters only for the resulting column order in `together`.
score_genes <- list(
  tcell = gene_sets$clusters$tcell,
  tgfb = gene_sets$clusters$tgfb,
  prolif = gene_sets$clusters$prolif,
  tcell_cluster5 = gene_sets$clusters5$tcell,
  tgfb_cluster5 = gene_sets$clusters5$tgfb,
  prolif_cluster5 = gene_sets$clusters5$prolif,
  tcell_set = gene_sets$sets1$tcell,
  tgfb_set = gene_sets$sets1$tgfb,
  prolif_set = gene_sets$sets1$prolif
)

# Each score is the per-row mean (NA values ignored) over whichever genes of
# the list are present as columns; any_of() silently skips absent genes.
for (score_name in names(score_genes)) {
  members <- together %>% select(any_of(score_genes[[score_name]]))
  together[[score_name]] <- apply(members, 1, mean, na.rm = TRUE)
}

#### Output Data

In [15]:
# Final per-patient table for the INSPIRE study: identifiers, outcome,
# covariates and the gene-set scores computed above.
inspire_go <- together %>%
  transmute(
    patient_id = id,
    bor,
    # Signed survival time: os_days is negated where os_event == 0.
    os = ifelse(os_event == 0, -os_days, os_days),
    os_event,
    os_days,
    age,
    gender,
    tissue,
    tissue_full = tissue,
    tmb,
    tcell,
    prolif,
    tgfb,
    tcell_cluster5,
    tgfb_cluster5,
    prolif_cluster5,
    tcell_set,
    prolif_set,
    tgfb_set,
    # PD-L1 is taken from the CD274 expression column.
    pdl1 = CD274,
    pretreat,
    # Placeholder columns filled with NA, plus the constant study label.
    pretreat_comp = NA,
    purity = NA,
    Study = "INSPIRE"
  )

### 4 - Send it!

In [16]:
# Write the curated INSPIRE table to TMP_DIR.
saveRDS(
  object = inspire_go,
  file = paste0(TMP_DIR, "validation-inspire-go.Rds")
)