In [32]:
wd <- dirname(dirname(getwd()))
source(paste0(wd,"/mission_control/treasure_map.R"))
source(paste0(wd,"/mission_control/after_burn_help.R"))

library(dplyr)
library(tidyr)
library(data.table)

args <- list("RawTPM") #or "AdjTPM"

In [35]:
biomarkers <- fread(paste0( CLN_DIR, "biomarkers_",args[1],".csv"))

### 0 - Overview

In [7]:
dim(biomarkers); head(biomarkers[10:30], 1)

Survival_time_to_pfs_event,Survival_patient_died,Survival_time_to_last_response,Survival_at_6_months,Survival_at_12_months,Survival_at_18_months,clinical_age_at_treatment_start,clinical_biopsy_site,clinical_meta_consolidatedTreatmentType,clinical_meta_gender,...,clinical_meta_hasSystemicPreTreatment,clinical_meta_primaryTumorLocation,clinical_meta_primaryTumorType,clinical_meta_treatment,clinical_meta_tumorPurity,clinical_post_contains_Chemotherapy,clinical_post_contains_Hormonal,clinical_post_contains_Immunotherapy,clinical_post_contains_Targeted,clinical_pre_contains_Chemotherapy
414,1,414,1,1,0,48,other,immunotherapy,1,...,1,lung,Carcinoma,Nivolumab,0.2,0,0,True,0,1


In [8]:
col_types <- data.frame( 
    column_types = unlist(
            lapply( colnames(biomarkers),
                    function(i) strsplit(i,"_")[[1]][1]
                   )))
col_types %>% group_by(column_types) %>% summarise(ct = n()) %>% arrange(desc(ct))

column_types,ct
somatic.gene,19342
isofox.nr,19253
isofox,18768
cnv.region,2500
somatic,95
sig,78
cibersort,24
clinical,23
hla,22
isofox.pc,20


### 0 - Drivers

In [27]:
drivers <- biomarkers %>% select(clinical_meta_consolidatedTreatmentType, clinical_meta_primaryTumorLocation, contains("driver"))
#head(drivers)

In [28]:
#summary(drivers 
#    %>% filter(clinical_meta_consolidatedTreatmentType == "Immunotherapy", 
#               clinical_meta_primaryTumorLocation == "Skin") 
#    %>% select( contains(c("KRAS","CDKN2A", "NRAS","EGFR", "BRAF", "MET", "HER2", "ALK", "RET", "ROS1", "NF1")) )
# )

### 1 - Filter to CPI patients - Immune Biomarkers Project

In [20]:
cpi <- biomarkers %>% filter(clinical_post_contains_Immunotherapy == 'True'); dim(cpi)

### 2 - Filter to Prostate Patients and response + clinical and cibersort features

In [21]:
prostate <- 
(biomarkers 
    %>% filter(clinical_meta_primaryTumorLocation == "Prostate") 
    %>% select(sampleId, 
               Y_best_response_binary, 
               (contains("Surv")), 
               (contains("clinical")),
                contains("cibersort")))

In [22]:
dim(prostate); head(prostate)

sampleId,Y_best_response_binary,Survival_patient_died,Survival_progression,Survival_time_to_death_in_days,Survival_time_to_last_response,Survival_time_to_progression,clinical_age_at_treatment_start,clinical_biopsy_site,clinical_meta_gender,...,cibersort_Macrophages.M1,cibersort_Macrophages.M2,cibersort_Dendritic.cells.resting,cibersort_Dendritic.cells.activated,cibersort_Mast.cells.resting,cibersort_Mast.cells.activated,cibersort_Eosinophils,cibersort_Neutrophils,cibersort_mix_r,cibersort_mix_rmse
CPCT02020224TII,0,1,1,117.0,117,117,64,other,male,...,,,,,,,,,,
CPCT02020378T,0,1,1,111.0,111,62,70,other,male,...,0.01392428,0.2687489,0.06041429,0.01718199,0.008218343,0.0,0.0,0.005336025,0.084062517,4.701353
CPCT02020380T,0,1,1,442.0,442,44,73,lymph,male,...,0.02395106,0.1416252,0.02487538,0.0,0.0,0.0,0.0,0.006766372,0.150740057,5.720984
CPCT02020386T,0,1,1,75.0,75,56,64,liver,male,...,,,,,,,,,,
CPCT02020422T,0,1,1,102.0,102,59,73,liver,male,...,0.14347179,0.1835066,0.05752571,0.06242263,0.0,0.0,0.0,0.0,0.008366677,25.570789
CPCT02020686T,0,0,1,,63,63,69,lymph,male,...,0.01523946,0.3521015,0.08244609,0.0,0.040985698,0.0,0.0,0.037026754,0.025450099,4.144207


### 3 - Get chemotherapy patients, with response and gene set expression

In [23]:
(biomarkers 
    %>% filter(clinical_post_contains_Chemotherapy == "True") 
    %>% select(sampleId, 
               clinical_meta_primaryTumorLocation,
               Y_best_response_binary, 
               Survival_progression, 
               Survival_time_to_progression,
               contains("gene_set"))
    %>% drop_na(isofox_gene_set_cyt)
    %>% head())

sampleId,clinical_meta_primaryTumorLocation,Y_best_response_binary,Survival_progression,Survival_time_to_progression,isofox_gene_set_cyt,isofox_gene_set_t_cell_gep_6,isofox_gene_set_t_cel_gep_10,isofox_gene_set_t_cell_gep_18,isofox_gene_set_prolif.,...,somatic.gene_SETD4.mutations,somatic.gene_SETD6.mutations,somatic.gene_SETMAR.mutations,somatic.gene_SETD1B.mutations,somatic.gene_SETBP1.mutations,somatic.gene_SETX.mutations,somatic.gene_SETD2.mutations,somatic.gene_SETD5.mutations,somatic.gene_SETDB2.mutations,somatic.gene_SETD1A.mutations
CPCT02011035T,Lung,0,1,45,0.92312,2.2570049,1.7056539,1.8997466,2.621186,...,0,0,0,0,3,1,0,0,0,0
CPCT02011047T,Lung,0,1,39,0.7059537,2.1631151,1.6081289,1.7847269,1.658841,...,0,0,0,0,7,0,0,0,0,0
CPCT02030294T,Head and neck,0,1,62,0.2881818,2.1428328,1.5585975,1.5189376,2.426667,...,0,0,0,0,2,0,0,0,1,0
CPCT02040039T,Lung,0,1,70,0.1595295,0.9142088,0.6026335,0.7766371,1.196729,...,0,0,0,0,6,0,0,0,1,0
CPCT02040216T,Lung,0,1,84,0.3218165,1.8098092,1.277454,1.636808,1.199122,...,0,0,0,0,0,0,0,0,0,0
CPCT02060092T,Skin,0,1,64,2.2910122,3.8102067,3.1570836,2.7687624,1.87239,...,0,0,0,0,0,0,0,0,0,0
