In [2]:
# Locate the parent of the current working directory and source the two shared
# helper scripts kept under its mission_control/ folder.
# NOTE(review): REF_DIR and TMP_DIR used further down are not defined in this
# notebook, so they presumably come from these scripts — confirm.
wd <- dirname(getwd())
for (helper_script in c("/mission_control/treasure_map.R",
                        "/mission_control/isofox_help.R")) {
  source(paste0(wd, helper_script))
}

library(dplyr)
library(tidyverse)
library(data.table)

# Single-element list holding the dataset tag. Its first element is spliced
# into the input and output file names below
# ("isofox_<tag>_features.csv" and "isofox_<tag>_ready.csv").
args <- list("AdjTPM")

#### Read in Gene Sets

- Sets included in first submission

In [5]:
# Load the serialized gene-set collections stored under REF_DIR.
read_reference <- function(file_name) {
  readRDS(paste0(REF_DIR, file_name))
}

cpi1000_sets   <- read_reference("cpi1000_gene_sets.Rds")
mariathan_sets <- read_reference("human_gene_signatures.Rds")
tgfb_sets      <- read_reference("battle_gene_sets.Rds")
vhio_sets      <- read_reference("vhio_gene_sets.Rds")
cluster_sets   <- read_reference("fig_2and3_clusters.Rds")

- Add extra GSEA sets from Axel

In [6]:
# Load the extra GSEA collection and split it into families by matching a tag
# in each set name.
gsea_sets <- readRDS(paste0(REF_DIR, "GSEA_gene_sets.Rds"))
gsea_names <- names(gsea_sets)

# grepl() is vectorised over the names and never returns NA, so the logical
# vector can index the list directly; no lapply()/unlist()/which() round trip
# is needed, and an empty or unnamed list simply yields an empty selection.
kegg_sets <- gsea_sets[grepl("KEGG", gsea_names)]
hallmark_sets <- gsea_sets[grepl("HALLMARK", gsea_names)]
go_sets <- gsea_sets[grepl("_GO_", gsea_names)]

In [10]:
#cpi1000_sets

In [11]:
library(jsonlite)
# Write a one-element list to test.json in the working directory
# (presumably a quick check that JSON export works — confirm before keeping).
write_json(list(a = 1), path = "test.json")


Attaching package: ‘jsonlite’

The following object is masked from ‘package:purrr’:

    flatten



In [12]:
# Show the current working directory: the JSON files in the neighbouring cells
# ("test.json", "gene_sets.json") are written with relative paths.
getwd()

In [13]:
# Export four of the gene-set collections (cpi1000, KEGG, HALLMARK, GO) as one
# JSON document; the top-level keys are the collection names.
sets_for_export <- list(
  cpi1000_sets = cpi1000_sets,
  kegg_sets = kegg_sets,
  hallmark_sets = hallmark_sets,
  go_sets = go_sets
)
write_json(sets_for_export, "gene_sets.json")

#### 0 - Read in raw data

In [4]:
# Read the raw feature table for the dataset tag stored in args.
features_path <- paste0(TMP_DIR, "isofox_", args[[1]], "_features.csv")
isofox <- fread(features_path)

#### 1 - Transform
- log transform

In [5]:
# Apply log(x + 1) to every column except sampleId.
# across() replaces the superseded scoped verb mutate_at(); the arithmetic is
# left as log(x + 1) so the transformed values are unchanged. as.vector() is
# kept to strip any attributes from the result, as before.
isofox2 <- isofox %>%
  mutate(across(-sampleId, ~ as.vector(log(.x + 1))))

#### 2 - Add gene sets 

In [6]:
# Normalise the mariathan set names: spaces become underscores, then each name
# gets the "gene_set_mariathan_" prefix.
# paste0() is vectorised, so it is applied to the whole name vector at once
# (same form as the "isofox_" prefixing of gene_sets further down).
# Assumes mariathan_sets is a non-empty named list.
names(mariathan_sets) <- paste0(
  "gene_set_mariathan_",
  gsub(" ", "_", names(mariathan_sets))
)

In [7]:
# Prefix every set name in these collections with "gene_set_".
# paste0() is vectorised over the name vector, so no lapply()/unlist() wrapper
# is needed (same form as the "isofox_" prefixing of gene_sets further down).
# Assumes each collection is a non-empty named list.
names(cluster_sets) <- paste0("gene_set_", names(cluster_sets))
names(tgfb_sets) <- paste0("gene_set_", names(tgfb_sets))
names(vhio_sets) <- paste0("gene_set_", names(vhio_sets))
names(kegg_sets) <- paste0("gene_set_", names(kegg_sets))
names(hallmark_sets) <- paste0("gene_set_", names(hallmark_sets))
names(go_sets) <- paste0("gene_set_", names(go_sets))

In [8]:
# Concatenate all collections into one flat named list of gene sets.
# cpi1000_sets goes in with its names as read from file; the other
# collections were renamed in the cells above.
gene_sets <- c(
  cpi1000_sets,
  mariathan_sets,
  tgfb_sets,
  vhio_sets,
  kegg_sets,
  hallmark_sets,
  go_sets,
  cluster_sets
)

In [9]:
# Give every gene-set name the "isofox_" prefix.
gene_sets <- setNames(gene_sets, paste0("isofox_", names(gene_sets)))

In [10]:
# Inspect the tgfb_cluster set. Every name in gene_sets carries the "isofox_"
# prefix by this point, so the lookup key needs it too; the unprefixed key can
# only return NULL.
# NOTE(review): assumes one of the collections provides a "tgfb_cluster"
# entry — confirm the exact name with names(gene_sets).
gene_sets[["isofox_gene_set_tgfb_cluster"]]

NULL

##### Edit the gene sets (update gene names; a few genes are missing)

In [11]:
# Turn gene identifiers into feature column names: prefix each element of `ll`
# with "isofox_" and replace every "-" with ".".
# Returns a character vector, or NULL when `ll` is empty.
appender <- function(ll) {
  to_feature_name <- function(gene) {
    prefixed <- paste0("isofox_", gene)
    gsub("-", ".", prefixed)
  }
  unlist(lapply(ll, to_feature_name))
}
# Rewrite the members of every gene set as feature column names.
# A set for which appender() returns NULL (an empty set) is removed from the
# list by the NULL assignment.
for (set_name in names(gene_sets)) {
  gene_sets[[set_name]] <- appender(gene_sets[[set_name]])
}

#### 2b - Compute Gene Sets

In [12]:
# Remove two sets from the collection before scoring; assigning NULL deletes
# the entry and is a no-op when the name is absent.
for (dropped_set in c("isofox_gene_set_mariathan_Histones",
                      "isofox_CD_8_T_EFFECTOR")) {
  gene_sets[[dropped_set]] <- NULL
}

In [13]:
# Score every gene set per sample: the mean, across the set's member columns
# that are present in isofox2, of the (log-transformed) values, ignoring NAs.
gene_sets_ls <- list()
for (set_name in names(gene_sets)) {
  # Progress output, one line per gene set.
  print(set_name)
  flush.console()

  # any_of() silently skips members that have no column in isofox2.
  members <- isofox2 %>% select(any_of(gene_sets[[set_name]]))

  # rowMeans() computes all per-sample means in compiled code instead of
  # calling mean() once per row through apply(). A set with no matching
  # column yields NaN for every sample, as it did before.
  gene_sets_ls[[set_name]] <- rowMeans(members, na.rm = TRUE)
}

# One column per gene set (data.frame() may sanitise names that are not
# syntactically valid), plus the sample identifier used for joining.
gene_sets_final <- data.frame(gene_sets_ls)
gene_sets_final$sampleId <- isofox2 %>% pull(sampleId)

[1] "isofox_gene_set_cyt"
[1] "isofox_gene_set_t_cell_gep_6"
[1] "isofox_gene_set_t_cell_gep_10"
[1] "isofox_gene_set_t_cell_gep_18"
[1] "isofox_gene_set_prolif"
[1] "isofox_gene_set_tim3"
[1] "isofox_gene_set_t_cell_effector"
[1] "isofox_gene_set_myeloid_inflammation"
[1] "isofox_gene_set_stroma_emt_shortened"
[1] "isofox_gene_set_Pan_TBRS"
[1] "isofox_gene_set_impres"
[1] "isofox_gene_set_12_chemokine"
[1] "isofox_gene_set_immune_checkpoint_genes"
[1] "isofox_gene_set_cd8_t_effector"
[1] "isofox_gene_set_infiltrate"
[1] "isofox_gene_set_t_cell_rand1"
[1] "isofox_gene_set_prolif_rand1"
[1] "isofox_gene_set_tgfb_rand1"
[1] "isofox_gene_set_mariathan_CD_8_T_effector"
[1] "isofox_gene_set_mariathan_DDR"
[1] "isofox_gene_set_mariathan_APM"
[1] "isofox_gene_set_mariathan_Immune_Checkpoint"
[1] "isofox_gene_set_mariathan_CC_Reg"
[1] "isofox_gene_set_mariathan_Fanconi"
[1] "isofox_gene_set_mariathan_gene19"
[1] "isofox_gene_set_mariathan_tcga"
[1] "isofox_gene_set_mariathan_EMT1"
[1] "isofox

[1] "isofox_gene_set_KEGG_PEROXISOME"
[1] "isofox_gene_set_KEGG_MTOR_SIGNALING_PATHWAY"
[1] "isofox_gene_set_KEGG_APOPTOSIS"
[1] "isofox_gene_set_KEGG_VASCULAR_SMOOTH_MUSCLE_CONTRACTION"
[1] "isofox_gene_set_KEGG_WNT_SIGNALING_PATHWAY"
[1] "isofox_gene_set_KEGG_DORSO_VENTRAL_AXIS_FORMATION"
[1] "isofox_gene_set_KEGG_NOTCH_SIGNALING_PATHWAY"
[1] "isofox_gene_set_KEGG_HEDGEHOG_SIGNALING_PATHWAY"
[1] "isofox_gene_set_KEGG_TGF_BETA_SIGNALING_PATHWAY"
[1] "isofox_gene_set_KEGG_AXON_GUIDANCE"
[1] "isofox_gene_set_KEGG_VEGF_SIGNALING_PATHWAY"
[1] "isofox_gene_set_KEGG_FOCAL_ADHESION"
[1] "isofox_gene_set_KEGG_ECM_RECEPTOR_INTERACTION"
[1] "isofox_gene_set_KEGG_CELL_ADHESION_MOLECULES_CAMS"
[1] "isofox_gene_set_KEGG_ADHERENS_JUNCTION"
[1] "isofox_gene_set_KEGG_TIGHT_JUNCTION"
[1] "isofox_gene_set_KEGG_GAP_JUNCTION"
[1] "isofox_gene_set_KEGG_COMPLEMENT_AND_COAGULATION_CASCADES"
[1] "isofox_gene_set_KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION"
[1] "isofox_gene_set_KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_

#### 3 - Format for Univariate Scan

In [14]:
# Attach the gene-set score columns to the per-sample feature table, matching
# on sampleId, then reset the row names.
happy_isofox <- left_join(isofox2, gene_sets_final, by = "sampleId")
rownames(happy_isofox) <- NULL

#### 4 - Save for scan analyses

In [15]:
# Write the combined table to TMP_DIR as "isofox_<tag>_ready.csv".
ready_path <- paste0(TMP_DIR, "isofox_", args[[1]], "_ready.csv")
fwrite(happy_isofox, file = ready_path)