# Load TCGA results into objects

In [1]:
library(data.table)
library(furrr)
library(dplyr)
plan(multisession, workers = 5)

Loading required package: future


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




## Functions

In [2]:
# Read one TRex result table and tag every row with where it came from.
#
# Args:
#   file:    path of the result table, relative to `res_dir`.
#   anmode:  label written to the `analysis` column.
#   res_dir: directory that `file` is resolved against.
#   ...:     accepted but not used.
#
# Returns:
#   A data.frame with the table contents (first column dropped on read) plus
#   `cancer`, `analysis` and `exp_var`, and without the columns
#   `constitutive_transcripts`, `alternative_transcripts` and `seqname`.
trex_res_file_to_dfr <- function(file, anmode, res_dir, ...) {
    # Warnings raised by the reader are silenced.
    raw_tbl <- suppressWarnings(
        fread(
            file.path(res_dir, file),
            data.table = FALSE,
            stringsAsFactors = FALSE,
            drop = 1,
            nThread = 5
        )
    )

    annotated <- mutate(
        raw_tbl,
        # Everything before the first "/" of the relative path.
        cancer = sub("/.*", "", file),
        analysis = anmode,
        # What follows "trex_countA." in the path, with ".tsv" removed.
        exp_var = sub("\\.tsv", "", sub(".*trex_countA\\.", "", file))
    )

    select(
        annotated,
        -constitutive_transcripts,
        -alternative_transcripts,
        -seqname
    )
}

In [3]:
# Summarise TRex results per analysis / cancer / event type / explanatory
# variable.
#
# Args:
#   res.obj: data.frame with the columns `log2FoldChange`, `padj`, `gene_id`,
#            `analysis`, `cancer`, `event_type` and `exp_var`.
#
# Returns:
#   One row per group with
#     sig_events   - number of rows with padj < 0.05,
#     sig_genes    - number of distinct gene_id values among those rows,
#     total_events - number of rows in the group.
#   Rows whose log2FoldChange is 0 (or NA, which filter() also drops) are
#   excluded before counting.
get_res_stats <- function(res.obj) {

    stats <- res.obj %>%
        filter(log2FoldChange != 0) %>%
        group_by(analysis, cancer, event_type, exp_var) %>%
        # A missing padj counts as "not significant". Leaving it NA would make
        # gene_id[significant] contain an NA element, which unique() then
        # counts as one extra gene.
        mutate(significant = !is.na(padj) & padj < 0.05) %>%
        summarize(sig_events = sum(significant, na.rm = TRUE),
                  sig_genes = length(unique(gene_id[significant])),
                  total_events = length(significant))
    return(stats)
}

In [4]:
# Collect all TRex result tables of one analysis / result mode, store the
# combined table and its summary as RDS files, and return the summary.
#
# Args:
#   amode:   analysis label; part of the file-name pattern and of the output
#            file names, and written to the `analysis` column.
#   rmode:   result mode (the cells below use "res" and "res.lfcShrink"); part
#            of the file-name pattern and of the output file names.
#   res_dir: directory searched recursively for result tables.
#   ...:     accepted but not used.
#   out_dir: directory the RDS files are written to. Defaults to the global
#            `out_obj_dir`, which is what this function always used.
#
# Returns:
#   The summary table produced by get_res_stats().
#
# Side effects:
#   Writes <out_dir>/tcga.<amode>.<rmode>.RDS and
#   <out_dir>/tcga.<amode>.<rmode>.stats.RDS, creating `out_dir` if needed.
build_trex_res_objects <- function(amode, rmode, res_dir, ..., out_dir = out_obj_dir) {

    # `pattern` is interpreted as a regular expression, not a glob. It is kept
    # exactly as before so that the same set of files is selected.
    res.files <- list.files(res_dir,
                            pattern = paste0("*.", rmode, ".", amode, ".trex_countA*"),
                            recursive = TRUE)
    if (length(res.files) == 0) {
        # Without this check the failure surfaces later as an obscure
        # "object 'log2FoldChange' not found" error.
        stop("No TRex result files found in '", res_dir,
             "' for amode='", amode, "', rmode='", rmode, "'", call. = FALSE)
    }

    # Make sure the output location exists before the expensive loading step.
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

    res <- furrr::future_map_dfr(res.files, trex_res_file_to_dfr,
                                 anmode = amode, res_dir = res_dir)

    res.stats <- get_res_stats(res)

    message("Storing RDS object...")
    saveRDS(res, paste0(out_dir, "/tcga.", amode, ".", rmode, ".RDS"))
    saveRDS(res.stats, paste0(out_dir, "/tcga.", amode, ".", rmode, ".stats.RDS"))
    message("Finished!")

    return(res.stats)
}

In [5]:
# Read one DESeq2 result table, tag every row with where it came from and
# attach gene ids taken from the `dds` object stored next to it.
#
# Args:
#   file:    path of the result table, relative to `res_dir`.
#   anmode:  label written to the `analysis` column.
#   resmode: result-mode prefix of the file name; used to strip everything up
#            to and including that prefix when deriving `exp_var`.
#   res_dir: directory that `file` is resolved against.
#   ...:     accepted but not used.
#
# Returns:
#   A data.frame with the table contents (first column dropped on read) plus
#   `cancer`, `analysis`, `exp_var` and `gene_id`.
#
# Note: gene ids are assigned by position, i.e. the table rows are assumed to
# be in the same order as the rows of `dds` - TODO confirm against the code
# that wrote the tables.
deseq_res_file_to_dfr <- function(file, anmode, resmode, res_dir, ...) {

    res <- suppressWarnings(fread(file.path(res_dir, file), data.table = FALSE,
                                  stringsAsFactors = FALSE, drop = 1, nThread = 4)) %>%
        mutate(cancer = sub("/.*", "", file),
               analysis = anmode,
               exp_var = sub(paste0(".*/", resmode, "."), "", file) %>% sub("\\.tsv", "", .))

    # The companion object lives at <first path component>/dds.RData.
    ddsfile <- file.path(res_dir, sub("/.*", "/dds.RData", file))
    if (!file.exists(ddsfile)) {
        stop("Companion file not found: ", ddsfile, call. = FALSE)
    }

    # Restore into a private environment: load() would otherwise overwrite any
    # local variable (e.g. `res`) that shares a name with a stored object.
    dds_env <- new.env(parent = emptyenv())
    load(file = ddsfile, envir = dds_env)
    if (!exists("dds", envir = dds_env, inherits = FALSE)) {
        stop("No object named 'dds' in ", ddsfile, call. = FALSE)
    }

    gene_ids <- rownames(get("dds", envir = dds_env))
    # `$<-` on a data.frame silently recycles a shorter vector and silently
    # does nothing for NULL, so check the length explicitly.
    if (length(gene_ids) != nrow(res)) {
        stop("Row count mismatch for '", file, "': result table has ", nrow(res),
             " rows but dds has ", length(gene_ids), " row names", call. = FALSE)
    }
    res$gene_id <- gene_ids

    return(res)
}

In [6]:
# Collect all DESeq2 result tables of one result mode, store the combined
# table and its summary as RDS files, and return the summary.
#
# Args:
#   amode:   analysis label; part of the output file names and written to the
#            `analysis` column.
#   rmode:   result mode ("res" or "res.lfcShrink" in the cells below); used
#            to select files and as part of the output file names.
#   res_dir: directory searched recursively for result tables and used to
#            resolve them when reading.
#   ...:     accepted but not used.
#   out_dir: directory the RDS files are written to. Defaults to the global
#            `out_obj_dir`, which is what this function always used.
#
# Returns:
#   The summary table produced by get_deseq_res_stats().
#
# Side effects:
#   Writes <out_dir>/tcga.<amode>.<rmode>.RDS and
#   <out_dir>/tcga.<amode>.<rmode>.stats.RDS, creating `out_dir` if needed.
build_deseq_res_objects <- function(amode, rmode, res_dir, ..., out_dir = out_obj_dir) {

    # List from `res_dir` (previously the global `deseq_dir`), so the files
    # found are the same ones that are later read relative to `res_dir`.
    res.files <- list.files(res_dir, pattern = paste0(rmode, ".*.tsv"), recursive = TRUE)
    if (rmode != "res.lfcShrink") {
        # The pattern for e.g. "res" also matches "res.lfcShrink" files.
        res.files <- res.files[!grepl("res.lfcShrink", res.files, fixed = TRUE)]
    }
    if (length(res.files) == 0) {
        # Without this check the failure surfaces later as an obscure
        # "object 'log2FoldChange' not found" error.
        stop("No DESeq2 result files found in '", res_dir,
             "' for rmode='", rmode, "'", call. = FALSE)
    }

    # Make sure the output location exists before the expensive loading step.
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

    res <- furrr::future_map_dfr(res.files, deseq_res_file_to_dfr,
                                 anmode = amode, res_dir = res_dir, resmode = rmode)

    res.stats <- get_deseq_res_stats(res)

    message("Storing RDS object...")
    saveRDS(res, paste0(out_dir, "/tcga.", amode, ".", rmode, ".RDS"))
    saveRDS(res.stats, paste0(out_dir, "/tcga.", amode, ".", rmode, ".stats.RDS"))
    message("Finished!")

    return(res.stats)
}

In [7]:
# Summarise DESeq2 results per analysis / cancer / explanatory variable.
#
# Args:
#   res.obj: data.frame with the columns `log2FoldChange`, `padj`, `gene_id`,
#            `analysis`, `cancer` and `exp_var`.
#
# Returns:
#   One row per group with
#     sig_events   - number of rows with padj < 0.05,
#     sig_genes    - number of distinct gene_id values among those rows,
#     total_events - number of rows in the group.
#   Rows whose log2FoldChange is 0 (or NA, which filter() also drops) are
#   excluded before counting.
get_deseq_res_stats <- function(res.obj) {

    stats <- res.obj %>%
        filter(log2FoldChange != 0) %>%
        group_by(analysis, cancer, exp_var) %>%
        # A missing padj counts as "not significant". Leaving it NA would make
        # gene_id[significant] contain an NA element, which unique() then
        # counts as one extra gene.
        mutate(significant = !is.na(padj) & padj < 0.05) %>%
        summarize(sig_events = sum(significant, na.rm = TRUE),
                  sig_genes = length(unique(gene_id[significant])),
                  total_events = length(significant))
    return(stats)
}

### TRex predictions splicing

In [8]:
# Directory with the TRex result tables; passed as `res_dir` to
# build_trex_res_objects() in the cells below.
trex_dir <- "~/lmprojects/splicing-pancancer/results/trex.logit_ASALL_v2"
# Directory the RDS objects are written to and read back from; the
# build_*_res_objects() functions pick this up as a global variable.
out_obj_dir<-"../input/trex_objects"

### Condition

In [None]:
# Condition analysis: build and store the TRex objects for both result modes
# ("res" and "res.lfcShrink"), then show the first two rows of each summary.
res.stats <- build_trex_res_objects(
    amode = "condition",
    rmode = "res",
    res_dir = trex_dir
)
res.stats.lfc <- build_trex_res_objects(
    amode = "condition",
    rmode = "res.lfcShrink",
    res_dir = trex_dir
)
head(res.stats, n = 2)
head(res.stats.lfc, n = 2)

In [None]:
# Condition.NoCovs analysis: build and store the TRex objects for both result
# modes ("res" and "res.lfcShrink").
res.stats <- build_trex_res_objects(
    amode = "condition.NoCovs",
    rmode = "res",
    res_dir = trex_dir
)
res.stats.lfc <- build_trex_res_objects(
    amode = "condition.NoCovs",
    rmode = "res.lfcShrink",
    res_dir = trex_dir
)

In [9]:
# Reload the stored lfcShrink condition results, keep only the rows whose
# exp_var is "conditiontumor", and store that subset as its own RDS file.
res.lfc <- readRDS(paste0(out_obj_dir, "/tcga.condition.res.lfcShrink.RDS"))
cond <- filter(res.lfc, exp_var == "conditiontumor")
saveRDS(
    cond,
    file = paste0(out_obj_dir, "/tcga.condition.res.lfcShrink.conditiontumor.RDS")
)

### Stage

In [None]:
# Stage analysis: build and store the TRex objects for both result modes
# ("res" and "res.lfcShrink"), then print the cancers present in the
# lfcShrink summary.
res.stats <- build_trex_res_objects(
    amode = "tumorStageContinuous",
    rmode = "res",
    res_dir = trex_dir
)
res.stats.lfc <- build_trex_res_objects(
    amode = "tumorStageContinuous",
    rmode = "res.lfcShrink",
    res_dir = trex_dir
)
print(unique(res.stats.lfc$cancer))

In [10]:
# Reload the stored lfcShrink stage results, keep only the rows whose exp_var
# is "stage" (the final stage coefficient), and store that subset as its own
# RDS file.
res.lfc <- readRDS(paste0(out_obj_dir, "/tcga.tumorStageContinuous.res.lfcShrink.RDS"))
stage <- filter(res.lfc, exp_var == "stage")
saveRDS(
    stage,
    file = paste0(out_obj_dir, "/tcga.tumorStageContinuous.res.lfcShrink.stage.RDS")
)

### DESeq2 predictions DGE

In [None]:
# Directory with the DESeq2 result tables; passed as `res_dir` to
# build_deseq_res_objects() below.
deseq_dir <- "../output/deseq2_dge_objects"

In [None]:
# Condition analysis on gene expression: build and store the DESeq2 objects
# for both result modes ("res" and "res.lfcShrink"), then show the last two
# rows of the first summary and the first two rows of the second.
res.stats <- build_deseq_res_objects(
    amode = "condition.gex",
    rmode = "res",
    res_dir = deseq_dir
)
res.stats.lfc <- build_deseq_res_objects(
    amode = "condition.gex",
    rmode = "res.lfcShrink",
    res_dir = deseq_dir
)
tail(res.stats, n = 2)
head(res.stats.lfc, n = 2)