# Comparison of condition-associated changes in tumor vs normal samples of TCGA


## Libraries

In [1]:
library(dplyr)
library(ggVennDiagram)
library(ggplot2)
library(ggsci)
library(fgsea)
library(ComplexHeatmap)
library(ggpubr)
library(msigdbr)
library(patchwork)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: grid

ComplexHeatmap version 2.8.0
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference

If you use it in published research, please cite:
Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
  genomic data. Bioinformatics 2016.

The new InteractiveComplexHeatmap package can directly export static 
complex heatmaps into an interactive Shiny app with zero effort. Have a try!

This message can be suppressed by:
  suppressPackageStartupMessages(library(ComplexHeatmap))




## Load data

In [2]:
# Label of the analysis this notebook covers.
# NOTE(review): the only visible use of the name `analysis` is inside the filter() on
# res.stats further down, where it may resolve to a column of that table rather than
# to this variable -- confirm which one is intended.
analysis<-"condition"

In [3]:
# Condition-analysis results and their accompanying stats table
cond.res <- readRDS("../input/tcga.condition.res.RDS")
res.stats <- readRDS("../input/tcga.condition.res.stats.RDS")

# Counterparts stored under the "lfcShrink" file names
cond.res.lfsh <- readRDS("../input/tcga.condition.res.lfcShrink.RDS")
res.stats.lfsh <- readRDS("../input/tcga.condition.res.lfcShrink.stats.RDS")

In [4]:
# Copy the event identifiers from the unshrunken results onto the shrunken ones.
# The copy is positional, so both tables must have the same number of rows; fail loudly
# instead of letting `$<-` recycle a shorter vector.
# NOTE(review): this also assumes both tables are in the same row order -- confirm upstream.
stopifnot(nrow(cond.res.lfsh) == nrow(cond.res))
cond.res.lfsh$event_id <- cond.res$event_id

## Summary stats

In [5]:
# Number of distinct significant events (padj < 0.05) per variable and event type
cond.res %>%
    filter(padj < 0.05) %>%
    group_by(exp_var, event_type) %>%
    distinct(event_id) %>%
    summarize(n = n())

# For each significant event: in how many cancer types does it reach padj < 0.05?
spcancer <- cond.res %>%
    filter(padj < 0.05) %>%
    distinct(exp_var, event_type, event_id, cancer) %>%
    group_by(exp_var, event_type, event_id) %>%
    summarize(ncancers = n_distinct(cancer))

[1m[22m`summarise()` has grouped output by 'exp_var'. You can override using the
`.groups` argument.


exp_var,event_type,n
<chr>,<chr>,<int>
age,A3,2283
age,A5,1694
age,AF,7284
age,AL,1966
age,MX,355
age,RI,765
age,SE,5300
conditiontumor,A3,12775
conditiontumor,A5,11253
conditiontumor,AF,49572


[1m[22m`summarise()` has grouped output by 'exp_var', 'event_type'. You can override
using the `.groups` argument.


In [6]:
# Per variable and event type: events that are significant in exactly one cancer type
spcancer %>%
    filter(ncancers == 1) %>%
    group_by(exp_var, event_type) %>%
    summarize(nevents = n())

[1m[22m`summarise()` has grouped output by 'exp_var'. You can override using the
`.groups` argument.


exp_var,event_type,nevents
<chr>,<chr>,<int>
age,A3,1552
age,A5,1238
age,AF,5809
age,AL,1571
age,MX,260
age,RI,552
age,SE,3932
conditiontumor,A3,3574
conditiontumor,A5,3539
conditiontumor,AF,20281


In [7]:
# Per variable and event type: events significant in at least 15 cancer types
# (the threshold is inclusive: ncancers >= 15)
spcancer %>%
    filter(ncancers >= 15) %>%
    group_by(exp_var, event_type) %>%
    summarize(nevents = n())

[1m[22m`summarise()` has grouped output by 'exp_var'. You can override using the
`.groups` argument.


exp_var,event_type,nevents
<chr>,<chr>,<int>
conditiontumor,A3,1
conditiontumor,AF,5
conditiontumor,AL,2
conditiontumor,RI,2
conditiontumor,SE,15
impurity,A3,17
impurity,A5,6
impurity,AF,16
impurity,AL,5
impurity,MX,2


## Heatmaps

In [8]:
# Explanatory variables to draw heatmaps for; each value is matched against the
# `exp_var` column by get_heatmap_data().
vars<-c("conditiontumor","impurity")

In [9]:
# Build, for one explanatory variable, the effect-size matrices used for the heatmaps.
#
# Arguments:
#   res  Long-format results with (at least) the columns exp_var, event_type, event_id,
#        cancer, padj and log2FoldChange.
#   var  Value of `exp_var` to keep.
#
# Returns a named list with one element per event type found for `var`, named
# "<var>_<event_type>". Each element is a matrix of log2FoldChange values with cancers
# in rows and events in columns, restricted to events that
#   1. have padj < 0.05 in at least 3 cancer types,
#   2. have |log2FoldChange| >= 1 in at least 3 cancer types, and
#   3. have a log2FoldChange value (no NA) in every cancer type.
get_heatmap_data <- function(res, var) {

    hm.res <- res %>%
        filter(exp_var == var)

    all_res <- list()
    for (event in unique(hm.res$event_type)) {

        hm.res.event <- hm.res %>% filter(event_type == event)

        # Events x cancers table of adjusted p-values
        pvals <- hm.res.event %>%
            tidyr::pivot_wider(id_cols = event_id, names_from = cancer, values_from = padj) %>%
            tibble::column_to_rownames("event_id")
        # Events significant in at least 3 cancer types
        signif.events <- rownames(pvals)[rowSums(pvals < 0.05, na.rm = TRUE) >= 3]

        # Events x cancers table of effect sizes. It is pivoted once from the full
        # per-event-type table so that every cancer keeps its column, including cancers
        # in which none of the selected events has a value.
        lfc <- hm.res.event %>%
            tidyr::pivot_wider(id_cols = event_id, names_from = cancer, values_from = log2FoldChange) %>%
            tibble::column_to_rownames("event_id")

        is.signif <- rownames(lfc) %in% signif.events
        # |lfc| >= 1 in at least 3 cancer types
        is.large <- rowSums(abs(lfc) >= 1, na.rm = TRUE) >= 3
        # Effect size available in every cancer type
        is.complete <- rowSums(is.na(lfc)) == 0

        # drop = FALSE keeps a data frame even when only one cancer column exists
        hm <- lfc[is.signif & is.large & is.complete, , drop = FALSE]

        # Transpose: cancers become rows, events become columns
        all_res[[paste0(var, "_", event)]] <- t(hm)
    }

    all_res
}

In [10]:
# Draw one clustered heatmap into a PDF file.
#
# Arguments:
#   hm.data  Numeric matrix to plot; its number of columns is reported in the title as
#            the number of events.
#   figfile  Path of the PDF file to write (8 x 5 inches).
#   resname  Label of the form "<variable>_<event type>": the text before the first "_"
#            and the text after the last "_" are used to build the column title.
#
# Returns the value of dev.off(), i.e. the device that is current after closing the PDF.
draw_heatmap <- function(hm.data, figfile, resname) {

    pdf(file = figfile, width = 8, height = 5)
    pdf.dev <- dev.cur()
    # Close the PDF device even when Heatmap()/draw() fails, so a failed plot does not
    # leave a dangling open device behind.
    on.exit(if (pdf.dev %in% dev.list()) dev.off(pdf.dev), add = TRUE)

    col_title <- paste(ncol(hm.data),
                       sub(".*_", "", resname),
                       "events significant for",
                       sub("_.*", "", resname))

    draw(Heatmap(hm.data,
                 show_column_names = FALSE,
                 show_column_dend = FALSE,
                 use_raster = TRUE,
                 heatmap_legend_param = list(title = ""),
                 column_title = col_title,
                 column_title_side = "bottom",
                 clustering_distance_rows = "pearson",
                 clustering_distance_columns = "pearson",
                 clustering_method_rows = "ward.D2",
                 clustering_method_columns = "ward.D2",
                 show_row_names = TRUE))

    dev.off(pdf.dev)
}

In [11]:
# One PDF heatmap per explanatory variable and event type, from the unshrunken results
for (evar in vars) {

    message("Preparing ", evar, " data")
    hm.data.list <- get_heatmap_data(cond.res, var = evar)

    # names(hm.data.list) are "<variable>_<event type>" and double as file-name stems
    for (nr in names(hm.data.list)) {
        message("Drawing ", nr, " heatmap")
        file <- paste0("../output/figures/heatmaps_effectsize/condition/", nr, ".signifevents.pdf")
        draw_heatmap(hm.data = hm.data.list[[nr]], figfile = file, resname = nr)
    }
}

Preparing conditiontumor data

Drawing conditiontumor_A3 heatmap

Drawing conditiontumor_A5 heatmap

Drawing conditiontumor_AF heatmap

Drawing conditiontumor_AL heatmap

Drawing conditiontumor_MX heatmap

Drawing conditiontumor_RI heatmap

Drawing conditiontumor_SE heatmap

Preparing impurity data

Drawing impurity_A3 heatmap

Drawing impurity_A5 heatmap

Drawing impurity_AF heatmap

Drawing impurity_AL heatmap

Drawing impurity_MX heatmap

Drawing impurity_RI heatmap

Drawing impurity_SE heatmap



In [12]:
# Same heatmaps, this time built from the shrunken coefficients

for (evar in vars) {

    message("Preparing ", evar, " data")
    hm.data.list <- get_heatmap_data(cond.res.lfsh, var = evar)

    # names(hm.data.list) are "<variable>_<event type>" and double as file-name stems
    for (nr in names(hm.data.list)) {
        message("Drawing ", nr, " heatmap")
        file <- paste0("../output/figures/heatmaps_effectsize/condition/", nr, ".lfcShrink.signifevents.pdf")
        draw_heatmap(hm.data = hm.data.list[[nr]], figfile = file, resname = nr)
    }
}

Preparing conditiontumor data

Drawing conditiontumor_A3 heatmap

Drawing conditiontumor_A5 heatmap

The automatically generated colors map from the minus and plus 99^th of
the absolute values in the matrix. There are outliers in the matrix
whose patterns might be hidden by this color mapping. You can manually
set the color to `col` argument.

Use `suppressMessages()` to turn off this message.

Drawing conditiontumor_AF heatmap

The automatically generated colors map from the minus and plus 99^th of
the absolute values in the matrix. There are outliers in the matrix
whose patterns might be hidden by this color mapping. You can manually
set the color to `col` argument.

Use `suppressMessages()` to turn off this message.

Drawing conditiontumor_AL heatmap

The automatically generated colors map from the minus and plus 99^th of
the absolute values in the matrix. There are outliers in the matrix
whose patterns might be hidden by this color mapping. You can manually
set the color to `col` a

## Volcano plots 

In [13]:
# Volcano plots (one facet per cancer) for one explanatory variable and event type,
# plus a per-cancer table with the number of events in each status class.
#
# Arguments:
#   res       Long-format results with the columns exp_var, event_type, event_id, cancer,
#             padj and log2FoldChange.
#   event     Event type to plot; also used to look up the highlight colour in the
#             global named vector `ev.colors`, which must exist when this is called.
#   var       Value of `exp_var` to plot.
#   res_type  Tag inserted in the output file names.
#   ...       Accepted but not used.
#
# Status classes:
#   "DifAltSpl"  padj < 0.05 and |log2FoldChange| >= 1 (drawn in the event colour)
#   "S"          padj < 0.05 only
#   "NS"         everything else
#
# Writes "<var>.<event>.<res_type>.coeffs.pdf" and ".coeffs.summary.tsv" under
# ../output/figures/volcano_plots/condition/. Called for these side effects.
plot_as_volcano <- function(res, event, var, res_type, ...) {

    # Keep only the rows that can be plotted, then classify them (classifying after
    # filtering avoids doing the work on rows that are discarded anyway).
    res.plt <- res %>%
        filter(exp_var == var,
               event_type == event,
               !is.na(log2FoldChange), !is.na(padj)) %>%
        mutate(status = factor(case_when(padj < 0.05 & abs(log2FoldChange) >= 1 ~ "DifAltSpl",
                                         padj < 0.05 ~ "S",
                                         TRUE ~ "NS"),
                               levels = c("DifAltSpl", "S", "NS")))

    # Name the colours by status level: an unnamed vector is matched by position
    # against the levels actually present, so a missing level (e.g. no "DifAltSpl"
    # event) would shift every colour by one.
    das.color <- ev.colors[event]
    stat.cols <- c(DifAltSpl = as.character(das.color),
                   S = "#999999",
                   NS = "#D2D1D3")

    g <- ggplot(res.plt, aes(x = log2FoldChange, y = -log10(padj), color = status)) +
        geom_point(size = 0.1) +
        geom_hline(yintercept = -log10(0.05), col = 'grey', linetype = "dashed") +
        geom_vline(xintercept = -1, col = 'grey', linetype = "dashed", linewidth = 0.5) +
        geom_vline(xintercept = 1, col = 'grey', linetype = "dashed", linewidth = 0.5) +
        theme_pubr() +
        theme(strip.background = element_blank(),
              strip.text = element_text(face = "bold", size = 12),
              text = element_text(size = 12),
              legend.position = "none",
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank()) +
        scale_color_manual(values = stat.cols) +
        labs(x = expression(Delta*"Logit(PSI)")) +
        facet_wrap(~cancer, ncol = 5, scales = "free")

    # Status classes in rows, cancers in columns, event counts as values
    res.table <- res.plt %>%
        group_by(cancer, status) %>%
        summarize(nevents = n(), .groups = "drop") %>%
        tidyr::pivot_wider(values_from = nevents, names_from = cancer, values_fill = 0)

    resfile <- paste0("../output/figures/volcano_plots/condition/", var, ".", event, ".", res_type, ".coeffs")
    # Spell out `filename` and `plot` instead of relying on partial argument matching
    ggsave(filename = paste0(resfile, ".pdf"), plot = g,
           width = 10, height = 10, units = "in", device = "pdf")
    write.table(res.table, file = paste0(resfile, ".summary.tsv"),
                row.names = FALSE, col.names = TRUE, sep = "\t")
}

In [14]:
# For each event type, the largest per-cancer sum of `total_events` among the
# "conditiontumor" rows, sorted from the largest to the smallest.
totals <- res.stats %>%
    filter(analysis == "condition", exp_var == "conditiontumor") %>%
    group_by(cancer, event_type) %>%
    summarize(n = sum(total_events), .groups = "drop") %>%
    select(-cancer) %>%
    group_by(event_type) %>%
    slice_max(n, n = 1) %>%
    distinct(event_type, n) %>%
    arrange(desc(n))

# One locuszoom colour per event type, assigned in the order of `totals`. The palette
# size follows the number of event types found rather than a fixed 7, so the names
# always line up with the colours.
ev.colors <- pal_locuszoom()(nrow(totals))
names(ev.colors) <- totals$event_type

In [None]:
# Volcano plots of the shrunken coefficients for every variable / event-type pair
vars <- c("conditiontumor", "impurity")
events <- unique(cond.res$event_type)

for (v in vars) {
    for (ev in events) {
        plot_as_volcano(cond.res.lfsh, event = ev, var = v, res_type = "lfcShrink")
    }
}