In [1]:
#MAKE_volcano_plot_Rscript
#
#Designed to make volcano plot for every omics data (proteomics, metabolomics, autoantibody) 
#and every comparison (negVSpos, cVSpos, cVSneg, cVSra)
#
#Note: [1] P-value threshold for autoantibody : 0.05
#      [2] P-value threshold for proteomics and metabolomics : 0.01

library(ggplot2)
library(ggrepel)

In [2]:
# Draw one labelled volcano plot (PDF) per omics layer and group comparison.
#
# For each (omics, condition) pair the script reads the differential-abundance
# table, plots `fc_case_control` against -log10(`pval`), and highlights and
# labels features that pass both the P-value cut-off and |Cohen's d| > 0.5.
omics_list <- c("autoantibody", "metabolomics", "proteomics")
condition_list <- c("cVSpos", "cVSneg", "cVSra", "negVSpos")

# Per-comparison x-axis label and point colours.
# `up` colours features with a positive fold change (numerator group),
# `down` colours features with a negative fold change (denominator group).
comparison_style <- list(
    negVSpos = list(label = "log2(fold-change: ACPA-positive/ACPA-negative)",
                    up = "#636363", down = "#B57623"),
    cVSpos   = list(label = "log2(fold-change: ACPA-positive/Control)",
                    up = "#636363", down = "#78AF3F"),
    cVSneg   = list(label = "log2(fold-change: ACPA-negative/Control)",
                    up = "#B57623", down = "#78AF3F"),
    cVSra    = list(label = "log2(fold-change: RA/control)",
                    up = "#4572B8", down = "#78AF3F")
)

# Significance cut-off on the -log10(P) scale:
#   2       = -log10(0.01), used for proteomics and metabolomics
#   1.30103 = -log10(0.05), relaxed cut-off used for autoantibody profiles
neg_log10_p_cutoff_by_omics <- c(autoantibody = 1.30103,
                                 metabolomics = 2,
                                 proteomics = 2)

# Upper y-axis limit per omics layer (the lower limit is always 0).
# The original code compared against "metabolites", which never matched the
# "metabolomics" entry of omics_list, so the limit of 7 was silently ignored.
y_top_by_omics <- c(autoantibody = 4, metabolomics = 7, proteomics = 6)

input_dir <- "../../../analysis/statistics/linear_model/differential_abundance_v2/"
output_dir <- "../../../analysis/statistics/volcano_plots/"

for (omics in omics_list) {

    neg_log10_p_cutoff <- neg_log10_p_cutoff_by_omics[[omics]]
    y_top <- y_top_by_omics[[omics]]

    for (condition in condition_list) {

        style <- comparison_style[[condition]]

        data_file <- paste0(input_dir, omics, ".", condition, ".padj.v2.tsv")
        input_df <- read.csv(data_file, sep = "\t", header = TRUE, row.names = 1)

        # Plotting table: one row per feature, row names = feature names.
        # `pval` holds -log10(P); `log2FC` is taken as-is from fc_case_control
        # (presumably already on the log2 scale, per the axis label - confirm).
        df <- data.frame(log2FC = input_df$fc_case_control,
                         pval = -log10(input_df$pval),
                         cohenD = input_df$cohenD,
                         row.names = rownames(input_df))
        df$genes <- rownames(df)

        # Keep features passing the P-value cut-off and the effect-size filter.
        sig_subset <- subset(df, pval > neg_log10_p_cutoff & abs(cohenD) > 0.5)

        # Split by direction of change to colour and label each side separately.
        sig_up <- subset(sig_subset, log2FC > 0)
        sig_down <- subset(sig_subset, log2FC < 0)

        # Debug: number of highlighted features on each side.
        print("#####")
        print(omics)
        print(condition)
        print(nrow(sig_up))
        print(nrow(sig_down))
        print("#####")

        figure_title <- paste0(omics, ": ", condition)
        output_pdf <- paste0(output_dir, omics, ".", condition, ".volcano.label.pdf")

        # NOTE: geom_text_repel drops labels it cannot place (ggrepel's
        # "unlabeled data points" warning); raise max.overlaps if every
        # highlighted feature must be labelled.
        volcano <- ggplot(df, aes(x = log2FC, y = pval)) +
            coord_cartesian(xlim = c(-2, 2), ylim = c(0, y_top)) +
            geom_point(colour = "#DCDCDC") +
            geom_hline(yintercept = neg_log10_p_cutoff, colour = "#BEBEBE",
                       linetype = "dashed") +
            geom_point(data = sig_up, colour = style$up) +
            geom_point(data = sig_down, colour = style$down) +
            geom_text_repel(data = sig_up, aes(log2FC, pval, label = genes),
                            colour = style$up, size = 2) +
            geom_text_repel(data = sig_down, aes(log2FC, pval, label = genes),
                            colour = style$down, size = 2) +
            ylab("-log10 (P-value)") + xlab(style$label) +
            theme_bw() +
            theme(axis.line = element_line(colour = "black"),
                  panel.grid.major = element_blank(),
                  panel.grid.minor = element_blank(),
                  panel.border = element_blank(),
                  panel.background = element_blank()) +
            ggtitle(figure_title)

        # Always close the PDF device, even if rendering fails, so a failed
        # plot does not leave a dangling device for later iterations.
        pdf(output_pdf)
        tryCatch(print(volcano), finally = dev.off())
    }
}

[1] "#####"
[1] "autoantibody"
[1] "cVSpos"
[1] 17
[1] 17
[1] "#####"


“ggrepel: 4 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "autoantibody"
[1] "cVSneg"
[1] 4
[1] 3
[1] "#####"
[1] "#####"
[1] "autoantibody"
[1] "cVSra"
[1] 1
[1] 5
[1] "#####"
[1] "#####"
[1] "autoantibody"
[1] "negVSpos"
[1] 61
[1] 43
[1] "#####"


“ggrepel: 50 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 23 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "metabolomics"
[1] "cVSpos"
[1] 13
[1] 23
[1] "#####"


“ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "metabolomics"
[1] "cVSneg"
[1] 15
[1] 25
[1] "#####"


“ggrepel: 5 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "metabolomics"
[1] "cVSra"
[1] 15
[1] 50
[1] "#####"


“ggrepel: 36 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "metabolomics"
[1] "negVSpos"
[1] 2
[1] 3
[1] "#####"
[1] "#####"
[1] "proteomics"
[1] "cVSpos"
[1] 25
[1] 32
[1] "#####"


“ggrepel: 16 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 25 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "proteomics"
[1] "cVSneg"
[1] 23
[1] 49
[1] "#####"


“ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 26 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "proteomics"
[1] "cVSra"
[1] 26
[1] 111
[1] "#####"


“ggrepel: 18 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 83 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "proteomics"
[1] "negVSpos"
[1] 18
[1] 9
[1] "#####"


“ggrepel: 3 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
