In [3]:
#MAKE_volcano_plot_Rscript
#
#Designed to make volcano plot for every omics data (proteomics, metabolomics, autoantibody) 
#and every comparison (negVSpos, cVSpos, cVSneg, cVSra)
#
#Note: [1] P-value threshold for autoantibody : 0.05
#      [2] P-value threshold for autoantibody : 0.01

library(ggplot2)
library(ggrepel)

In [4]:
# Draw one labelled volcano plot (effect size vs. -log10 adjusted P-value) per
# omics data set and per group comparison, and write each one to its own PDF.
#
# Input : <input_dir><omics>.<condition>.tsv, tab separated, first column used
#         as feature names, with columns `cohenD` and `all_adj_pval`.
# Output: <output_dir><omics>.<condition>.volcano.label.pdf
omics_list <- c("metabolomics", "proteomics")
condition_list <- c("cVSpos", "cVSneg", "negVSpos")

input_dir <- "../../../analysis/statistics/linear_model/differential_abundance_logit/"
output_dir <- "../../../analysis/statistics/volcano_plots/"

# Significance cut-offs shared by every plot.
# |Cohen's D| must exceed 0.2 and the adjusted P-value must be below 0.05
# (1.30103 is -log10(0.05), i.e. the cut-off on the plotted y scale).
cohend_threshold <- 0.2
neg_log10_pval_threshold <- 1.30103

# Fixed axis ranges so that all plots are directly comparable.
xlim_range <- c(-1.2, 1.2)
ylim_bottom <- 0
ylim_top <- 3.5

# Return the x-axis label and point colours for one comparison.
# Fails loudly on an unknown comparison instead of silently reusing the
# settings left over from a previous loop iteration.
comparison_settings <- function(condition) {
    switch(
        condition,
        negVSpos = list(
            xaxis_label = "Effect size (Cohen's D): ACPA+ RA vs. ACPA– RA",
            numerator_color = "#636363",
            denominator_color = "#B57623"
        ),
        cVSpos = list(
            xaxis_label = "Effect size (Cohen's D): ACPA+ RA vs. Control",
            numerator_color = "#636363",
            denominator_color = "#78AF3F"
        ),
        cVSneg = list(
            xaxis_label = "Effect size (Cohen's D): ACPA– RA vs. Control",
            numerator_color = "#B57623",
            denominator_color = "#78AF3F"
        ),
        stop("Unknown condition: ", condition, call. = FALSE)
    )
}

# Open a PDF graphics device at `path`.
# The plain pdf() device cannot encode the en dash (U+2013) used in the axis
# labels and replaces it with "-" (with a warning); the cairo device keeps it.
# Fall back to pdf() when R was built without cairo support.
open_pdf_device <- function(path) {
    if (isTRUE(capabilities("cairo")[["cairo"]])) {
        grDevices::cairo_pdf(path)
    } else {
        grDevices::pdf(path)
    }
}

# pdf devices do not create missing directories; make sure the target exists.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

for (omics in omics_list) {
    for (condition in condition_list) {

        settings <- comparison_settings(condition)

        data_file <- paste0(input_dir, omics, ".", condition, ".tsv")
        input_df <- read.csv(data_file, sep = "\t", header = TRUE, row.names = 1)

        required_columns <- c("cohenD", "all_adj_pval")
        missing_columns <- setdiff(required_columns, colnames(input_df))
        if (length(missing_columns) > 0) {
            stop("Missing column(s) in ", data_file, ": ",
                 paste(missing_columns, collapse = ", "), call. = FALSE)
        }

        # Data frame for the volcano plot; row names = feature names.
        # Built in one vectorised call (no per-row rbind).
        df <- data.frame(
            cohenD = input_df$cohenD,
            neg_log10_padj = -log10(input_df$all_adj_pval),
            row.names = rownames(input_df)
        )
        df$genes <- rownames(df)

        # Features passing the P-value cut-off, split by direction of effect.
        # subset() drops rows where the comparison is NA.
        sig_subset <- subset(df, neg_log10_padj > neg_log10_pval_threshold)
        sig_red_subset <- subset(sig_subset, cohenD > cohend_threshold)
        sig_blue_subset <- subset(sig_subset, cohenD < -cohend_threshold)

        #Debug
        print("#####")
        print(omics)
        print(condition)
        print(paste0("up:", nrow(sig_red_subset)))
        print(paste0("down:", nrow(sig_blue_subset)))
        print("#####")
        #Debug

        figure_title <- paste0(omics, ": ", condition)

        #draw plots:start
        plot_pdf <- ggplot(df, aes(x = cohenD, y = neg_log10_padj)) +
            coord_cartesian(xlim = xlim_range, ylim = c(ylim_bottom, ylim_top)) +
            # all features in light grey, significant ones re-drawn on top
            geom_point(colour = "#DCDCDC", size = 2.5, stroke = 0) +
            geom_hline(yintercept = neg_log10_pval_threshold,
                       colour = "#BEBEBE", linetype = "dashed") +
            geom_vline(xintercept = cohend_threshold,
                       colour = "#BEBEBE", linetype = "dashed") +
            geom_vline(xintercept = -cohend_threshold,
                       colour = "#BEBEBE", linetype = "dashed") +
            geom_point(data = sig_red_subset,
                       colour = settings$numerator_color,
                       size = 2.5, stroke = 0) +
            geom_point(data = sig_blue_subset,
                       colour = settings$denominator_color,
                       size = 2.5, stroke = 0) +
            geom_text_repel(data = sig_red_subset, aes(label = genes),
                            colour = settings$numerator_color, size = 2) +
            geom_text_repel(data = sig_blue_subset, aes(label = genes),
                            colour = settings$denominator_color, size = 2) +
            ylab("-log10 (P-value)") + xlab(settings$xaxis_label) +
            theme_bw() +
            theme(axis.line = element_line(colour = "black"),
                  panel.grid.major = element_blank(),
                  panel.grid.minor = element_blank(),
                  panel.border = element_blank(),
                  panel.background = element_blank()) +
            ggtitle(figure_title)
        #draw plots:end

        output_pdf <- paste0(output_dir, omics, ".", condition,
                             ".volcano.label.pdf")

        # Open the device only once the plot object exists, and always close
        # it again, even if rendering fails, so no device is left dangling.
        open_pdf_device(output_pdf)
        tryCatch(print(plot_pdf), finally = dev.off())
    }
}

[1] "#####"
[1] "metabolomics"
[1] "cVSpos"
[1] "up:8"
[1] "down:37"
[1] "#####"


“ggrepel: 27 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "metabolomics"
[1] "cVSneg"
[1] "up:26"
[1] "down:57"
[1] "#####"


“ggrepel: 8 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 31 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“for 'Effect size (Cohen's D): ACPA– RA vs. Control' in 'mbcsToSbcs': - substituted for – (U+2013)”


[1] "#####"
[1] "metabolomics"
[1] "negVSpos"
[1] "up:11"
[1] "down:42"
[1] "#####"


“ggrepel: 39 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“for 'Effect size (Cohen's D): ACPA+ RA vs. ACPA– RA' in 'mbcsToSbcs': - substituted for – (U+2013)”


[1] "#####"
[1] "proteomics"
[1] "cVSpos"
[1] "up:149"
[1] "down:73"
[1] "#####"


“ggrepel: 138 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 58 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "proteomics"
[1] "cVSneg"
[1] "up:549"
[1] "down:263"
[1] "#####"


“ggrepel: 544 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 252 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“for 'Effect size (Cohen's D): ACPA– RA vs. Control' in 'mbcsToSbcs': - substituted for – (U+2013)”


[1] "#####"
[1] "proteomics"
[1] "negVSpos"
[1] "up:93"
[1] "down:71"
[1] "#####"


“ggrepel: 80 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 59 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“for 'Effect size (Cohen's D): ACPA+ RA vs. ACPA– RA' in 'mbcsToSbcs': - substituted for – (U+2013)”
