In [2]:
#MAKE_volcano_plot_Rscript V2

#Designed to make volcano plot for every omics data (proteomics, metabolomics, autoantibody) 
#and every comparison (negVSpos, cVSpos, cVSneg, cVSra)

#Note: [1] P-value threshold for autoantibody : 0.05
#      [2] P-value threshold for autoantibody : 0.01

#Modified to consider Cohen's D (instead of log2FC)

library(ggplot2)
library(ggrepel)

In [12]:
omics_list = c("autoantibody","metabolomics","proteomics")
condition_list = c("negVSpos","cVSpos","cVSneg","cVSra")

for (omics in omics_list){
    
    for (condition in condition_list){
        
        data_file = paste("../../../analysis/statistics/linear_model/differential_abundance_v2/", 
                          omics, ".", condition,".padj.v2.tsv", sep="")
        input_df = read.csv(data_file, sep="\t", header=TRUE, row.names=1)
        
        x_axis <- input_df$cohenD
        y_axis <- -log10(input_df$pval)
        cohenD <- input_df$cohenD
        
        if (condition == "negVSpos"){
            xaxis_label <- "log2FC (ACPA-positive/ACPA-negative)"
            numerator_color = "#BB2026"
            #CE8D24
            denominator_color = "#3B54A4"
            
        }
        if (condition == "cVSpos"){
            xaxis_label <- "log2FC (ACPA-positive/Control)"
            numerator_color = "#BB2026"
            denominator_color = "#5CAC64"
        }
        if (condition == "cVSneg"){
            xaxis_label <- "log2FC (ACPA-negative/Control)"
            numerator_color = "#3B54A4"
            denominator_color = "#5CAC64"
        }
        if (condition == "cVSra"){
            xaxis_label <- "log2FC (RA/control)"
            numerator_color = "#642875"
            denominator_color = "#5CAC64"
        }

        gene_list <- rownames(input_df)
        
        #make dataframe for volcano plot
        #row names = gene list
        df <- do.call(rbind, Map(data.frame, 'cohenD'=x_axis, 'pval'=y_axis, 'cohenD'=cohenD))
        rownames(df) <- gene_list
        df$genes <- row.names(df)
        

        #Thresholds for data points color
        #pvalue 0.01 = 2 (-log10 pval)
        sig_subset <- subset(df, pval > 2)
        log2pval_threshold = 2
        
        #Thresholds for data points color
        #since we decided to change the threshold of autoantibody profiles
        #pvalue 0.05 = 1.30103 (-log10 pval)
        if (omics == "autoantibody"){ 
            sig_subset <- subset(df, pval > 1.30103)
            log2pval_threshold = 1.30103
        }

        #filter out by Cohen's D
        sig_subset <- subset(sig_subset, abs(cohenD) > 0.5)
        
        #assign color of the dot (feature)
        sig_red_subset <- subset(sig_subset, cohenD > 0.5) 
        sig_blue_subset <- subset(sig_subset, cohenD < -0.5)
                
        sig_red_text_subset <- subset(sig_subset, cohenD > 0.5)
        sig_blue_text_subset <- subset(sig_subset, cohenD < -0.5)
        
        #Debug
        print ('#####')
        print (omics)
        print (condition)
        print (nrow(sig_red_subset))
        print (nrow(sig_blue_subset))
        print ('#####')
        #Debug
        
        
        figure_title = paste(omics,": ", condition, sep="")
        
        output_pdf = paste("../../../analysis/statistics/volcano_plots/", omics,
                            ".", condition, ".volcano.label.pdf", sep="")
        pdf(output_pdf)
        plot_pdf <- ggplot(df, aes(x=cohenD, y=pval))+ coord_cartesian(xlim=c(-2,2))+ 
        geom_point(colour="grey") + 
        geom_hline(yintercept = log2pval_threshold, colour="#BEBEBE", linetype="dashed") +
        geom_vline(xintercept = 0.5, colour="#BEBEBE", linetype="dashed") +
        geom_vline(xintercept = -0.5, colour="#BEBEBE", linetype="dashed") +
        geom_point(data = sig_red_subset, colour=numerator_color) +
        geom_point(data = sig_blue_subset, colour=denominator_color) +
        geom_text_repel(data=sig_red_text_subset, aes(cohenD, pval, label=genes), colour=numerator_color, size=2) +
        geom_text_repel(data=sig_blue_text_subset, aes(cohenD, pval, label=genes), colour=denominator_color, size=2) +
        ylab("-log10 (P-value)") + xlab(xaxis_label)
        ggtitle(figure_title)
        print (plot_pdf)
        dev.off()
        break
    }
    break
}

[1] "#####"
[1] "autoantibody"
[1] "negVSpos"
[1] 43
[1] 61
[1] "#####"


“ggrepel: 29 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 50 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
