In [6]:
#MAKE_volcano_plot_Rscript
#
#Designed to make volcano plot for every omics data (proteomics, metabolomics, autoantibody) 
#and every comparison (negVSpos, cVSpos, cVSneg, cVSra)
#
#Note: [1] P-value threshold for autoantibody : 0.05
#      [2] P-value threshold for all other omics (proteomics, metabolomics) : 0.01

library(ggplot2)
library(ggrepel)

In [7]:
# Draw one labelled volcano plot (PDF) for every omics dataset and every
# group comparison, from the per-feature differential-abundance tables.
omics_list <- c("autoantibody", "metabolomics", "proteomics")
condition_list <- c("negVSpos", "cVSpos", "cVSneg", "cVSra")

# Input/output locations, relative to the working directory of the notebook.
input_dir <-
    "../../../analysis/statistics/linear_model/differential_abundance_v4"
output_dir <- "../../../analysis/statistics/volcano_plots_adjust"

# Per-comparison x-axis label and point colours. "numerator" colours the
# features with log2FC > 0, "denominator" the features with log2FC < 0.
comparison_styles <- list(
    negVSpos = list(label = "log2FC (ACPA-positive/ACPA-negative)",
                    numerator = "#BB2026", denominator = "#3B54A4"),
    cVSpos = list(label = "log2FC (ACPA-positive/Control)",
                  numerator = "#BB2026", denominator = "#5CAC64"),
    cVSneg = list(label = "log2FC (ACPA-negative/Control)",
                  numerator = "#3B54A4", denominator = "#5CAC64"),
    cVSra = list(label = "log2FC (RA/control)",
                 numerator = "#642875", denominator = "#5CAC64")
)

# Minimum Cohen's D a feature needs to be highlighted.
cohen_d_cutoff <- 0.5

for (omics in omics_list) {

    # Significance cutoff on the -log10(p-value) scale:
    # 2 (p = 0.01) by default, 1.30103 (p = 0.05) for autoantibody profiles.
    neg_log10_cutoff <- if (omics == "autoantibody") 1.30103 else 2

    for (condition in condition_list) {

        # Fail loudly instead of silently reusing the previous comparison's
        # label and colours when a comparison has no style entry.
        style <- comparison_styles[[condition]]
        if (is.null(style)) {
            stop("No plot style defined for comparison: ", condition,
                 call. = FALSE)
        }
        xaxis_label <- style$label
        numerator_color <- style$numerator
        denominator_color <- style$denominator

        data_file <- file.path(
            input_dir, paste0(omics, ".", condition, ".padj.v2.tsv")
        )
        input_df <- read.csv(data_file, sep = "\t", header = TRUE,
                             row.names = 1)

        log2_fc <- input_df$fc_case_control
        p_values <- input_df$pval
        cohenD <- input_df$cohenD
        if (is.null(log2_fc) || is.null(p_values) || is.null(cohenD)) {
            stop("Missing fc_case_control, pval or cohenD column in ",
                 data_file, call. = FALSE)
        }

        # Data frame for the volcano plot, one row per feature (row names =
        # feature names). Built in a single vectorised call rather than by
        # rbind-ing one single-row data frame per feature.
        # Note: the `pval` column holds -log10(p-value), not the raw p-value.
        df <- data.frame(
            log2FC = log2_fc,
            pval = -log10(p_values),
            cohenD = cohenD,
            row.names = rownames(input_df)
        )
        df$genes <- row.names(df)

        # Features to highlight: past the significance cutoff and past the
        # Cohen's D cutoff.
        # NOTE(review): only cohenD > 0.5 is kept; if cohenD is signed in the
        # input tables, confirm whether abs(cohenD) was intended.
        sig_subset <- subset(df, pval > neg_log10_cutoff)
        sig_subset <- subset(sig_subset, cohenD > cohen_d_cutoff)

        # Split by direction of change; each subset supplies both the
        # coloured points and their text labels.
        sig_red_subset <- subset(sig_subset, log2FC > 0)
        sig_blue_subset <- subset(sig_subset, log2FC < 0)

        #Debug
        print ('#####')
        print (omics)
        print (condition)
        print (nrow(sig_red_subset))
        print (nrow(sig_blue_subset))
        print ('#####')
        #Debug

        figure_title <- paste0(omics, ": ", condition)

        output_pdf <- file.path(
            output_dir, paste0(omics, ".", condition, ".volcano.label.pdf")
        )

        # Every layer, including ggtitle(), must be chained with `+`;
        # a layer left on its own line is evaluated and thrown away.
        plot_pdf <- ggplot(df, aes(x = log2FC, y = pval)) +
            coord_cartesian(xlim = c(-2, 2)) +
            geom_point(colour = "grey") +
            # Horizontal rule at the cutoff actually used for this omics.
            geom_hline(yintercept = neg_log10_cutoff) +
            geom_point(data = sig_red_subset, colour = numerator_color) +
            geom_point(data = sig_blue_subset, colour = denominator_color) +
            geom_text_repel(data = sig_red_subset,
                            aes(log2FC, pval, label = genes),
                            colour = numerator_color, size = 2) +
            geom_text_repel(data = sig_blue_subset,
                            aes(log2FC, pval, label = genes),
                            colour = denominator_color, size = 2) +
            ylab("-Log10 (P-value)") +
            xlab(xaxis_label) +
            ggtitle(figure_title)

        # Always close the PDF device, even if rendering fails, so later
        # iterations do not draw into a leaked device.
        pdf(output_pdf)
        tryCatch(
            print(plot_pdf),
            finally = dev.off()
        )
    }
}

[1] "#####"
[1] "autoantibody"
[1] "negVSpos"
[1] 61
[1] 43
[1] "#####"


“ggrepel: 45 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 14 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "autoantibody"
[1] "cVSpos"
[1] 6
[1] 5
[1] "#####"
[1] "#####"
[1] "autoantibody"
[1] "cVSneg"
[1] 2
[1] 2
[1] "#####"
[1] "#####"
[1] "autoantibody"
[1] "cVSra"
[1] 0
[1] 5
[1] "#####"
[1] "#####"
[1] "metabolomics"
[1] "negVSpos"
[1] 1
[1] 3
[1] "#####"
[1] "#####"
[1] "metabolomics"
[1] "cVSpos"
[1] 4
[1] 22
[1] "#####"


“ggrepel: 7 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "metabolomics"
[1] "cVSneg"
[1] 6
[1] 20
[1] "#####"
[1] "#####"
[1] "metabolomics"
[1] "cVSra"
[1] 7
[1] 38
[1] "#####"


“ggrepel: 14 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "proteomics"
[1] "negVSpos"
[1] 18
[1] 8
[1] "#####"
[1] "#####"
[1] "proteomics"
[1] "cVSpos"
[1] 31
[1] 16
[1] "#####"


“ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 1 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "proteomics"
[1] "cVSneg"
[1] 30
[1] 47
[1] "#####"


“ggrepel: 5 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
“ggrepel: 33 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


[1] "#####"
[1] "proteomics"
[1] "cVSra"
[1] 18
[1] 56
[1] "#####"


“ggrepel: 29 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
