In [22]:
# draw_scatterplots_for_figure2B    2022.08.18
#
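# Draw scatter plots of ACPA-negative vs. ACPA-positive correlation
# coefficients (rho) for each omics / clinical-variable combination.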

library(ggplot2)
library(dplyr)
library(ggpubr)

In [52]:
draw_scatterplot <- function(data_file, output_pdf, omics, clinical_variable) {
    # Plot ACPA_neg_rho (x) against ACPA_pos_rho (y) and save the figure as PDF.
    #
    # Args:
    #   data_file: path to a tab-separated file with a header row that contains
    #              the columns ACPA_neg_rho and ACPA_pos_rho.
    #   output_pdf: path of the PDF file to write; its directory is created
    #               if it does not exist yet.
    #   omics: label used as the first part of the plot title.
    #   clinical_variable: label used as the second part of the plot title.
    #
    # Side effects:
    #   Prints the Spearman correlation test of the two columns and writes
    #   the plot to output_pdf.
    #
    # Returns:
    #   The value of ggsave().
    #
    # Raises:
    #   An error if data_file does not exist or a required column is missing.

    if (!file.exists(data_file)) {
        stop("Input file not found: ", data_file, call. = FALSE)
    }

    data_df <- read.csv(data_file, sep = "\t", header = TRUE)

    # Fail early with a readable message instead of an opaque cor.test() /
    # ggscatter() error when the expected columns are absent.
    required_cols <- c("ACPA_neg_rho", "ACPA_pos_rho")
    missing_cols <- setdiff(required_cols, names(data_df))
    if (length(missing_cols) > 0) {
        stop("Missing column(s) in ", data_file, ": ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    # Correlation computed on all rows of the input table.
    print(cor.test(data_df$ACPA_neg_rho, data_df$ACPA_pos_rho,
                   method = "spearman"))

    # Scatter plot with regression line and in-plot Spearman coefficient.
    # NOTE(review): xlim()/ylim() drop observations outside [-0.8, 0.8] before
    # the regression line and the in-plot coefficient are computed, so the
    # annotated value may differ from the cor.test() result printed above.
    # Confirm this is intended; coord_cartesian() would zoom without dropping.
    p <- ggscatter(data_df, x = "ACPA_neg_rho", y = "ACPA_pos_rho",
                   color = "#636363", size = 3,
                   alpha = 0.7,
                   add = "reg.line",
                   add.params = list(color = "orange2", fill = "lightgray"),
                   cor.coef = TRUE,
                   cor.coeff.args = list(method = "spearman")) +
        geom_hline(yintercept = 0, linetype = "dashed", color = "#636363") +
        geom_vline(xintercept = 0, linetype = "dashed", color = "#636363") +
        xlim(-0.8, 0.8) + ylim(-0.8, 0.8) +
        ggtitle(paste0(omics, "-", clinical_variable)) +
        theme_bw()

    # Make sure the target directory exists so ggsave() cannot fail on it.
    output_dir <- dirname(output_pdf)
    if (!dir.exists(output_dir)) {
        dir.create(output_dir, recursive = TRUE)
    }

    # Save to PDF
    ggsave(output_pdf, plot = p, width = 10, height = 10)
}

In [54]:
# Driver: draw one scatter plot per omics / clinical-variable combination.
# Input tables are read from data_dir and the PDFs are written to data_dir/plot.
omics_list <- c("metabolomics", "proteomics")
# omics_list <- c("metabolomics")
clinical_variable_list <- c("crp", "esr", "das28crp")
# clinical_variable_list <- c("crp")

# Set directory
data_dir <- "../../../analysis/statistics/omics_clinical_feature_correlation"

# Derive the plot directory from data_dir so both stay in sync when the
# data location changes.
plot_dir <- file.path(data_dir, "plot")
if (!dir.exists(plot_dir)) {
    dir.create(plot_dir, recursive = TRUE)
}

# Loop through omics and clinical variables
for (omics in omics_list) {
    for (clinical_variable in clinical_variable_list) {
        print(paste0(omics, " ~ ", clinical_variable))
        data_file <- file.path(
            data_dir, sprintf("%s_%s.rho.tsv", omics, clinical_variable)
        )
        output_pdf <- file.path(
            plot_dir, sprintf("%s_%s.rho.plot.pdf", omics, clinical_variable)
        )

        draw_scatterplot(data_file, output_pdf, omics, clinical_variable)
    }
}



[1] "metabolomics ~ crp"


“Cannot compute exact p-value with ties”



	Spearman's rank correlation rho

data:  data_df$ACPA_neg_rho and data_df$ACPA_pos_rho
S = 145944012, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.2668518 

[1] "metabolomics ~ esr"


“Cannot compute exact p-value with ties”



	Spearman's rank correlation rho

data:  data_df$ACPA_neg_rho and data_df$ACPA_pos_rho
S = 155752831, p-value = 7.79e-13
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.2175773 

[1] "metabolomics ~ das28crp"


“Cannot compute exact p-value with ties”



	Spearman's rank correlation rho

data:  data_df$ACPA_neg_rho and data_df$ACPA_pos_rho
S = 152705429, p-value = 1.557e-14
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.2328859 

[1] "proteomics ~ crp"


“Cannot compute exact p-value with ties”



	Spearman's rank correlation rho

data:  data_df$ACPA_neg_rho and data_df$ACPA_pos_rho
S = 3.8296e+10, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.4027443 



“[1m[22mRemoved 1 row containing non-finite outside the scale range (`stat_smooth()`).”
“[1m[22mRemoved 1 row containing non-finite outside the scale range (`stat_cor()`).”
“[1m[22mRemoved 1 row containing missing values or values outside the scale range (`geom_point()`).”


[1] "proteomics ~ esr"


“Cannot compute exact p-value with ties”



	Spearman's rank correlation rho

data:  data_df$ACPA_neg_rho and data_df$ACPA_pos_rho
S = 3.9291e+10, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.3872141 

[1] "proteomics ~ das28crp"


“Cannot compute exact p-value with ties”



	Spearman's rank correlation rho

data:  data_df$ACPA_neg_rho and data_df$ACPA_pos_rho
S = 6.0118e+10, p-value = 1e-07
alternative hypothesis: true rho is not equal to 0
sample estimates:
       rho 
0.06240959 

