In [1]:
#01_DifferentialAbundance_and_cohens_D_adjust_effect
#Derived from: 01_DifferentialAbundance_and_cohens_D
#
#
#Purpose: Use logistic regression to identify disease-associated features (for each omics)
#[1] Perform logistic regression models while adjusting for sex, age, BMI, smoking history, prednisone use, and use of bDMARDs and csDMARDs.
#ex) sample_phenotype ~ feature_abundance + sex + age + BMI + smoking_history + prednisone + bDMARDs + csDMARDS
#[2] Keep features whose Cohen's d is above the medium effect-size threshold (i.e., 0.5).
#[3] adjust confounding effects if they are significant in marginal model.
#Features that fulfill [1] + [2] are considered differentially abundant.

library("effsize")
library(lme4)
library(lmerTest)
library(stringr)
library(effects)
library(dplyr)


“package ‘effsize’ was built under R version 4.2.3”
“package ‘lme4’ was built under R version 4.2.3”
Loading required package: Matrix

“package ‘Matrix’ was built under R version 4.2.3”
“package ‘lmerTest’ was built under R version 4.2.3”

Attaching package: ‘lmerTest’


The following object is masked from ‘package:lme4’:

    lmer


The following object is masked from ‘package:stats’:

    step


“package ‘stringr’ was built under R version 4.2.3”
“package ‘effects’ was built under R version 4.2.3”
Loading required package: carData

“package ‘carData’ was built under R version 4.2.3”
lattice theme set by effectsTheme()
See ?effectsTheme for details.

“package ‘dplyr’ was built under R version 4.2.3”

Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [4]:
#Main
#Shared configuration: where results are written and where the input tables live.
#All paths are relative to the notebook's working directory.
output_dir = '../../../analysis/statistics/linear_model/differential_abundance_logit/'

#make directory if it does not exist
#recursive = TRUE: output_dir is several levels deep; without it dir.create()
#only warns and creates nothing when an intermediate directory is missing.
if (!dir.exists(output_dir)){
    dir.create(output_dir, recursive = TRUE)
} else {
    print("Dir already exists!")
}

#Input tables (tab-separated; read with row.names=1 by the analysis cells).
metabolomics_data_file = '../../../preprocessed_data/metabolomics/metabolone_raw_norm_preprocessed.v2.tsv'
proteomics_data_file = '../../../preprocessed_data/proteomics/somascan_anml.T.v2.tsv'
patient_info_file = '../../../preprocessed_data/meta/patient_info_for_statistics.v3.T.tsv'

[1] "Dir already exists!"


In [27]:
#Control vs ACPA-negative, stratified by DAS28-CRP disease activity.
#
#For the feature named in formula_string, fit glm() (default Gaussian family,
#i.e. an ordinary linear model) on all samples and on each disease-activity
#stratum of the cases, and print the p-value of the first right-hand-side term
#(acpa for the active formula). Controls (acpa == 0) are kept in every stratum.
input_data_df <- read.csv(metabolomics_data_file, sep="\t", header=TRUE, row.names=1)
# input_data_df <- read.csv(proteomics_data_file, sep="\t", header=TRUE, row.names=1)
patient_info_df <- read.csv(patient_info_file, sep="\t", header=TRUE, row.names=1)

#Stack the metadata rows on top of the feature rows (rbind on data frames
#matches columns by name), then transpose so the variables referenced below
#(acpa, das28crp, covariates, features) become columns.
merged_data_df <- rbind(patient_info_df, input_data_df)
merged_data_df <- as.data.frame(t(merged_data_df))

#note: acpa-neg: 2, acpa-pos: 1, control: 0
control_vs_acpa_neg <- dplyr::filter(merged_data_df, acpa == 0 | acpa == 2)
control_vs_acpa_neg$acpa[control_vs_acpa_neg$acpa == 2] <- 1   #changing class 2 -> 1; making 0 vs 1

#Model to fit. The commented lines are alternative models kept for quick switching.
# formula_string <- "acpa ~ `bilirubin (Z,Z)` + sex + age + bmi + smoking + pred + bdmard + all_csdmard"
formula_string <- "`carnitine` ~ acpa + sex + age + bmi + smoking + pred + bdmard + all_csdmard"
# formula_string <- "acpa ~ `BLVRB_17148-7` + sex + age + bmi + smoking + pred + bdmard + all_csdmard"

model_formula <- as.formula(formula_string)
#Report the first right-hand-side term by NAME rather than by position: if that
#term is dropped as aliased (e.g. a stratum without cases), a positional [2]
#would return another covariate's p-value; a name lookup yields NA instead.
term_of_interest <- attr(terms(model_formula), "term.labels")[1]

#Case-selection rules, in reporting order. Cut-offs are inclusive at the upper
#end of each band (<= 3.2, <= 5.1) so that a case sitting exactly on a
#threshold is assigned to a stratum instead of being dropped from all of them.
das28crp_strata <- list(
    "all disease activity"        = function(das) rep(TRUE, length(das)),
    "high (> 5.1)"                = function(das) das > 5.1,
    "moderate (> 3.2 and <= 5.1)" = function(das) das > 3.2 & das <= 5.1,
    "moderate or high (> 3.2)"    = function(das) das > 3.2,
    "remission or low (<= 3.2)"   = function(das) das <= 3.2
)

for (stratum_label in names(das28crp_strata)) {
    case_in_stratum <- das28crp_strata[[stratum_label]](control_vs_acpa_neg$das28crp)
    #Controls always stay; cases stay only when they fall in the stratum.
    keep_row <- (control_vs_acpa_neg$acpa == 1 & case_in_stratum) | control_vs_acpa_neg$acpa == 0
    #which() drops rows whose condition is NA (cases without a das28crp value).
    tmp_control_vs_acpa_neg <- control_vs_acpa_neg[which(keep_row), , drop = FALSE]
    glm_results <- glm(model_formula, data = tmp_control_vs_acpa_neg)
    #Column 4 of the coefficient table holds the p-values.
    print(coef(summary(glm_results))[, 4][term_of_interest])
}

       acpa 
0.006908648 
       acpa 
0.007303031 
     acpa 
0.8146624 
      acpa 
0.02984238 
    acpa 
0.514215 


In [28]:
#control vs acpa pos, stratified by DAS28-CRP disease activity.
#
#Uses merged_data_df and formula_string as left in the session by an earlier
#cell (acpa coding there: acpa-neg: 2, acpa-pos: 1, control: 0). For every
#stratum the model is fitted with glm() (default Gaussian family) and the
#p-value of the first right-hand-side term of formula_string is printed.
#Controls (acpa == 0) are kept in every stratum.
control_vs_acpa_pos <- dplyr::filter(merged_data_df, acpa == 0 | acpa == 1)

model_formula <- as.formula(formula_string)
#Look the reported term up by NAME: a positional [2] would silently return
#another covariate's p-value if the first term were dropped as aliased.
term_of_interest <- attr(terms(model_formula), "term.labels")[1]

#Case-selection rules, in reporting order. Upper bounds are inclusive
#(<= 3.2, <= 5.1) so a case exactly on a threshold is not lost from all strata.
das28crp_strata <- list(
    "all disease activity"        = function(das) rep(TRUE, length(das)),
    "high (> 5.1)"                = function(das) das > 5.1,
    "moderate (> 3.2 and <= 5.1)" = function(das) das > 3.2 & das <= 5.1,
    "moderate or high (> 3.2)"    = function(das) das > 3.2,
    "remission or low (<= 3.2)"   = function(das) das <= 3.2
)

for (stratum_label in names(das28crp_strata)) {
    case_in_stratum <- das28crp_strata[[stratum_label]](control_vs_acpa_pos$das28crp)
    #Controls always stay; cases stay only when they fall in the stratum.
    keep_row <- (control_vs_acpa_pos$acpa == 1 & case_in_stratum) | control_vs_acpa_pos$acpa == 0
    #which() drops rows whose condition is NA (cases without a das28crp value).
    tmp_control_vs_acpa_pos <- control_vs_acpa_pos[which(keep_row), , drop = FALSE]
    glm_results <- glm(model_formula, data = tmp_control_vs_acpa_pos)
    #Column 4 of the coefficient table holds the p-values.
    print(coef(summary(glm_results))[, 4][term_of_interest])
}


     acpa 
0.3049276 
     acpa 
0.5596848 
     acpa 
0.5146207 
     acpa 
0.4380508 
     acpa 
0.8310324 


In [29]:
#ACPA-negative vs ACPA-positive, stratified by DAS28-CRP disease activity.
#
#Only the two patient groups are compared here (no controls), with a reduced
#covariate set. Uses merged_data_df as left in the session by an earlier cell
#(acpa coding there: acpa-neg: 2, acpa-pos: 1, control: 0).
formula_string <- "`bilirubin (Z,Z)` ~ acpa + pred + all_csdmard"
acpa_neg_vs_acpa_pos <- dplyr::filter(merged_data_df, acpa == 1 | acpa == 2)
#Recode to 0/1. Order matters: 1 -> 0 must happen before 2 -> 1.
acpa_neg_vs_acpa_pos$acpa[acpa_neg_vs_acpa_pos$acpa == 1] <- 0   #changing class 1 -> 0
acpa_neg_vs_acpa_pos$acpa[acpa_neg_vs_acpa_pos$acpa == 2] <- 1   #changing class 2 -> 1

model_formula <- as.formula(formula_string)
#Look the reported term up by NAME: a positional [2] would silently return
#another covariate's p-value if the first term were dropped as aliased.
term_of_interest <- attr(terms(model_formula), "term.labels")[1]

#Row-selection rules, in reporting order. Upper bounds are inclusive
#(<= 3.2, <= 5.1) so a patient exactly on a threshold is not lost from all strata.
das28crp_strata <- list(
    "all disease activity"        = function(das) rep(TRUE, length(das)),
    "high (> 5.1)"                = function(das) das > 5.1,
    "moderate (> 3.2 and <= 5.1)" = function(das) das > 3.2 & das <= 5.1,
    "moderate or high (> 3.2)"    = function(das) das > 3.2,
    "remission or low (<= 3.2)"   = function(das) das <= 3.2
)

for (stratum_label in names(das28crp_strata)) {
    in_stratum <- das28crp_strata[[stratum_label]](acpa_neg_vs_acpa_pos$das28crp)
    #which() drops rows whose condition is NA (patients without a das28crp value).
    tmp_acpa_neg_vs_acpa_pos <- acpa_neg_vs_acpa_pos[which(in_stratum), , drop = FALSE]
    glm_results <- glm(model_formula, data = tmp_acpa_neg_vs_acpa_pos)
    #Column 4 of the coefficient table holds the p-values.
    print(coef(summary(glm_results))[, 4][term_of_interest])
}



     acpa 
0.3265701 
     acpa 
0.8005021 
      acpa 
0.02431009 
      acpa 
0.04100682 
     acpa 
0.7633891 
