In [1]:
#01_DifferentialAbundance_and_cohens_D
#
#Use a linear model to identify differentially abundant features (for each omics).
#[1] features whose group coefficient is significant (p < 0.05; p < 0.01)
#           -> i.e., whether the grouping variable (predictor) affects the abundance of the feature (response).
#[2] features whose Cohen's d is above medium (i.e., 0.5)
#Features that fulfill [1] + [2] will be considered differentially abundant.

library("effsize")
library(lme4)
library(lmerTest)
library(stringr)
library(effects)
library(dplyr)


Loading required package: Matrix


Attaching package: ‘lmerTest’


The following object is masked from ‘package:lme4’:

    lmer


The following object is masked from ‘package:stats’:

    step


Loading required package: carData

lattice theme set by effectsTheme()
See ?effectsTheme for details.


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [16]:
# Load a feature table, keep the rows labelled age_status 1 or 0, and run the
# effect-size / linear-model comparison between those two groups.
#
# Args:
#   input_data_file: path to a tab-separated file whose first column holds the
#                    row names; the table is transposed after loading.
#   output_dir:      prefix (directory path) for the result file.
#   output_str:      base name for the result file.
#
# Returns:
#   The transposed data frame restricted to rows with age_status 1 or 0.
main <- function(input_data_file, output_dir, output_str){

    # Read the table and flip it so the file's rows become columns.
    raw_table <- read.delim(input_data_file, header=TRUE, row.names=1)
    transposed_df <- as.data.frame(t(raw_table))

    # Restrict to the two age_status groups that are compared below.
    young_vs_old_df <- filter(transposed_df, age_status == 1 | age_status == 0)

    # Group 1 is compared against group 0; results go to '<output_str>.youngVSold.tsv'.
    run_cohenD_and_glm(young_vs_old_df, 1, 0, output_dir, output_str, '.youngVSold')

    young_vs_old_df
}

# For every feature column, compare two age_status groups and write one row per
# feature (coefficient, Cohen's d, p-value) to a tab-separated file.
#
# Args:
#   data_df:           data frame with one row per sample; must contain an
#                      `age_status` column. The first `num_metadata_cols`
#                      columns are skipped; all remaining columns are features.
#   condition_a_num:   age_status value defining group a.
#   condition_b_num:   age_status value defining group b.
#   output_dir, output_str, output_type:
#                      concatenated (plus '.tsv') to form the output path.
#   num_metadata_cols: number of leading non-feature columns (default 3,
#                      matching the original hard-coded `i > 3`).
#
# Output columns:
#   coef   - slope of a Gaussian glm of the feature on a 1/0 indicator
#            (1 = group a, 0 = group b), i.e. mean(a) - mean(b).
#   cohenD - estimate from cohen.d(group a values, group b values).
#   pval   - p-value of that slope.
#
# Returns: NULL, invisibly (the function is called for its file side effect).
run_cohenD_and_glm <- function(data_df, condition_a_num, condition_b_num, output_dir, output_str, output_type, num_metadata_cols = 3){

    output_txt <- paste0(output_dir, output_str, output_type, '.tsv')

    # Row positions of the two groups; which() drops rows with NA age_status.
    rows_a <- which(data_df$age_status == condition_a_num)
    rows_b <- which(data_df$age_status == condition_b_num)
    rows_ab <- c(rows_a, rows_b)

    # Predictor for the linear model. The model previously regressed on
    # data_df[,1], which is not the column that defines the groups; the
    # predictor is now derived from age_status so that the model and Cohen's d
    # describe the same comparison, with the same sign.
    group_indicator <- rep(c(1, 0), times = c(length(rows_a), length(rows_b)))

    # Feature columns are everything after the metadata columns; this guard
    # avoids the descending sequence that 1:n would give for a narrow table.
    feature_idx <- if (ncol(data_df) > num_metadata_cols) {
        seq(num_metadata_cols + 1, ncol(data_df))
    } else {
        integer(0)
    }

    result_lines <- vapply(feature_idx, function(i){
        feature <- colnames(data_df)[i]
        values <- data_df[[i]]

        # Effect size between the two groups (sign kept, not absolute value).
        cohend_value <- cohen.d(values[rows_a], values[rows_b])$estimate

        # Significance of the group effect; summary() is computed only once.
        response <- values[rows_ab]
        coef_table <- coef(summary(glm(response ~ group_indicator)))

        # If the slope could not be estimated (e.g. only one group present),
        # the coefficient table has no second row: report NA explicitly.
        if (nrow(coef_table) >= 2) {
            feature_coef <- coef_table[2, 1]
            feature_pval <- coef_table[2, 4]
        } else {
            feature_coef <- NA
            feature_pval <- NA
        }

        paste0(feature, "\t", feature_coef, "\t", cohend_value, "\t", feature_pval, "\n")
    }, character(1))

    # Single write with append=FALSE replaces any previous file, so no explicit
    # file.remove() is needed.
    cat("\tcoef\tcohenD\tpval\n", result_lines, file=output_txt, sep="", append=FALSE)

    invisible(NULL)
}


In [17]:
#Main
# Prefix for all result files; the trailing slash is required because the
# output path is built by plain string concatenation.
output_dir <- '../../../analysis/age_stratified/differential_abundance_acpa_pos_specific/'

#make directory if it does not exist
if (!dir.exists(output_dir)){
    # recursive = TRUE also creates missing parent directories; without it
    # dir.create() fails (with only a warning) when the parents are absent.
    dir.create(output_dir, recursive = TRUE)
} else {
    print("Dir already exists!")
}

# Tab-separated input table that is passed to main().
data_file <- '../../../preprocessed_data/age_stratified/high_low_age_3_omics.acpa_pos.tsv'



[1] "Dir already exists!"


In [18]:
# Run the analysis; keep the filtered data frame that main() returns for inspection.
temp_df <- main(
    input_data_file = data_file,
    output_dir = output_dir,
    output_str = "high_low_acpa_pos"
)


[1] "aa_A1BG"
[1] "aa_AAK1"
[1] "aa_AARS"
[1] "aa_ABCF1"
[1] "aa_ABCF3"
[1] "aa_ABI1"
[1] "aa_ABL2"
[1] "aa_ABT1"
[1] "aa_ACAT2"
[1] "aa_ACPP"
[1] "aa_ACRBP"
[1] "aa_ACSL6"
[1] "aa_ACTA1"
[1] "aa_ACTB"
[1] "aa_ACTL7B"
[1] "aa_ACTL8"
[1] "aa_ACTR2"
[1] "aa_ACVR1"
[1] "aa_ACVR1C"
[1] "aa_ACVR2A"
[1] "aa_ADAM2"
[1] "aa_ADAM29"
[1] "aa_ADCK1"
[1] "aa_ADD1"
[1] "aa_ADSL"
[1] "aa_AFF4"
[1] "aa_AFP"
[1] "aa_AGR2"
[1] "aa_AHSG"
[1] "aa_AICDA"
[1] "aa_AIF1"
[1] "aa_AIFM2"
[1] "aa_AIFM3"
[1] "aa_AIM2"
[1] "aa_AK1"
[1] "aa_AK2"
[1] "aa_AK3"
[1] "aa_AK4"
[1] "aa_AK7"
[1] "aa_AK9"
[1] "aa_AKAP3"
[1] "aa_AKAP4"
[1] "aa_AKT1"
[1] "aa_AKT3"
[1] "aa_ALDOA"
[1] "aa_ALOX15"
[1] "aa_ALOX15B"
[1] "aa_ALPK1"
[1] "aa_ALX1"
[1] "aa_AMD1"
[1] "aa_AMOTL2"
[1] "aa_AMT"
[1] "aa_ANKRD13A"
[1] "aa_ANKRD45"
[1] "aa_ANXA1"
[1] "aa_ANXA11"
[1] "aa_ANXA2"
[1] "aa_AP2M1"
[1] "aa_APCS"
[1] "aa_APEX1"
[1] "aa_API5"
[1] "aa_APOBEC3G"
[1] "aa_APP"
[1] "aa_APPL1"
[1] "aa_AR"
[1] "aa_ARAF"
[1] "aa_AREG"
[1] "aa_ARHGDIA"
[1] "

In [9]:
# Display the data frame returned by main() (rows with age_status 1 or 0).
temp_df

Unnamed: 0_level_0,acpa,age_status,age,aa_A1BG,aa_AAK1,aa_AARS,aa_ABCF1,aa_ABCF3,aa_ABI1,aa_ABL2,⋯,p_YIPF6_9984-12,p_NPW_9986-14,p_LRRC25_9987-30,p_LRRC24_9989-12,p_EMILIN3_9991-112,p_ZNF264_9993-11,p_ATP4B_9994-217,p_DUT_9995-6,p_UBXN4_9997-12,p_IRF6_9999-1
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
X20233007136,1,1,62.7,980.8549,2764.783,1115.9451,1011.8503,1282.856,847.4833,2497.549,⋯,485.9,2953.5,604.1,403.4,470.0,1286.6,1398.2,10118.2,56309.4,6734.1
X1621002555,1,1,63.0,1284.733,3594.265,1368.8958,1072.7819,1153.378,759.1097,2666.783,⋯,577.1,10869.9,736.3,333.9,602.7,1431.1,3779.6,8640.5,46814.4,6171.2
X1525703721,1,1,64.0,1021.084,2349.677,1263.0576,1402.9736,1366.921,1022.1514,1732.976,⋯,513.9,3550.0,495.2,424.0,1096.7,1006.5,1288.2,4394.6,30865.7,3425.3
X1608103517,1,1,64.0,1049.9396,4969.359,1207.7118,1175.3222,3673.061,794.5569,2041.562,⋯,540.9,6360.3,606.8,326.5,540.6,1250.1,1503.3,18961.1,66658.4,7210.4
X1511103353,1,1,65.0,1170.8549,2448.91,1202.1306,1039.8472,1214.046,776.3493,1911.231,⋯,528.5,1858.6,582.3,387.4,1282.5,1082.8,1810.0,8035.9,38892.3,3852.5
X21027004278,1,1,65.6,1043.125,2014.145,1039.059,992.3479,1135.08,750.0847,2079.192,⋯,561.6,1488.1,618.8,354.4,753.0,1274.1,1358.4,10326.8,44826.3,6742.6
X1509707363,1,1,66.0,945.7521,2359.74,1246.9361,1116.8816,1759.403,886.3257,2379.71,⋯,563.6,1825.8,492.8,372.7,461.1,922.2,1250.8,7787.8,35781.9,3456.5
X1428003562,1,1,66.0,1080.4042,2875.147,990.1382,964.1451,1837.212,780.3865,1697.315,⋯,555.6,483.9,578.4,356.9,7888.2,1181.3,1322.2,5838.0,32343.3,4991.7
X1518106720,1,1,67.0,1091.3118,2637.044,1184.8375,1623.0896,1483.006,796.1674,2126.128,⋯,533.0,3889.5,677.8,486.7,1300.8,1429.4,2033.0,8586.1,32583.9,3502.6
X1509707315,1,1,67.0,1063.059,2853.759,1152.6243,1029.6865,1697.315,815.6625,2201.847,⋯,509.9,3761.5,548.9,365.3,3027.1,1006.9,1232.4,7158.0,33502.1,3883.8
