In [8]:
#02_report_differential_abundance_adjusting_drug.ipynb
#
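#Purpose: Summarize the output of 01_DifferentialAbundance_and_cohens_D_adjust_effect by counting the features that pass each p-value cutoff (with and without a Cohen's D filter) and exporting the counts as a CSV table.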
#

import pandas as pd
import seaborn as sns

In [9]:
def main(data_dir, omics_type, p_threshold, data_dict):
    """Tally features passing the p-value cutoff for each disease comparison.

    For every comparison in ('cVSneg', 'cVSpos', 'negVSpos') the tab-separated
    table ``<data_dir>/<omics_type>.<comparison>.tsv`` is read with its first
    column as the index. Rows are then selected twice: once on 'pval' and once
    on 'all_adj_pval', each compared against ``p_threshold`` (strictly less
    than). No effect-size filter is applied here.

    For each selection the number of rows with positive and with negative
    'fc_case_control' is printed, followed by the totals for both selections.

    Parameters
    ----------
    data_dir : str
        Directory that holds the per-comparison TSV files.
    omics_type : str
        File-name prefix of the tables; also the first element of the keys
        written to ``data_dict``.
    p_threshold : float
        Exclusive upper bound applied to 'pval' and 'all_adj_pval'.
    data_dict : dict
        Updated in place. Keys are
        ``(omics_type, comparison, model, "NO_cohenD", p_threshold)`` with
        model being "marginal" or "adjust_all"; values are row counts.

    Returns
    -------
    dict
        The same ``data_dict`` object that was passed in.
    """
    # Each entry: (p-value column, heading template, model label used in the key).
    model_specs = [
        ('pval', "marginal pval < %s", "marginal"),
        ('all_adj_pval', "all adj pval < %s", "adjust_all"),
    ]

    for condition_type in ('cVSneg', 'cVSpos', 'negVSpos'):
        print (">>>>>>>> %s <<<<<<<<" % condition_type)
        table_path = "%s/%s.%s.tsv" % (data_dir, omics_type, condition_type)
        table = pd.read_csv(table_path, sep="\t", index_col=0)

        hit_counts = {}
        for spec_idx, (pval_col, heading, model_label) in enumerate(model_specs):
            if spec_idx:
                # Separator between the two model summaries.
                print ("---------")

            hits = table.loc[table[pval_col] < p_threshold]

            print (heading % p_threshold)
            fold_change = hits['fc_case_control']
            print ("log2fc > 0:", (fold_change > 0).sum())
            print ("log2fc < 0:", (fold_change < 0).sum())

            hit_counts[model_label] = len(hits.index)

        print("\nMarginal P < %s: %s" % (p_threshold, hit_counts["marginal"]))
        print("Adjusting ALL < %s: %s" % (p_threshold, hit_counts["adjust_all"]))

        # Insertion order of hit_counts is marginal, then adjust_all.
        for model_label, n_hits in hit_counts.items():
            data_dict[omics_type, condition_type, model_label, "NO_cohenD", p_threshold] = n_hits

    return data_dict



In [10]:
def main2(data_dir, omics_type, p_threshold, data_dict, cohen_d_threshold=0.5):
    """Count significant features per disease comparison, requiring a minimum effect size.

    For every comparison in ('cVSneg', 'cVSpos', 'negVSpos') the tab-separated
    table ``<data_dir>/<omics_type>.<comparison>.tsv`` is read with its first
    column as the index. A row is counted when its p-value is strictly below
    ``p_threshold`` AND the absolute value of its 'cohenD' is strictly above
    ``cohen_d_threshold``. This is done once with 'pval' and once with
    'all_adj_pval'.

    For each selection the number of rows with positive and with negative
    'fc_case_control' is printed, followed by the totals for both selections.

    Parameters
    ----------
    data_dir : str
        Directory that holds the per-comparison TSV files.
    omics_type : str
        File-name prefix of the tables; also the first element of the keys
        written to ``data_dict``.
    p_threshold : float
        Exclusive upper bound applied to 'pval' and 'all_adj_pval'.
    data_dict : dict
        Updated in place. Keys are
        ``(omics_type, comparison, model, "with_cohenD", p_threshold)`` with
        model being "marginal" or "adjust_all"; values are row counts.
    cohen_d_threshold : float, optional
        Exclusive lower bound on ``|cohenD|``. Defaults to 0.5, the value that
        was previously hard-coded. Note that the key written to ``data_dict``
        does not record this value, so calls with different thresholds
        overwrite each other.

    Returns
    -------
    dict
        The same ``data_dict`` object that was passed in.
    """
    condition_list = ['cVSneg','cVSpos','negVSpos']

    for condition_type in condition_list:

        print (">>>>>>>> %s <<<<<<<<" % condition_type)
        data_file = "%s/%s.%s.tsv" % (data_dir, omics_type, condition_type)
        data_df = pd.read_csv(data_file, sep="\t", index_col=0)

        # The effect-size filter is identical for both models, so build it once.
        large_effect = data_df['cohenD'].abs() > cohen_d_threshold

        marginal_df = data_df[(data_df['pval'] < p_threshold) & large_effect]
        n_marginal = len(marginal_df.index)

        print ("marginal pval < %s" % p_threshold)
        print ("log2fc > 0:", (marginal_df['fc_case_control'] > 0).sum())
        print ("log2fc < 0:", (marginal_df['fc_case_control'] < 0).sum())

        print ("---------")
        adjusted_df = data_df[(data_df['all_adj_pval'] < p_threshold) & large_effect]
        n_adjusted = len(adjusted_df.index)

        print ("all adj pval < %s" % p_threshold)
        print ("log2fc > 0:", (adjusted_df['fc_case_control'] > 0).sum())
        print ("log2fc < 0:", (adjusted_df['fc_case_control'] < 0).sum())

        print("\nMarginal P < %s: %s" % (p_threshold, n_marginal))
        print("Adjusting ALL < %s: %s" % (p_threshold, n_adjusted))

        data_dict[omics_type, condition_type, "marginal", "with_cohenD", p_threshold] = n_marginal
        data_dict[omics_type, condition_type, "adjust_all", "with_cohenD", p_threshold] = n_adjusted

    return data_dict


In [11]:
data_dir = '../../../analysis/statistics/linear_model/differential_abundance_logit'

omics_list = ['metabolomics','autoantibody','proteomics']
data_dict = {}

# Proteomics: for each p-value cutoff, tally first without (main) and then
# with (main2) the Cohen's D filter, accumulating the counts in data_dict.
for p_cutoff in (0.05, 0.01):
    for tally in (main, main2):
        data_dict = tally(data_dir, 'proteomics', p_cutoff, data_dict)

>>>>>>>> cVSneg <<<<<<<<
marginal pval < 0.05
log2fc > 0: 104
log2fc < 0: 196
---------
all adj pval < 0.05
log2fc > 0: 872
log2fc < 0: 379

Marginal P < 0.05: 300
Adjusting ALL < 0.05: 1251
>>>>>>>> cVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 128
log2fc < 0: 198
---------
all adj pval < 0.05
log2fc > 0: 218
log2fc < 0: 122

Marginal P < 0.05: 326
Adjusting ALL < 0.05: 340
>>>>>>>> negVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 90
log2fc < 0: 87
---------
all adj pval < 0.05
log2fc > 0: 83
log2fc < 0: 56

Marginal P < 0.05: 177
Adjusting ALL < 0.05: 139
>>>>>>>> cVSneg <<<<<<<<
marginal pval < 0.05
log2fc > 0: 71
log2fc < 0: 128
---------
all adj pval < 0.05
log2fc > 0: 40
log2fc < 0: 69

Marginal P < 0.05: 199
Adjusting ALL < 0.05: 109
>>>>>>>> cVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 73
log2fc < 0: 124
---------
all adj pval < 0.05
log2fc > 0: 25
log2fc < 0: 15

Marginal P < 0.05: 197
Adjusting ALL < 0.05: 40
>>>>>>>> negVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0

In [12]:
# Metabolomics: for each p-value cutoff, tally first without (main) and then
# with (main2) the Cohen's D filter, accumulating the counts in data_dict.
for p_cutoff in (0.05, 0.01):
    for tally in (main, main2):
        data_dict = tally(data_dir, 'metabolomics', p_cutoff, data_dict)

>>>>>>>> cVSneg <<<<<<<<
marginal pval < 0.05
log2fc > 0: 26
log2fc < 0: 67
---------
all adj pval < 0.05
log2fc > 0: 36
log2fc < 0: 64

Marginal P < 0.05: 93
Adjusting ALL < 0.05: 100
>>>>>>>> cVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 19
log2fc < 0: 65
---------
all adj pval < 0.05
log2fc > 0: 10
log2fc < 0: 41

Marginal P < 0.05: 84
Adjusting ALL < 0.05: 51
>>>>>>>> negVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 10
log2fc < 0: 28
---------
all adj pval < 0.05
log2fc > 0: 9
log2fc < 0: 32

Marginal P < 0.05: 38
Adjusting ALL < 0.05: 41
>>>>>>>> cVSneg <<<<<<<<
marginal pval < 0.05
log2fc > 0: 21
log2fc < 0: 53
---------
all adj pval < 0.05
log2fc > 0: 12
log2fc < 0: 22

Marginal P < 0.05: 74
Adjusting ALL < 0.05: 34
>>>>>>>> cVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 18
log2fc < 0: 52
---------
all adj pval < 0.05
log2fc > 0: 5
log2fc < 0: 16

Marginal P < 0.05: 70
Adjusting ALL < 0.05: 21
>>>>>>>> negVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 5
log2fc < 0: 22
--

In [13]:
# Autoantibody: for each p-value cutoff, tally first without (main) and then
# with (main2) the Cohen's D filter, accumulating the counts in data_dict.
for p_cutoff in (0.05, 0.01):
    for tally in (main, main2):
        data_dict = tally(data_dir, 'autoantibody', p_cutoff, data_dict)

>>>>>>>> cVSneg <<<<<<<<
marginal pval < 0.05
log2fc > 0: 5
log2fc < 0: 8
---------
all adj pval < 0.05
log2fc > 0: 34
log2fc < 0: 86

Marginal P < 0.05: 13
Adjusting ALL < 0.05: 120
>>>>>>>> cVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 36
log2fc < 0: 24
---------
all adj pval < 0.05
log2fc > 0: 36
log2fc < 0: 34

Marginal P < 0.05: 60
Adjusting ALL < 0.05: 70
>>>>>>>> negVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 86
log2fc < 0: 63
---------
all adj pval < 0.05
log2fc > 0: 81
log2fc < 0: 36

Marginal P < 0.05: 149
Adjusting ALL < 0.05: 117
>>>>>>>> cVSneg <<<<<<<<
marginal pval < 0.05
log2fc > 0: 2
log2fc < 0: 3
---------
all adj pval < 0.05
log2fc > 0: 1
log2fc < 0: 2

Marginal P < 0.05: 5
Adjusting ALL < 0.05: 3
>>>>>>>> cVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 17
log2fc < 0: 17
---------
all adj pval < 0.05
log2fc > 0: 0
log2fc < 0: 3

Marginal P < 0.05: 34
Adjusting ALL < 0.05: 3
>>>>>>>> negVSpos <<<<<<<<
marginal pval < 0.05
log2fc > 0: 60
log2fc < 0: 42
--------

In [14]:
# Flatten data_dict into one row per key: the five key fields
# (omics, disease_compare, model_type, CohenD, P-value) followed by the
# stored count (DA). Building the rows directly avoids the temporary
# 'Column1' column and also yields a valid empty table when data_dict is empty.
summary_columns = ['omics', 'disease_compare', 'model_type', 'CohenD', 'P-value', 'DA']
data_df = pd.DataFrame(
    [key + (count,) for key, count in data_dict.items()],
    columns=summary_columns,
)

# Write next to the input tables via data_dir instead of an absolute,
# user-specific path, so the notebook runs on other machines.
# NOTE(review): assumes data_dir resolves to the same
# differential_abundance_logit folder the previous absolute path pointed at
# when the notebook is run from its own directory -- TODO confirm.
data_df.to_csv('%s/data_investigation.logit.csv' % data_dir, index=False)