In [82]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib_venn import venn2

In [93]:
def get_sig_data_dict(data_file, pval_cutoff=0.01, effect_cutoff=0.5, n_etc_rows=5):
    """Collect the significantly changed features from one result table.

    The file is read as a tab-separated table whose first column becomes the
    index (the feature names).  Among the remaining columns, the one at
    position 1 is used as the effect size (``cohen_d``) and the one at
    position 2 as the p-value; both are picked by position, not by header.

    A feature is kept when ``pval < pval_cutoff`` and its effect size lies
    strictly outside ``[-effect_cutoff, effect_cutoff]``.  Positive effects
    are filed under ``'up'``, negative ones under ``'down'``.

    Parameters
    ----------
    data_file : str or path-like
        Tab-separated table to read.
    pval_cutoff : float, optional
        Exclusive upper bound on the p-value (default 0.01).
    effect_cutoff : float, optional
        Exclusive lower bound on the absolute effect size (default 0.5).
    n_etc_rows : int, optional
        Features found in the first ``n_etc_rows`` rows of the table are
        filed in ``etc_dict`` regardless of their name (default 5).

    Returns
    -------
    tuple
        ``(data_dict, data_p_dict, data_m_dict, data_aa_dict, etc_dict)``.
        ``data_dict`` maps each kept feature to ``[cohen_d, pval]``.  The
        other four are ``{'up': [...], 'down': [...]}`` lists of feature
        names: names containing ``'p_'``, every remaining name, names
        containing ``'aa_'``, and the leading ``n_etc_rows`` rows,
        respectively.
    """
    data_df = pd.read_csv(data_file, sep="\t", index_col=0)

    data_dict = {}
    data_p_dict = {'up': [], 'down': []}
    data_m_dict = {'up': [], 'down': []}
    data_aa_dict = {'up': [], 'down': []}
    etc_dict = {'up': [], 'down': []}

    # Select whole columns by position.  Indexing a row Series with a bare
    # integer (``data_df.iloc[i][1]``) relies on a deprecated positional
    # fallback when the column labels are strings.
    effect_sizes = data_df.iloc[:, 1]
    pvals = data_df.iloc[:, 2]

    for i, (feature, cohen_d, pval) in enumerate(zip(data_df.index, effect_sizes, pvals)):
        # Written as "not <" so that a NaN p-value is skipped, never kept.
        if not pval < pval_cutoff:
            continue

        if cohen_d > effect_cutoff:
            direction = 'up'
        elif cohen_d < -effect_cutoff:
            direction = 'down'
        else:
            continue

        data_dict[feature] = [cohen_d, pval]

        # str() keeps the substring tests from raising on non-string labels.
        name = str(feature)
        # NOTE(review): these are substring tests, so e.g. a name merely
        # containing 'p_' is filed with data_p_dict, and 'p_' wins over
        # 'aa_' -- confirm this is intended rather than a prefix match.
        if i < n_etc_rows:
            etc_dict[direction].append(feature)
        elif 'p_' in name:
            data_p_dict[direction].append(feature)
        elif 'aa_' in name:
            data_aa_dict[direction].append(feature)
        else:
            data_m_dict[direction].append(feature)

    return data_dict, data_p_dict, data_m_dict, data_aa_dict, etc_dict
    
def make_venndiagram(low_omics_dict, high_omics_dict, omics_type, updown_status, comparison_type,
                     output_dir='../../../analysis/age_stratified/differential_abundance/plot/'):
    """Draw and save a two-set Venn diagram of features shared between age groups.

    The feature lists stored under ``updown_status`` in the two dictionaries
    are compared as sets; the diagram shows how many features are unique to
    the 'Under 60' list, shared, and unique to the 'Over 60' list.  The
    figure is written to
    ``<output_dir>/<omics_type>.<comparison_type>.<updown_status>.pdf``
    and then closed.

    Parameters
    ----------
    low_omics_dict, high_omics_dict : dict
        Mappings with ``'up'`` and ``'down'`` keys holding iterables of
        feature names; plotted as 'Under 60' and 'Over 60' respectively.
    omics_type : str
        Label used in the plot title and the output file name.
    updown_status : str
        Either ``"up"`` or ``"down"``; selects which list is compared.
    comparison_type : str
        Label used in the plot title and the output file name.
    output_dir : str, optional
        Directory the PDF is written to.  It must already exist.

    Raises
    ------
    ValueError
        If ``updown_status`` is neither ``"up"`` nor ``"down"``.
    """
    # Fail early with a clear message; otherwise the counts below would
    # never be defined and the plot call would die with UnboundLocalError.
    if updown_status not in ("up", "down"):
        raise ValueError(
            "updown_status must be 'up' or 'down', got %r" % (updown_status,)
        )

    low_features = set(low_omics_dict[updown_status])
    high_features = set(high_omics_dict[updown_status])

    num_intersection = len(low_features & high_features)
    num_uniq_in_low_age = len(low_features - high_features)
    num_uniq_in_high_age = len(high_features - low_features)

    output_pdf = os.path.join(
        output_dir, '%s.%s.%s.pdf' % (omics_type, comparison_type, updown_status)
    )
    # venn2 expects the counts in the order (only-left, both, only-right).
    venn2(subsets = (num_uniq_in_low_age, num_intersection, num_uniq_in_high_age), set_labels = ('Under 60', 'Over 60'))
    plt.title('%s:%s, population:under60 vs. over60 \n%s in disease' % (omics_type, comparison_type, updown_status))
    plt.savefig(output_pdf)
    # Close rather than clear, so no empty figure is left open for the
    # notebook to render after the cell finishes.
    plt.close()

In [94]:
# Folder holding the per-age-group result tables read below.
data_dir = '../../../analysis/age_stratified/differential_abundance/'
comparison_list = ['cVSneg','cVSpos','cVSra','negVSpos']

for comparison_type in comparison_list:

    # One table per age group for the current comparison.
    high_age_file = '%s60_over.%s.padj.tsv' % (data_dir, comparison_type)
    low_age_file = '%s60_under.%s.padj.tsv' % (data_dir, comparison_type)

    (high_dict, high_p_dict, high_m_dict,
     high_aa_dict, high_etc_dict) = get_sig_data_dict(high_age_file)
    (low_dict, low_p_dict, low_m_dict,
     low_aa_dict, low_etc_dict) = get_sig_data_dict(low_age_file)

    # (plot label, under-60 features, over-60 features) for each omics layer.
    omics_pairs = (
        ("protein", low_p_dict, high_p_dict),
        ("metabolites", low_m_dict, high_m_dict),
        ("autoantibody", low_aa_dict, high_aa_dict),
    )

    # All "up" diagrams are drawn first, then all "down" diagrams.
    for venn_direction in ("up", "down"):
        for omics_label, low_omics, high_omics in omics_pairs:
            make_venndiagram(low_omics, high_omics, omics_label, venn_direction, comparison_type)



<Figure size 432x288 with 0 Axes>