In [13]:
import pandas as pd
from scipy.stats import spearmanr

In [2]:
def get_sig_feature_list(sig_file, pval_cutoff=0.01, effect_cutoff=0.5):
    """Return the features that pass both a p-value and an effect-size filter.

    The statistics table is read as tab-separated text. A row is kept when its
    ``adj_pval`` is strictly below ``pval_cutoff`` and the absolute value of
    its ``cohen_d`` is strictly above ``effect_cutoff``. Rows with a missing
    value in either column never satisfy the comparison and are dropped.

    Parameters
    ----------
    sig_file : str or path-like
        Tab-separated file containing at least the columns ``feature``,
        ``adj_pval`` and ``cohen_d``.
    pval_cutoff : float, default 0.01
        Exclusive upper bound on ``adj_pval``.
    effect_cutoff : float, default 0.5
        Exclusive lower bound on ``|cohen_d|``.

    Returns
    -------
    list
        Values of the ``feature`` column for the retained rows, in file order.

    Raises
    ------
    KeyError
        If any of the required columns is absent from the file.
    """
    stats_df = pd.read_csv(sig_file, sep="\t")

    # Fail with a message that names the file and every missing column,
    # instead of pandas' bare KeyError on the first missing one.
    required_cols = ("feature", "adj_pval", "cohen_d")
    missing_cols = [col for col in required_cols if col not in stats_df.columns]
    if missing_cols:
        raise KeyError(f"{sig_file}: missing required column(s) {missing_cols}")

    is_significant = stats_df["adj_pval"] < pval_cutoff
    has_large_effect = stats_df["cohen_d"].abs() > effect_cutoff

    return list(stats_df.loc[is_significant & has_large_effect, "feature"])

In [25]:
def spearman_test(sig_list, data_df, output_file, target_col='das28crp'):
    """Correlate selected feature columns with a target column and save a TSV.

    For every entry of ``sig_list`` that is also a column of ``data_df``, the
    Spearman rank correlation between that column and ``data_df[target_col]``
    is computed with ``nan_policy='omit'``, so missing values are ignored by
    scipy rather than propagated. Entries of ``sig_list`` that are not columns
    of ``data_df`` are silently skipped.

    Parameters
    ----------
    sig_list : iterable of str
        Candidate feature names; processed in the order given.
    data_df : pandas.DataFrame
        Table holding ``target_col`` and the feature columns.
    output_file : str or path-like
        Destination of the tab-separated result table.
    target_col : str, default 'das28crp'
        Column every feature is correlated against.

    Returns
    -------
    None
        Results are written to ``output_file`` with the columns ``feature``,
        ``spearman_corr`` and ``p_value`` (one row per tested feature).

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``data_df``.
    """
    # Keep only the requested features that actually exist as columns.
    features = [col for col in sig_list if col in data_df.columns]

    # The target column is the same for every feature, so look it up once.
    target_values = data_df[target_col]

    results = []
    for feature in features:
        corr, pval = spearmanr(target_values, data_df[feature], nan_policy='omit')
        results.append({'feature': feature, 'spearman_corr': corr, 'p_value': pval})

    # Pin the column names so the header is written even when no feature
    # matched; an empty record list would otherwise give a column-less frame.
    spearman_df = pd.DataFrame(results, columns=['feature', 'spearman_corr', 'p_value'])
    spearman_df.to_csv(output_file, sep="\t", index=False)


In [26]:
# Locations of the tab-separated proteomics and metabolomics tables.
proteomics_file = "/Users/m221138/RA_ACPA_multiomics/preprocessed_data/proteomics/proteomics.patient_info.tsv"
metabolomics_file = "/Users/m221138/RA_ACPA_multiomics/preprocessed_data/metabolomics/metabolites.patient_info.tsv"

# Read both tables, proteomics first, into their own DataFrames.
proteomics_df, metabolomics_df = (
    pd.read_csv(table_path, sep="\t")
    for table_path in (proteomics_file, metabolomics_file)
)

In [27]:
# Proteomics features passing the significance filter in the cVSneg results;
# the count is printed as a quick sanity check.
prot_cvsneg_results = "/Users/m221138/RA_ACPA_multiomics/analysis/statistics/linear_regression/proteomics/linear_regression.cVSneg.proteomics.tsv"
prot_acpa_neg_list = get_sig_feature_list(prot_cvsneg_results)
print(len(prot_acpa_neg_list))

# Same selection for the cVSpos results.
prot_cvspos_results = "/Users/m221138/RA_ACPA_multiomics/analysis/statistics/linear_regression/proteomics/linear_regression.cVSpos.proteomics.tsv"
prot_acpa_pos_list = get_sig_feature_list(prot_cvspos_results)
print(len(prot_acpa_pos_list))

73
18


In [28]:
# Metabolomics features passing the significance filter in the cVSneg results;
# the count is printed as a quick sanity check.
met_cvsneg_results = "/Users/m221138/RA_ACPA_multiomics/analysis/statistics/linear_regression/metabolomics/linear_regression.cVSneg.metabolomics.tsv"
met_acpa_neg_list = get_sig_feature_list(met_cvsneg_results)
print(len(met_acpa_neg_list))

# Same selection for the cVSpos results.
met_cvspos_results = "/Users/m221138/RA_ACPA_multiomics/analysis/statistics/linear_regression/metabolomics/linear_regression.cVSpos.metabolomics.tsv"
met_acpa_pos_list = get_sig_feature_list(met_cvspos_results)
print(len(met_acpa_pos_list))

24
6


In [29]:
# Run the Spearman test on each significant proteomics feature list
# (cVSneg first, then cVSpos) and write one result file per list.
for prot_sig_features, prot_spearman_out in (
    (prot_acpa_neg_list, "/Users/m221138/RA_ACPA_multiomics/revision_analysis/spearman.sig.proteomics.cVSneg.tsv"),
    (prot_acpa_pos_list, "/Users/m221138/RA_ACPA_multiomics/revision_analysis/spearman.sig.proteomics.cVSpos.tsv"),
):
    spearman_test(prot_sig_features, proteomics_df, prot_spearman_out)

In [30]:
# Run the Spearman test on each significant metabolomics feature list
# (cVSneg first, then cVSpos) and write one result file per list.
for met_sig_features, met_spearman_out in (
    (met_acpa_neg_list, "/Users/m221138/RA_ACPA_multiomics/revision_analysis/spearman.sig.metabolomics.cVSneg.tsv"),
    (met_acpa_pos_list, "/Users/m221138/RA_ACPA_multiomics/revision_analysis/spearman.sig.metabolomics.cVSpos.tsv"),
):
    spearman_test(met_sig_features, metabolomics_df, met_spearman_out)