In [1]:
import os
import pandas as pd
import numpy as np
# fdr correction
from statsmodels.stats.multitest import multipletests

In [2]:
def read_ldsc_res(pheno1, phenos, res_path):
    """Collect the genetic-correlation summary row from a set of LDSC log files.

    For every phenotype ``p`` in ``phenos`` the file
    ``<res_path>/<p>_<pheno1>_rg.log`` is read. The summary table is located
    relative to the END of each log: the fifth-from-last line is used as the
    column header (taken from the first file only) and the fourth-from-last
    line as the result row. Both are split on whitespace.

    Parameters
    ----------
    pheno1 : str
        Trait name that forms the second part of every log file name.
    phenos : iterable of str
        Trait names that form the first part of the log file names; one log
        file is read per entry.
    res_path : str
        Directory that contains the ``*_rg.log`` files.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``phenos``, in the same order. The first two
        columns are cast to ``str`` and every remaining column to ``float``;
        the literal field ``'NA'`` becomes NaN. An empty frame is returned
        when ``phenos`` is empty.

    Raises
    ------
    FileNotFoundError
        If an expected log file does not exist.
    IndexError
        If a log file has fewer than five lines.
    ValueError
        If a field in the numeric columns cannot be converted to float.
    """
    header = None
    results = []
    for p in phenos:
        print('reading genetic correlation results', pheno1, p)
        file_path = os.path.join(res_path, f'{p}_{pheno1}_rg.log')
        with open(file_path, 'r') as f:
            lines = f.readlines()

        # The header is identical across logs, so it is parsed only once,
        # from the first file. str.split() with no argument also discards
        # the trailing newline.
        if header is None:
            header = lines[-5].split()

        # Replace the textual missing-value marker so the float cast works.
        results.append([np.nan if x == 'NA' else x for x in lines[-4].split()])

    # Nothing was read: there is no header to build columns from.
    if header is None:
        return pd.DataFrame()

    df = pd.DataFrame(results, columns=header)
    # Cast with astype() and return the NEW frame. Assigning the converted
    # values back through df.iloc[...] writes into the existing object-dtype
    # columns in recent pandas versions, so the dtype would stay `object`.
    dtypes = {col: (str if pos < 2 else float) for pos, col in enumerate(header)}
    return df.astype(dtypes)

In [3]:
# Load the CVD GWAS summary table and pull the trait names out of its
# 'pheno' column as a plain Python list.
df_dis_summary = pd.read_csv('gwas_data/cvd_gwas.csv')
diseases = df_dis_summary.loc[:, 'pheno'].to_list()

In [4]:
# Read one LDSC log per disease trait (each paired with 'ecg_noninsonmia')
# from results/ldsc/ and stack the summary rows into a single table.
df_disease = read_ldsc_res(
    pheno1='ecg_noninsonmia',
    phenos=diseases,
    res_path='results/ldsc/',
)

reading genetic correlation results ecg_noninsonmia Atrial_fibrillation
reading genetic correlation results ecg_noninsonmia Angina
reading genetic correlation results ecg_noninsonmia Chronic_artery_disease
reading genetic correlation results ecg_noninsonmia Heart_failure
reading genetic correlation results ecg_noninsonmia Stroke
reading genetic correlation results ecg_noninsonmia Peripheral_artery_disease


In [5]:
# Replace the contents of the two trait columns with short labels.
# Every row gets the fixed label 'T+' in p1.
df_disease['p1'] = 'T+'
# p2 holds a '/'-separated path: keep only the last path component and cut
# it at its first '.'.
df_disease['p2'] = df_disease['p2'].map(
    lambda path: path.rsplit('/', 1)[-1].partition('.')[0]
)
# Missing p-values are set to 1.
df_disease.fillna(value={'p': 1}, inplace=True)

  df_disease['p'] = df_disease['p'].copy().fillna(1)


In [6]:
# Benjamini-Hochberg FDR correction, applied once across all rows of the table.
fdr_result = multipletests(pvals=df_disease['p'].values, method='fdr_bh')
# Element 1 of the returned tuple holds the adjusted p-values.
df_disease['P_FDR'] = fdr_result[1]
# Label each row by whether its adjusted p-value is below 0.05.
df_disease['Sig_note_FDR'] = (df_disease['P_FDR'] < 0.05).map(
    {True: 'p.adj < 0.05', False: 'NS'}
)
# Write the table to disk without the index; missing values are written as 'NA'.
df_disease.to_csv('results/rg_cvds.csv', na_rep='NA', index=False)