# Collating linkage disequilibrium (LD) for all loci

In [1]:
import pandas as pd
import os

In [4]:
# Collate every per-locus LD table into one frame, write it out, then annotate
# each LD pair with the phenotype/locus information of its lead SNP.

# Set the directory path
directory = '/Output/TOPmed_LD/'

# List all files with .ld extension.
# Sorted because os.listdir() returns entries in arbitrary order, which would
# otherwise make the row order of the collated output differ between runs.
files = sorted(f for f in os.listdir(directory) if f.endswith('.ld'))
if not files:
    # Fail with a clear message instead of an IndexError on files[0].
    raise FileNotFoundError(f"No .ld files found in {directory}")

# Column names imposed on every table; with header=0 the first row of each
# file is treated as its header and replaced by these names.
column_names = ['CHR_A', 'BP_A', 'SNP_A', 'CHR_B', 'BP_B', 'SNP_B', 'R2']

# Read every file, then concatenate once. Concatenating inside a loop copies
# the accumulated frame on each iteration.
# The separator is a raw string: "\s" in a normal string literal is an
# invalid escape sequence.
ld_tables = [
    pd.read_csv(os.path.join(directory, file), sep=r"\s+", header=0, names=column_names)
    for file in files
]
dat_ld = pd.concat(ld_tables)

# Write the concatenated dataframe to a file (still with the original
# SNP_A / SNP_B column names).
dat_ld.to_csv("/Output/collated_ld_all_pheno_all_loci.txt", index=False, sep="\t")

# Read in all lead snps
pheno_file = pd.read_csv("/Input/All_pheno_all_loci_topmed_list_38.csv")

# Drop the columns that are not carried into the LD table.
# NOTE(review): presumably this also avoids clashes with the same-named
# columns of the per-trait GWAS tables merged later - confirm.
pheno_file = pheno_file.drop(
    columns=["CHROM", "GENPOS", "ALLELE1", "ALLELE0", "A1FREQ", "MAF", "INFO", "BETA", "SE", "P"]
)

# Rename the SNP columns: SNP_A becomes the merge key 'ID', SNP_B becomes 'SNP'.
dat_ld = dat_ld.rename(columns={'SNP_A': 'ID', 'SNP_B': 'SNP'})

# Merge dat_ld and pheno_file; the left join keeps every LD row even when its
# 'ID' has no entry in pheno_file.
ld_pheno = pd.merge(dat_ld, pheno_file, on='ID', how='left')

# 'ID' becomes 'lead_snp'. ('SNP_B' was already renamed to 'SNP' above, so it
# does not need to be renamed again here.) Then drop the positional and locus
# columns that are not needed downstream.
ld_pheno = (
    ld_pheno
    .rename(columns={'ID': 'lead_snp'})
    .drop(columns=["CHR_A", "BP_A", "CHR_B", "BP_B", 'locus38', 'locus37'])
)

In [5]:
# Traits processed in this notebook; each has its own GWAS table and output file.
traits = ["RVEDV", "RVESV", "RVSV", "RVEF", "RV_LV_ratio", "RVEDV_BSA", "RVESV_BSA", "RVSV_BSA"]

for trait in traits:
    # LD rows whose lead SNP is annotated with the current phenotype
    trait_pheno = ld_pheno.loc[ld_pheno['Phenotype'] == trait]

    # Tab-separated GWAS table for this trait; it must provide a 'SNP' column,
    # which is the join key below.
    gwas = pd.read_csv(f'/Output/{trait}_GWAS_38_37_rsid.txt', sep="\t")

    print(f"Merging {trait} GWAS with LD data...")

    # Left join: every LD row is kept, GWAS columns are empty where the SNP
    # has no match in the GWAS table.
    ld_merged = trait_pheno.merge(gwas, how='left', on='SNP')

    # One tab-separated output file per trait
    ld_merged.to_csv(f'/Output/{trait}_38_loci_with_ld.txt', sep="\t", index=False)


Merging RVEDV GWAS with LD data...
Merging RVESV GWAS with LD data...
Merging RVSV GWAS with LD data...
Merging RVEF GWAS with LD data...
Merging RV_LV_ratio GWAS with LD data...
Merging RVEDV_BSA GWAS with LD data...
Merging RVESV_BSA GWAS with LD data...
Merging RVSV_BSA GWAS with LD data...


In [6]:
# Combine all traits
# Bind `all_traits` to an empty DataFrame; this is the name under which the
# per-trait tables are combined.
all_traits = pd.DataFrame()


In [7]:
# Load every per-trait table and combine them with a single concat.
# `all_traits` is built from scratch here rather than appended to, so the
# result does not depend on any earlier value of `all_traits` and re-running
# this cell cannot add the same rows twice.
trait_tables = [
    pd.read_csv(f'/Output/{trait}_38_loci_with_ld.txt', sep="\t")
    for trait in traits
]
all_traits = pd.concat(trait_tables, ignore_index=True, sort=False)

# Removing rows of SNPs in LD but not in GWAS (their GWAS columns, including
# CHROM, are empty after the left merge that produced the per-trait files).
all_traits = all_traits[all_traits['CHROM'].notna()]
all_traits = all_traits.sort_values(by='CHROM')

# Write the combined data to a final file
final_output_file = '/Output/all_RV_traits_loci_38_with_ld.txt'
all_traits.to_csv(final_output_file, index=False, sep="\t")

In [8]:
# Show the combined, filtered and sorted table as the cell output.
all_traits

Unnamed: 0,lead_snp,SNP,R2,Phenotype,Locus_n,Locus_name,CHROM,GENPOS,ALLELE0,ALLELE1,...,TEST,BETA,SE,CHISQ,LOG10P,P,MAF,locus38,locus37,rsid
34295,1:228369087:C:T,1:228269335:T:A,0.117192,RVEF,2,OBSCN,1.0,228269335.0,T,A,...,ADD,-0.025964,0.011492,5.10443,1.62224,2.386492e-02,0.071729,chr1:228269335,1:228457036,rs3738684
73106,1:16023533:A:G,1:16010757:C:T,0.220987,RVESV_BSA,2,CLCNKA,1.0,16010757.0,C,T,...,ADD,0.016001,0.009716,2.71205,1.00177,9.959327e-02,0.102461,chr1:16010757,1:16337252,rs1739836
73105,1:16023533:A:G,1:16010159:G:A,0.291568,RVESV_BSA,2,CLCNKA,1.0,16010159.0,G,A,...,ADD,0.010683,0.006007,3.16260,1.12296,7.534250e-02,0.393320,chr1:16010159,1:16336654,rs11578845
73104,1:16023533:A:G,1:16009332:T:C,0.290947,RVESV_BSA,2,CLCNKA,1.0,16009332.0,T,C,...,ADD,0.010473,0.006007,3.03967,1.09016,8.125311e-02,0.393303,chr1:16009332,1:16335827,rs34840873
73103,1:16023533:A:G,1:16009032:G:C,0.890923,RVESV_BSA,2,CLCNKA,1.0,16009032.0,G,C,...,ADD,0.036354,0.006151,34.92610,8.46538,3.424680e-09,0.345774,chr1:16009032,1:16335527,rs1763610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67169,22:23836092:G:A,22:23742246:C:CA,0.189349,RVESV_BSA,34,DERL3,22.0,23742246.0,C,CA,...,ADD,-0.035215,0.010467,11.31860,3.11501,7.673438e-04,0.086636,chr22:23742246,22:24084433,
67170,22:23836092:G:A,22:23743667:G:T,0.190204,RVESV_BSA,34,DERL3,22.0,23743667.0,G,T,...,ADD,-0.035049,0.010400,11.35700,3.12399,7.516402e-04,0.087204,chr22:23743667,22:24085854,rs2067621
67171,22:23836092:G:A,22:23745727:G:C,0.188581,RVESV_BSA,34,DERL3,22.0,23745727.0,G,C,...,ADD,-0.034494,0.010401,10.99840,3.04004,9.119268e-04,0.087405,chr22:23745727,22:24087914,rs5996607
67357,22:23836092:G:A,22:23820814:G:A,0.529181,RVESV_BSA,34,DERL3,22.0,23820814.0,G,A,...,ADD,-0.045858,0.009116,25.30730,6.31082,4.888549e-07,0.116625,chr22:23820814,22:24163001,rs9608195
