In [1]:
import os
import glob
import pandas as pd
import liftover
import numpy as np
from biolib import liftover as liftover_utility
os.chdir('/mnt/BioHome/jreyna/jreyna/projects/dchallenge/')

# get the converter from hg38 to hg19
from liftover import get_lifter
lift_converter = get_lifter('hg38', 'hg19', cache='temp')

In [15]:
lift_converter['chr1'][1000000]

[('chr1', 935380, '+')]

## Loading colocalization data

In [2]:
coloc_fns = 'results/main/coloc/Results/eQTL_Catalogue/*/*/*/FINAL_Summary_Coloc_Gene_SNP_Pairs.bed'
coloc_fns = glob.glob(coloc_fns)
coloc_data = []
for fn in coloc_fns:
    tdf = pd.read_table(fn)
    coloc_data.append(tdf)
coloc_data = pd.concat(coloc_data)

In [3]:
# lifting over from hg38 to hg19
lifted_coords = liftover_utility.hg38_to_hg19_vectorized(coloc_data[['chr', 'pos']].values)
coloc_data.loc[:, 'chr'] = lifted_coords[:, 0].astype(str)
coloc_data.loc[:, 'pos'] = lifted_coords[:, 1].astype(int)

In [4]:
all_data = []
for (chrom, chrom_data) in coloc_data.groupby('chr'):
    
    print('Processing {}.'.format(chrom))
    
    meta_fn = '/mnt/BioAdHoc/Groups/vd-vijay/sourya/Projects/2020_IQTL_HiChIP/Data/'
    meta_fn += 'SNPInfo/SNPInfo_merged_tables/snpinfo_{}.txt'.format(chrom) 
    snp_info = pd.read_table(meta_fn, sep=' ')
    merged_df = pd.merge(chrom_data, snp_info, on=['chr', 'pos'], how='left')
    all_data.append(merged_df)


Processing chr1.
Processing chr10.
Processing chr11.
Processing chr12.
Processing chr14.
Processing chr15.
Processing chr16.
Processing chr18.
Processing chr2.
Processing chr21.
Processing chr4.
Processing chr6.
Processing chr8.
Processing chr9.


In [10]:
all_df = pd.concat(all_data)

In [9]:
pd.set_option('display.min_rows', None)

In [11]:
all_df

Unnamed: 0,chr,pos,pp_H0_Coloc_Summary,pp_H1_Coloc_Summary,pp_H2_Coloc_Summary,pp_H3_Coloc_Summary,pp_H4_Coloc_Summary,rs_id,variant_id,geneName,...,slope_se_gwas,pval_nominal,SampleSize,original.name,rsnumber,ref_y,alt_y,AC_y,AF_y,AN_y
0,chr1,64124830,0.000000e+00,0.000000e+00,1.779348e-02,0.172358,0.809848,rs2819176,1:63659159:G:A,ENSG00000185483,...,0.017205,1.140000e-10,520580.0,1:64124830-G-A,rs2819176,G,A,1687,0.336861,5008
1,chr1,113828107,1.711058e-210,3.814479e-208,1.022411e-04,0.021815,0.978083,rs773560,1:113285485:G:A,ENSG00000273483,...,0.015663,4.230000e-27,,rs773560:113828107:G:A,rs773560,G,A,1854,0.370208,5008
2,chr1,192514029,5.657758e-79,4.444076e-78,1.588594e-02,0.123921,0.860193,rs10921191,1:192544899:C:A,ENSG00000232077,...,0.015068,1.940000e-09,520580.0,rs10921191,rs10921191,C,A,2911,0.581270,5008
3,chr1,192515849,2.341411e-39,1.634791e-38,1.695876e-02,0.117542,0.865499,rs10801128,1:192546719:A:G,ENSG00000090104,...,0.015681,8.980000e-10,520580.0,rs10801128:192515849:A:G,rs10801128,A,G,2892,0.577476,5008
4,chr1,113828107,0.000000e+00,0.000000e+00,1.022411e-04,0.021815,0.978083,rs773560,1:113285485:G:A,ENSG00000231128,...,0.015663,4.230000e-27,520580.0,rs773560:113828107:G:A,rs773560,G,A,1854,0.370208,5008
5,chr1,192511104,1.186779e-168,9.321957e-168,1.357045e-02,0.105713,0.880717,rs3886948,1:192541974:T:C,ENSG00000232077,...,0.015087,1.130000e-09,520580.0,rs3886948:192511104:T:C,rs3886948,T,C,2908,0.580671,5008
6,chr1,192514029,8.389343e-20,6.589692e-19,1.588592e-02,0.123921,0.860193,rs10921191,1:192544899:C:A,ENSG00000232077,...,0.015068,1.940000e-09,520580.0,rs10921191,rs10921191,C,A,2911,0.581270,5008
7,chr1,64123158,0.000000e+00,0.000000e+00,1.917969e-02,0.167179,0.813641,rs6670731,1:63657487:C:T,ENSG00000158966,...,0.017239,1.370000e-10,520580.0,1:64123158-C-T,rs6670731,C,T,1545,0.308506,5008
8,chr1,113828107,1.203625e-142,2.683252e-140,1.022411e-04,0.021815,0.978083,rs773560,1:113285485:G:A,ENSG00000273483,...,0.015663,4.230000e-27,520580.0,rs773560:113828107:G:A,rs773560,G,A,1854,0.370208,5008
0,chr10,6098824,1.248598e-04,1.775259e-03,9.870427e-03,0.139489,0.848740,rs706779,10:6056861:T:C,ENSG00000224034,...,0.023800,2.250000e-12,,exm-rs706779,rs706779,T,C,2294,0.458067,5008


In [12]:
snp_info

Unnamed: 0,chr,pos,original.name,rsnumber,ref,alt,AC,AF,AN
0,chr9,10163,9:10163:CT:C,rs557530731,CT,C,15,0.002995,5008
1,chr9,10273,9:10273:AAC:A,rs574047666,AAC,A,4,0.000799,5008
2,chr9,10327,9:10327:T:C,rs535827433,T,C,2,0.000399,5008
3,chr9,10329,9:10329:A:G,rs555670059,A,G,2,0.000399,5008
4,chr9,10362,rs141734683:10362:C:CT,rs141734683,C,CT,3413,0.681510,5008
5,chr9,10469,rs56377469:10469:G:C,rs56377469,G,C,3575,0.713858,5008
6,chr9,10498,9:10498:T:A,rs544446874,T,A,1,0.000200,5008
7,chr9,10513,9:10513:G:A,rs556678566,G,A,2,0.000399,5008
8,chr9,10548,9:10548:G:A,rs578035845,G,A,1,0.000200,5008
9,chr9,10559,9:10559:C:G,rs545457369,C,G,2,0.000399,5008
