# fine-mapping

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# from qmplot import manhattanplot
from bioinfokit import visuz
# from functools import reduce

## Data Preprocessing

In [None]:
# Load the summary statistics (tab-separated, column names on the first row)
# NOTE(review): assumes the input has 8 columns ordered as chromosome, position,
# marker name, effect allele, non-effect allele, Beta, SE, p-value -- confirm.
df = pd.read_csv("input/IGAP_stage_1.txt", header=0, sep='\t')

# Add the column of ZScores = Beta / SE
df['Zscore'] = df['Beta'] / df['SE']

# Add the empty column of minor allele frequency.
# This column must exist: the reorder below moves the LAST column into 4th
# position, and the header below has 'MAF' in that slot. Without it, Zscore
# would be moved there instead and the 10-name header would not match the
# 9 remaining columns.
df['MAF'] = ''

# Reorder the columns: move the last column (MAF) right after the first three
cols = df.columns.tolist()
cols = cols[:3] + [cols[-1]] + cols[3:-1]
df = df[cols]

# Output the dataset in the required form
h = ['CHR', 'BP', 'rsID', 'MAF', 'EA', 'NEA', 'BETA', 'SE', 'P', 'Zscore']
# Fail with a readable message if the input layout is not the expected one
assert len(cols) == len(h), \
    "expected {} columns to match the output header, got {}: {}".format(len(h), len(cols), cols)
df.to_csv('input/summary.txt', sep='\t', header=h, index=False)

In [54]:
# Load the summary statistics (tab-separated, column names on the first row)
# NOTE(review): assumes the input has 8 columns ordered as chromosome, position,
# marker name, effect allele, non-effect allele, Beta, SE, p-value -- confirm.
df = pd.read_csv("input/IGAP_stage_1.txt", header=0, sep='\t')

# Add the column of ZScores = Beta / SE
df['Zscore'] = df['Beta'] / df['SE']

# No column reordering here: Zscore is appended as the last column, which is
# exactly where the header below expects it. Moving the last column into 4th
# position (as done for summary.txt, where that column is MAF) would write the
# Z-scores under 'REF' and shift every following column by one label.

# Output the dataset in the required form
h = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'BETA', 'SE', 'P', 'Zscore']
# Fail with a readable message if the input layout is not the expected one
assert df.shape[1] == len(h), \
    "expected {} columns to match the output header, got {}: {}".format(len(h), df.shape[1], df.columns.tolist())
df.to_csv('input/variants.txt', sep='\t', header=h, index=False)

In [1]:
# Input sample sizes: the total is the sum of the two group sizes below
# NOTE(review): presumably the case and control counts of the study -- confirm.
group_sizes = (17008, 37154)
sample_size = sum(group_sizes)
sample_size

54162

## Commands used to run the programs

In [None]:
# %%bash
# # download programs
# cd bin
# ./00_set_up.sh

# # Compute LD and build the reference panel
# cd ../ref
# mkdir ./ld/vcf
# cp /datasets/cs284-sp21-A00-public/1000Genomes/*.gz ./ld/vcf
# cp /datasets/cs284-sp21-A00-public/1000Genomes/*.tbi ./ld/vcf
# nohup python 01_prepare_reference.py &

# # Run fine-mapping tools
# # (nohup ... & returns immediately: wait for the job to finish before running the mv lines)
# cd ..
# nohup python fine_map_pipe.py -s 54162 input/summary.txt output &
# mv nohup.out def_param_nohup.out
# mv output/summary_total_credible_set.txt output/def_param_tot_cred_set.txt

# # Run fine-mapping tools with diff params
# nohup python fine_map_pipe.py -s 54162 -n 2 input/summary.txt output &
# mv nohup.out n_2_nohup.out
# mv output/summary_total_credible_set.txt output/n_2_tot_cred_set.txt

## Process the results

In [2]:
## Extract the execution time from the nohup outputs
def extract_exec_time(logs):
    ''' Sum the execution times printed in a nohup log file, per program.

    Only lines starting with '---' are considered. A timing line is expected
    to split into exactly six whitespace-separated fields:
    marker, program name, two words, "<integer><one-character unit>", marker.
    Lines starting with '---' that do not have this shape (e.g. separator
    rules) or that name a program other than PAINTOR / CAVIARBF / FINEMAP
    are ignored instead of aborting the whole parse.

    Parameters
    ----------
    logs : path of the log file to read.

    Returns
    -------
    dict mapping 'PAINTOR', 'CAVIARBF' and 'FINEMAP' (in that order) to the
    summed integer time found for each program (0 when none was logged).

    Raises
    ------
    OSError if the file cannot be opened; ValueError if the time field of a
    timing line is not an integer followed by a single unit character.
    '''
    exec_time = {'PAINTOR': 0, 'CAVIARBF': 0, 'FINEMAP': 0}
    with open(logs, 'r') as infile:
        for line in infile:
            if not line.startswith('---'):
                continue
            fields = line.split()
            # Skip '---' lines that are not timing lines of a tracked program
            if len(fields) != 6 or fields[1] not in exec_time:
                continue
            prog, duration = fields[1], fields[4]
            # Drop the trailing unit character before converting
            exec_time[prog] += int(duration[:-1])
    return exec_time

In [7]:
# Total time per program for the default-parameter run
exec_time = extract_exec_time("output/def_param_nohup.out")
exec_seconds = list(exec_time.values())
print("Execution time: PAINTOR = {} s, CAVIARBF = {} s, FINEMAP = {} s\n".format(*exec_seconds))

# Use FINEMAP runtime as a reference: divide every total by FINEMAP's total
exec_relative = np.array(exec_seconds) / exec_time['FINEMAP']
print("Relative exec time: PAINTOR = {}, CAVIARBF = {}, FINEMAP = {}".format(*exec_relative))

Execution time: PAINTOR = 1094 s, CAVIARBF = 810 s, FINEMAP = 335 s

Relative exec time: PAINTOR = 3.265671641791045, CAVIARBF = 2.417910447761194, FINEMAP = 1.0


In [2]:
## Load the final results (credible sets)
# Rows are ordered by the FINEMAP column, largest first; `cred_set` starts at
# -1 for every row and is filled in later by the block-assignment loop.
res = (
    pd.read_csv("output/def_param_tot_cred_set.txt", sep='\t', header=0)
    .sort_values(['FINEMAP'], ascending=False)
    .assign(cred_set=-1)
)
print(res.shape)
res.head(10)

(6684, 16)


Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
6683,19,45351516,rs41289512,0.0308,G,C,1.6384,0.0594,2.2399999999999998e-167,27.582492,1.0,1.0,1.0,1609,7,-1
433,18,29088958,rs8093731,0.0119,T,C,-0.6136,0.1123,4.63e-08,-5.463936,0.989497,0.934788,0.99224,1547,7,-1
396,11,121435587,rs11218343,0.0427,C,T,-0.2697,0.041,4.976e-11,-6.578049,0.983449,0.986059,0.985594,1169,7,-1
387,11,85867875,rs10792832,0.3718,A,G,-0.1297,0.0161,6.534e-16,8.055901,0.582329,0.599754,0.596197,1142,7,-1
397,14,92938855,rs12590654,0.337,A,G,-0.0965,0.0176,4.097e-08,-5.482955,0.543408,0.551195,0.56098,1369,7,-1
15,2,127892810,rs6733839,0.3797,T,C,0.188,0.0176,1.659e-26,10.681818,0.504097,0.50422,0.504194,207,7,-1
16,2,127891427,rs4663105,0.4036,C,A,0.1837,0.0172,1.0009999999999999e-26,10.680233,0.495903,0.495779,0.495806,207,7,-1
6675,19,1063443,rs4147929,0.1849,A,G,0.1348,0.0224,1.701e-09,-6.017857,0.380885,0.392008,0.389905,1580,7,-1
312,7,143099133,rs10808026,0.2227,A,C,-0.1393,0.0206,1.417e-11,-6.762136,0.342352,0.348517,0.347266,832,7,-1
326,11,60103385,rs72924659,0.2942,T,C,-0.1413,0.0196,5.354e-13,-7.209184,0.326261,0.345285,0.341369,1129,7,-1


In [3]:
## load the summary statistics
# Every p-value is shifted up by 1e-100, so none of them is exactly 0
df = (
    pd.read_csv("input/summary.txt", sep='\t')
    .assign(P=lambda frame: frame['P'] + 1e-100)
)
df.head(10)

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore
0,1,751343,rs28544273,,A,T,-0.0146,0.0338,0.6651,-0.431953
1,1,751756,rs143225517,,C,T,-0.0146,0.0338,0.6651,-0.431953
2,1,752566,rs3094315,,G,A,-0.0122,0.0294,0.6773,-0.414966
3,1,753405,rs61770173,,C,A,-0.0126,0.0339,0.7104,-0.371681
4,1,768253,rs2977608,,A,C,-0.0394,0.0261,0.1308,-1.509579
5,1,768448,rs77786510,,A,G,-0.0385,0.0303,0.2034,-1.270627
6,1,769963,rs7518545,,A,G,-0.0471,0.036,0.1904,-1.308333
7,1,845274,rs112856858,,T,G,0.0234,0.0329,0.4766,0.711246
8,1,845635,rs117086422,,T,C,0.0317,0.0303,0.2944,1.046205
9,1,845938,rs57760052,,A,G,0.0307,0.0295,0.2967,1.040678


In [4]:
## load identified causal blocks
# One row per block; shown in full after printing its (rows, columns) shape
blocks_path = "output/summary_significant_blocks.txt"
blocks = pd.read_csv(blocks_path, sep='\t', header=0)
print(blocks.shape)
blocks

(13, 3)


Unnamed: 0,chr,start,stop
0,1,206073265,208410364
1,2,127373764,128034347
2,6,31571218,32682664
3,6,47311898,48391125
4,7,142656310,144968289
5,8,26682525,28162392
6,11,58780549,62223771
7,11,84381272,86619301
8,11,121175943,122591910
9,14,91296860,93132299


In [5]:
# For every block (chr, start, stop): tag the SNPs of `res` that fall inside it
# with the block's positional index (column `cred_set`) and remember the rsID
# of the SNP with the highest FINEMAP value as the marker to label on the plot.
markers = []
for i, (chr_, start, stop) in enumerate(blocks.itertuples(index=False, name=None)):
    # Same chromosome and position within [start, stop], both ends included
    in_block = (res['CHR'] == chr_) & (res['BP'] >= start) & (res['BP'] <= stop)
    res.loc[in_block, ['cred_set']] = i
    # A block without any SNP in `res` has no marker; skip it rather than
    # failing on the empty selection
    if not in_block.any():
        continue
    res_sel = res.loc[in_block].sort_values(['FINEMAP'], ascending=False)
    markers.append(res_sel.iloc[0]['rsID'])

In [27]:
# Manhattan plot of the summary statistics, labelling the SNPs in `markers`
visuz.marker.mhat(
    df=df,
    chr='CHR',
    pv='P',
    gwas_sign_line=True,   # draw the significance line ...
    gwasp=5E-8,            # ... at this p-value
    dotsize=1,
    color=['pink','skyblue'],
    valpha=0.5,
    ylm=(0,101,5),         # NOTE(review): presumably y-axis (min, max, step) -- confirm in bioinfokit docs
    markernames=markers,
    markeridcol='rsID',
    gfont=3,
)

In [47]:
# First ten rows of `res`, now with `cred_set` filled in
res.iloc[:10]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
6683,19,45351516,rs41289512,0.0308,G,C,1.6384,0.0594,2.2399999999999998e-167,27.582492,1.0,1.0,1.0,1609,7,12
433,18,29088958,rs8093731,0.0119,T,C,-0.6136,0.1123,4.63e-08,-5.463936,0.989497,0.934788,0.99224,1547,7,10
396,11,121435587,rs11218343,0.0427,C,T,-0.2697,0.041,4.976e-11,-6.578049,0.983449,0.986059,0.985594,1169,7,8
387,11,85867875,rs10792832,0.3718,A,G,-0.1297,0.0161,6.534e-16,8.055901,0.582329,0.599754,0.596197,1142,7,7
397,14,92938855,rs12590654,0.337,A,G,-0.0965,0.0176,4.097e-08,-5.482955,0.543408,0.551195,0.56098,1369,7,9
15,2,127892810,rs6733839,0.3797,T,C,0.188,0.0176,1.659e-26,10.681818,0.504097,0.50422,0.504194,207,7,1
16,2,127891427,rs4663105,0.4036,C,A,0.1837,0.0172,1.0009999999999999e-26,10.680233,0.495903,0.495779,0.495806,207,7,1
6675,19,1063443,rs4147929,0.1849,A,G,0.1348,0.0224,1.701e-09,-6.017857,0.380885,0.392008,0.389905,1580,7,11
312,7,143099133,rs10808026,0.2227,A,C,-0.1393,0.0206,1.417e-11,-6.762136,0.342352,0.348517,0.347266,832,7,4
326,11,60103385,rs72924659,0.2942,T,C,-0.1413,0.0196,5.354e-13,-7.209184,0.326261,0.345285,0.341369,1129,7,6


In [6]:
## credible sets
# Rows tagged with block index 12
res.loc[res['cred_set'].eq(12)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
6683,19,45351516,rs41289512,0.0308,G,C,1.6384,0.0594,2.2399999999999998e-167,27.582492,1.0,1.0,1.0,1609,7,12


In [7]:
# Rows tagged with block index 11
res.loc[res['cred_set'].eq(11)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
6675,19,1063443,rs4147929,0.1849,A,G,0.1348,0.0224,1.701e-09,-6.017857,0.380885,0.392008,0.389905,1580,7,11
6676,19,1056492,rs3752246,0.1899,G,C,0.1345,0.0229,4.264e-09,-5.873362,0.165866,0.166279,0.166278,1580,7,11
6677,19,1048393,rs67027268,0.1044,A,G,0.2074,0.0354,4.702e-09,5.858757,0.15267,0.152648,0.152729,1580,7,11
6678,19,1047687,rs4147911,0.1044,G,C,0.1869,0.0328,1.25e-08,5.698171,0.0622,0.060448,0.060831,1580,7,11
6679,19,1047078,rs4147910,0.1044,G,A,0.1863,0.0327,1.261e-08,5.697248,0.061884,0.060131,0.060515,1580,7,11
6680,19,1048021,rs73505217,0.1044,G,A,0.1874,0.0329,1.208e-08,5.696049,0.061477,0.059722,0.060106,1580,7,11
6681,19,1048051,rs78410552,0.1024,T,G,0.1859,0.0329,1.664e-08,5.650456,0.047864,0.046131,0.046503,1580,7,11
6682,19,1048116,rs76348507,0.1044,A,G,0.1856,0.033,1.803e-08,5.624242,0.041487,0.039803,0.040161,1580,7,11


In [52]:
# Rows tagged with block index 10 whose label is 7
# NOTE(review): label 7 presumably means "reported by all 3 programs" -- confirm
res.loc[res['cred_set'].eq(10) & res['label'].eq(7)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
433,18,29088958,rs8093731,0.0119,T,C,-0.6136,0.1123,4.63e-08,-5.463936,0.989497,0.934788,0.99224,1547,7,10


In [8]:
# All rows tagged with block index 10, whatever their label
res.loc[res['cred_set'].eq(10)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
433,18,29088958,rs8093731,0.0119,T,C,-0.6136,0.1123,4.630000e-08,-5.463936,9.894970e-01,9.347876e-01,9.922400e-01,1547,7,10
434,18,27935498,rs3110689,0.1998,T,C,-0.0684,0.0197,5.063000e-04,-3.472081,1.800520e-04,1.292257e-04,1.450230e-04,1547,2,10
435,18,27940854,rs11083394,0.1988,G,A,-0.0686,0.0199,5.546000e-04,-3.447236,1.656820e-04,1.185954e-04,1.331650e-04,1547,2,10
436,18,28333975,rs74354143,0.0785,T,C,0.1004,0.0301,8.415000e-04,3.335548,1.148410e-04,8.124188e-05,9.143930e-05,1547,2,10
437,18,28008295,rs61705963,0.2167,A,C,-0.0628,0.0193,1.141000e-03,-3.253886,8.851740e-05,6.209862e-05,7.001100e-05,1547,2,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6661,18,28294595,rs2733126,0.3807,A,G,0.0000,0.0160,9.983000e-01,-0.000000,5.275970e-07,3.138325e-07,3.657450e-07,1547,2,10
6660,18,28292940,rs1943573,0.3807,G,A,0.0000,0.0160,9.980000e-01,-0.000000,5.275970e-07,3.138325e-07,3.657450e-07,1547,2,10
6659,18,29682501,rs72940728,0.0905,G,A,0.0000,0.0249,9.991000e-01,0.000000,5.275970e-07,3.138325e-07,3.657450e-07,1547,2,10
6658,18,29680776,rs112923970,0.0944,T,C,-0.0000,0.0249,9.999000e-01,-0.000000,5.275970e-07,3.138325e-07,3.657450e-07,1547,2,10


In [11]:
# Rows tagged with block index 9 whose label is 7
# NOTE(review): label 7 presumably means "reported by all 3 programs" -- confirm
res.loc[res['cred_set'].eq(9) & res['label'].eq(7)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
397,14,92938855,rs12590654,0.337,A,G,-0.0965,0.0176,4.097e-08,-5.482955,0.543408,0.551195,0.56098,1369,7,9
398,14,92926952,rs10498633,0.2147,T,G,-0.1044,0.0199,1.467e-07,-5.246231,0.159018,0.155123,0.159136,1369,7,9
399,14,92934120,rs12590273,0.1441,C,T,-0.1348,0.0264,3.131e-07,-5.106061,0.078803,0.075175,0.077471,1369,7,9
400,14,92932828,rs12881735,0.2197,C,T,-0.1008,0.0202,5.745e-07,-4.990099,0.044724,0.041903,0.043341,1369,7,9
401,14,92938382,rs36026988,0.2137,C,T,-0.0979,0.0197,6.458e-07,-4.969543,0.040506,0.037832,0.039155,1369,7,9
402,14,92936690,rs8008388,0.1938,G,A,0.0878,0.0191,4.047e-06,-4.596859,0.007217,0.00638,0.006678,1369,7,9
403,14,92936683,rs12435220,0.1938,C,G,0.0878,0.0191,4.195e-06,-4.596859,0.007217,0.00638,0.006678,1369,7,9
404,14,92935753,rs12878457,0.2247,T,C,0.0837,0.0183,5.107e-06,-4.57377,0.006515,0.00574,0.006012,1369,7,9
405,14,92935770,rs12897398,0.2296,A,G,0.0834,0.0183,5.411e-06,-4.557377,0.00606,0.005327,0.005581,1369,7,9
406,14,92937293,rs4904929,0.1938,C,T,0.0864,0.0192,6.567e-06,-4.5,0.004712,0.004109,0.004313,1369,7,9


In [9]:
# All rows tagged with block index 9, whatever their label
res.loc[res['cred_set'].eq(9)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
397,14,92938855,rs12590654,0.337,A,G,-0.0965,0.0176,4.097e-08,-5.482955,0.543408,0.551195,0.56098,1369,7,9
398,14,92926952,rs10498633,0.2147,T,G,-0.1044,0.0199,1.467e-07,-5.246231,0.159018,0.155123,0.159136,1369,7,9
399,14,92934120,rs12590273,0.1441,C,T,-0.1348,0.0264,3.131e-07,-5.106061,0.078803,0.075175,0.077471,1369,7,9
400,14,92932828,rs12881735,0.2197,C,T,-0.1008,0.0202,5.745e-07,-4.990099,0.044724,0.041903,0.043341,1369,7,9
401,14,92938382,rs36026988,0.2137,C,T,-0.0979,0.0197,6.458e-07,-4.969543,0.040506,0.037832,0.039155,1369,7,9
402,14,92936690,rs8008388,0.1938,G,A,0.0878,0.0191,4.047e-06,-4.596859,0.007217,0.00638,0.006678,1369,7,9
403,14,92936683,rs12435220,0.1938,C,G,0.0878,0.0191,4.195e-06,-4.596859,0.007217,0.00638,0.006678,1369,7,9
404,14,92935753,rs12878457,0.2247,T,C,0.0837,0.0183,5.107e-06,-4.57377,0.006515,0.00574,0.006012,1369,7,9
405,14,92935770,rs12897398,0.2296,A,G,0.0834,0.0183,5.411e-06,-4.557377,0.00606,0.005327,0.005581,1369,7,9
406,14,92937293,rs4904929,0.1938,C,T,0.0864,0.0192,6.567e-06,-4.5,0.004712,0.004109,0.004313,1369,7,9


In [12]:
# Rows tagged with block index 8
res.loc[res['cred_set'].eq(8)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
396,11,121435587,rs11218343,0.0427,C,T,-0.2697,0.041,4.976e-11,-6.578049,0.983449,0.986059,0.985594,1169,7,8


In [13]:
# Rows tagged with block index 7
res.loc[res['cred_set'].eq(7)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
387,11,85867875,rs10792832,0.3718,A,G,-0.1297,0.0161,6.534e-16,8.055901,0.582329,0.599754,0.596197,1142,7,7
388,11,85868640,rs3851179,0.3708,T,C,-0.1305,0.0165,2.844e-15,7.909091,0.187362,0.186282,0.186538,1142,7,7
389,11,85856187,rs7110631,0.3161,C,G,-0.1303,0.0168,9.342e-15,7.755952,0.058697,0.056287,0.056788,1142,7,7
390,11,85831541,rs471470,0.333,C,A,-0.1273,0.0165,1.189e-14,7.715152,0.043249,0.041081,0.041528,1142,7,7
391,11,85858538,rs7941541,0.3161,G,A,-0.1284,0.0168,2.2e-14,7.642857,0.025274,0.023608,0.023948,1142,7,7
392,11,85820077,rs543293,0.33,A,G,-0.1257,0.0165,2.409e-14,7.618182,0.021065,0.019564,0.01987,1142,7,7
393,11,85828551,rs474479,0.3181,C,G,-0.126,0.0166,3.867e-14,7.590361,0.017165,0.015841,0.01611,1142,7,7
394,11,85830157,rs567075,0.3181,T,C,-0.1258,0.0167,4.273e-14,7.532934,0.011276,0.010271,0.010473,1142,7,7
395,11,85831246,rs573167,0.333,G,A,-0.1242,0.0165,5.612e-14,7.527273,0.01082,0.009843,0.01004,1142,1,7


In [15]:
# Rows tagged with block index 6 whose label is 7
# NOTE(review): label 7 presumably means "reported by all 3 programs" -- confirm
res.loc[res['cred_set'].eq(6) & res['label'].eq(7)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
326,11,60103385,rs72924659,0.2942,T,C,-0.1413,0.0196,5.354e-13,-7.209184,0.326261,0.345285,0.341369,1129,7,6
327,11,60021948,rs1582763,0.3628,A,G,-0.1152,0.0164,2.036e-12,-7.02439,0.091388,0.092934,0.092639,1129,7,6
328,11,60099225,rs4939338,0.3588,T,C,-0.1153,0.0165,2.602e-12,6.987879,0.071349,0.071995,0.071881,1129,7,6
329,11,60076940,rs11824773,0.3668,C,G,-0.1107,0.0159,3.72e-12,-6.962264,0.060021,0.060237,0.060209,1129,7,6
330,11,60076693,rs11824734,0.3628,A,G,-0.1111,0.016,3.967e-12,-6.94375,0.052992,0.052975,0.052992,1129,7,6
331,11,60095740,rs72924626,0.3579,C,T,-0.1143,0.0165,4.185e-12,-6.927273,0.047444,0.047265,0.047314,1129,7,6
332,11,60070946,rs55777218,0.3668,C,T,-0.1108,0.016,3.944e-12,-6.925,0.046727,0.046528,0.046581,1129,7,6
333,11,60078475,rs61900467,0.3579,T,C,-0.1102,0.016,4.955e-12,-6.8875,0.036369,0.03593,0.03603,1129,7,6
334,11,60039917,rs2162254,0.4016,T,A,-0.1094,0.016,7.833e-12,6.8375,0.026094,0.025511,0.025637,1129,7,6
335,11,60099912,rs11827324,0.3579,G,A,-0.1133,0.0166,9.294e-12,-6.825301,0.024072,0.023475,0.023603,1129,7,6


In [17]:
# Rows tagged with block index 5
res.loc[res['cred_set'].eq(5)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
318,8,27462481,rs7982,0.3857,A,G,-0.14,0.0165,2.4770000000000003e-17,8.484848,0.206824,0.210079,0.209402,869,7,5
319,8,27467821,rs2070926,0.3936,C,G,-0.1455,0.0172,2.6560000000000004e-17,8.459302,0.167744,0.169284,0.168966,869,7,5
320,8,27465312,rs11787077,0.3897,T,C,-0.1411,0.0167,2.7650000000000003e-17,8.449102,0.154314,0.15533,0.155121,869,7,5
321,8,27464929,rs4236673,0.3877,A,G,-0.1409,0.0167,2.818e-17,8.437126,0.139931,0.140425,0.140326,869,7,5
322,8,27456253,rs2279590,0.4046,T,C,-0.1428,0.017,3.753e-17,8.4,0.103411,0.102809,0.102937,869,7,5
323,8,27466157,rs1532276,0.3887,T,C,-0.141,0.0168,5.717e-17,8.392857,0.097581,0.096839,0.096995,869,7,5
324,8,27467686,rs9331896,0.3956,C,T,-0.1457,0.0175,9.626000000000001e-17,8.325714,0.056689,0.055319,0.055603,869,7,5
325,8,27466315,rs1532278,0.3857,T,C,-0.1426,0.0173,1.672e-16,8.242775,0.029157,0.027872,0.028136,869,7,5


In [20]:
# Rows tagged with block index 4
res.loc[res['cred_set'].eq(4)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
312,7,143099133,rs10808026,0.2227,A,C,-0.1393,0.0206,1.417e-11,-6.762136,0.342352,0.348517,0.347266,832,7,4
313,7,143108841,rs11763230,0.2187,T,C,-0.135,0.0201,2.111e-11,-6.716418,0.254093,0.256255,0.255827,832,7,4
314,7,143109208,rs75045569,0.1759,G,T,-0.144,0.0216,2.845e-11,-6.666667,0.184115,0.183809,0.183885,832,7,4
315,7,143103481,rs56402156,0.2187,A,G,-0.135,0.0204,3.395e-11,-6.617647,0.134356,0.13281,0.133135,832,7,4
316,7,143099107,rs7791765,0.2227,G,T,-0.1346,0.0211,1.705e-10,-6.379147,0.029987,0.028274,0.028619,832,7,4
317,7,143107876,rs11762262,0.2177,T,C,-0.1339,0.0211,2.086e-10,-6.345972,0.024447,0.022903,0.023213,832,7,4


In [22]:
# Rows tagged with block index 3.
# Only the last SNP is not reported by all 3 programs.
res.loc[res['cred_set'].eq(3)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
241,6,47432637,rs9381563,0.3221,C,T,0.0968,0.0166,5.300000e-09,-5.831325,0.116724,0.124419,0.122865,667,7,3
242,6,47362439,rs6934355,0.3330,C,T,0.0952,0.0172,3.282000e-08,-5.534884,0.022865,0.023145,0.023098,667,7,3
243,6,47487762,rs10948363,0.2515,G,A,0.0978,0.0177,3.052000e-08,5.525424,0.021736,0.021967,0.021930,667,7,3
244,6,47452270,rs9296559,0.2515,C,T,0.0975,0.0177,3.644000e-08,5.508475,0.019856,0.020009,0.019987,667,7,3
245,6,47494759,rs9296564,0.2555,G,A,0.0956,0.0174,3.657000e-08,5.494253,0.018408,0.018506,0.018494,667,7,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307,6,47585912,rs1948047,0.2555,C,T,0.0916,0.0173,1.167000e-07,5.294798,0.006499,0.006321,0.006360,667,7,3
308,6,47383890,rs13207334,0.2117,G,A,0.1009,0.0191,1.185000e-07,5.282723,0.006109,0.005931,0.005969,667,7,3
309,6,47493940,rs7749167,0.2555,A,G,0.0913,0.0173,1.295000e-07,5.277457,0.005947,0.005768,0.005807,667,7,3
310,6,47379843,rs9369686,0.2038,T,C,0.1052,0.0200,1.385000e-07,5.260000,0.005441,0.005262,0.005300,667,7,3


In [25]:
# Rows tagged with block index 2 whose label is 7
# NOTE(review): label 7 presumably means "reported by all 3 programs" -- confirm
res.loc[res['cred_set'].eq(2) & res['label'].eq(7)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
17,6,32607141,rs9272561,0.4463,A,G,-0.1360,0.0230,3.376000e-09,-5.913043,0.022702,0.023880,0.023641,657,7,2
18,6,32521785,rs72844190,0.3032,T,G,0.1388,0.0235,3.759000e-09,-5.906383,0.021854,0.022960,0.022736,657,7,2
19,6,32519600,rs73729117,0.2942,A,T,0.1329,0.0228,5.385000e-09,-5.828947,0.014079,0.014587,0.014486,657,7,2
20,6,32570375,rs9270839,0.2813,A,C,0.1121,0.0193,6.617000e-09,-5.808290,0.012533,0.012938,0.012858,657,7,2
21,6,32572249,rs9270914,0.2962,A,T,0.1172,0.0202,7.083000e-09,-5.801980,0.012097,0.012474,0.012399,657,7,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,6,32607729,rs9272606,0.4205,T,C,-0.1163,0.0221,1.365000e-07,-5.262443,0.000673,0.000634,0.000642,657,7,2
228,6,32575087,rs9271031,0.2893,C,A,0.1050,0.0200,1.493000e-07,-5.250000,0.000632,0.000594,0.000601,657,7,2
227,6,32575083,rs9271030,0.2903,C,T,0.1050,0.0200,1.497000e-07,-5.250000,0.000632,0.000594,0.000601,657,7,2
229,6,32576796,rs9271118,0.3340,A,G,0.1033,0.0197,1.636000e-07,-5.243655,0.000612,0.000574,0.000582,657,7,2


In [27]:
# All rows tagged with block index 2, whatever their label
res.loc[res['cred_set'].eq(2)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
17,6,32607141,rs9272561,0.4463,A,G,-0.1360,0.0230,3.376000e-09,-5.913043,0.022702,0.023880,0.023641,657,7,2
18,6,32521785,rs72844190,0.3032,T,G,0.1388,0.0235,3.759000e-09,-5.906383,0.021854,0.022960,0.022736,657,7,2
19,6,32519600,rs73729117,0.2942,A,T,0.1329,0.0228,5.385000e-09,-5.828947,0.014079,0.014587,0.014486,657,7,2
20,6,32570375,rs9270839,0.2813,A,C,0.1121,0.0193,6.617000e-09,-5.808290,0.012533,0.012938,0.012858,657,7,2
21,6,32572249,rs9270914,0.2962,A,T,0.1172,0.0202,7.083000e-09,-5.801980,0.012097,0.012474,0.012399,657,7,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236,6,32503128,rs112652539,0.3390,G,A,0.1211,0.0232,1.720000e-07,-5.219828,0.000543,0.000507,0.000514,657,1,2
237,6,32571690,rs9270881,0.3847,C,T,0.1028,0.0197,1.706000e-07,-5.218274,0.000538,0.000503,0.000510,657,1,2
238,6,32579671,rs13191975,0.4632,T,C,-0.0975,0.0187,1.841000e-07,-5.213904,0.000527,0.000492,0.000499,657,1,2
239,6,32571705,rs9270882,0.3807,G,C,0.1027,0.0197,1.865000e-07,-5.213198,0.000525,0.000490,0.000497,657,1,2


In [30]:
# Rows tagged with block index 1
res.loc[res['cred_set'].eq(1)]

Unnamed: 0,CHR,BP,rsID,MAF,EA,NEA,BETA,SE,P,Zscore,PAINTOR,CAVIARBF,FINEMAP,block_id,label,cred_set
15,2,127892810,rs6733839,0.3797,T,C,0.188,0.0176,1.659e-26,10.681818,0.504097,0.50422,0.504194,207,7,1
16,2,127891427,rs4663105,0.4036,C,A,0.1837,0.0172,1.0009999999999999e-26,10.680233,0.495903,0.495779,0.495806,207,7,1


In [33]:
# Rows tagged with block index 0, restricted to the identification, p-value,
# per-program and label columns
shown_columns = ['CHR','BP','rsID','P','PAINTOR','CAVIARBF','FINEMAP','label']
res.loc[res['cred_set'].eq(0), shown_columns]

Unnamed: 0,CHR,BP,rsID,P,PAINTOR,CAVIARBF,FINEMAP,label
0,1,207750568,rs679515,4.105e-15,0.17525,0.179341,0.178496,7
1,1,207738077,rs7515905,3.93e-15,0.168609,0.172337,0.171568,7
2,1,207747296,rs1752684,3.653e-15,0.167714,0.171394,0.170635,7
3,1,207698044,rs6661489,7.613e-15,0.082756,0.082732,0.082742,7
4,1,207692049,rs6656401,7.726e-15,0.075954,0.075729,0.07578,7
5,1,207786828,rs2093760,1.163e-14,0.056235,0.055545,0.055691,7
6,1,207799874,rs10863420,1.85e-14,0.042207,0.041318,0.041503,7
7,1,207685786,rs4266886,2.342e-14,0.03022,0.029277,0.029472,7
8,1,207806730,rs6697005,2.574e-14,0.027705,0.026768,0.026961,7
9,1,207798694,rs10779335,2.645e-14,0.026641,0.025709,0.025901,7


In [37]:
# Column-wise totals of the three per-program columns over block index 0
program_columns = ['PAINTOR','CAVIARBF','FINEMAP']
res.loc[res['cred_set'].eq(0), program_columns].sum(axis=0)

PAINTOR     0.958655
CAVIARBF    0.961138
FINEMAP     0.960634
dtype: float64

In [None]:
# %%bash
# vim input/summary.txt
# # add a '#' to the first line
# bgzip -c summary.txt > summary.txt.gz
# tabix -f -p vcf summary.txt.gz 