In [None]:
# ammend DHS index annotations by doing a down-lift from hg38 to hg19
# First collect positions that could not be downlifted
import collections
blackdict = collections.defaultdict(lambda: False)
errfile = "/cbscratch/franco/datasets/DHSindex/hglft_genome_32b00_ea89f0.err"
with open(errfile) as instream:
    for line in instream:
        if line[0] == "#":
            continue
        blackdict[line.rstrip()] = True

# Go line by line through DHSindex and check if the range was downlifted or not
# and keep its annotation
liftfile = "/cbscratch/franco/datasets/DHSindex/hglft_genome_32b00_ea89f0.bed"
dhsindexfile = "/cbscratch/franco/datasets/DHSindex/DHS_Index_and_Vocabulary_hg38_WM20190703.txt"
hg19_dhs_dict = collections.defaultdict(dict)
hglft = open(liftfile)
with open(dhsindexfile) as instream:
    next(instream)
    for line in instream:
        arr = line.rstrip().split("\t")
        chrm = arr[0]
        start = arr[1]
        end   = arr[2]
        annot = arr[9]
        key = "{:s}:{:s}-{:s}".format(chrm, start, end)
        if not blackdict[key]:
            hg19chrom, hg19range = hglft.readline().rstrip().split(":")
            hg19_dhs_dict[hg19chrom][hg19range] = annot

# Write down the newly annotated dictionary in a file, sorted by genomic position
hg19_DHS_outfile = "/cbscratch/franco/datasets/DHSindex/DHS_Index_downlift_hg19.txt"
with open(hg19_DHS_outfile, 'w') as outstream:
    for chrm in range(1, 23):
        chrom = "chr"+str(chrm)
        for key in sorted(hg19_dhs_dict[chrom], key=lambda t: int(t.split("-")[0])):
            # print(chrom, key, hg19_dhs_dict[chrom][key])
            outstream.write("{:s}\t{:s}\t{:s}\t{:s}\n".format(chrom, key.split("-")[0], key.split("-")[1], hg19_dhs_dict[chrom][key]))

In [1]:
import time
import os
# here we annotate the best trans-eQTLs for EQTLgen

def annotate_snps(dhsfile, gtfile, annot_outfile, mode="tejaas"):
    current_chrm = None
    dhs = open(dhsfile)
    line = dhs.readline()
    f = None
    with open(annot_outfile, 'w') as outf:
        outf.write("chr\trsid\tpos\n")
        while line:
            arr = line.rstrip().split()
            if arr[0][3:] == "X":
                break
            chrm = int(arr[0][3:])
            start = int(arr[1])
            end = int(arr[2])
            if chrm != current_chrm:
                if f:
                    f.close()
                # close previous GT file and open new one
                if current_chrm is not None:
                    tend = time.time()
                    print("CHR{:d} took {:g}s".format(current_chrm, tend-tstart))

                current_chrm = chrm
                if not os.path.exists(gtfile.format(current_chrm)):
                    continue
                print("Processing CHRM", current_chrm, end=" ")
                f = open(gtfile.format(current_chrm), 'r')
                next(f)
                tstart = time.time()
                gtline = f.readline()
            if not gtline:
                line = dhs.readline()
            while gtline:
                gtarr = gtline.split()
                if mode == "tejaas":
                    rsid = gtarr[0]
                    pos = int(gtarr[1])
                if mode == "meqtl":
                    rsid = gtarr[0]
                    pos = int(gtarr[6])
                # print(rsid, pos, start, end)
                if pos < start:
                    gtline = f.readline()
                    continue # go to next snp
                elif pos > end:
                    line = dhs.readline()
                    break # go to next DHS line
                else:
                    # print("-->",chrm, rsid, pos, start, end)
                    outf.write("{:d}\t{:s}\t{:d}\n".format(chrm, rsid, pos))
                    gtline = f.readline()
                    continue
        if f:
            f.close()
            tend = time.time()
            print("CHR{:d} took {:g}s".format(current_chrm, tend-tstart))
        print("Done DHS file")

In [2]:
import mpmath
import collections
import numpy as np

mpmath.mp.dps = 500
def pval(x): return float(mpmath.log10(1 - 0.5 * (1 + mpmath.erf(x/mpmath.sqrt(2)))))

SNPRES_FIELDS = ['rsid', 'chrom', 'pos', 'logp', 'inDHS']
class SNPRes(collections.namedtuple('_SNPRes', SNPRES_FIELDS)):
    __slots__ = ()
       
def eqtlgen(filepath, chrom, snpannot):
    res = list()
    with open(filepath, 'r') as mfile:
        next(mfile)
        for line in mfile:
            arr   = line.strip().split("\t")
            rsid  = arr[0]
            pos   = int(arr[1])
            p     = float(arr[2])
            logp  = np.log10(p) if p != 0 else -99
            inDHS = snpannot.get(rsid, False)
            res.append(SNPRes(rsid=rsid, chrom=chrom, pos=pos, logp=-logp, inDHS=inDHS))
    return res
        
def tejaas(filepath, chrom, snpannot):
    res = list()
    with open(filepath, 'r') as mfile:
        next(mfile)
        for line in mfile:
            arr   = line.strip().split("\t")
            rsid  = arr[0]
            pos   = int(arr[1])
            p     = float(arr[5])
            q     = float(arr[2])
            mu    = float(arr[3])
            sigma = float(arr[4])
            if sigma == 0:
                continue
            logp  = np.log10(p) if p != 0 else pval( (q - mu) / sigma)
            inDHS = snpannot.get(rsid, False)
            res.append(SNPRes(rsid=rsid, chrom=chrom, pos=pos, logp=-logp, inDHS=inDHS))
    return res

# TODO: first I need to report snp positions in matrixeqtl output..

def matrixeqtl(filepath, chrom, snpannot):
    res = dict()
    res_list = list()
    with open(filepath, 'r') as mfile:
        next(mfile)
        for line in mfile:
            arr  = line.strip().split("\t")
            rsid = arr[0]
            pos  = int(arr[6])
            pval = float(arr[4])
            if not res.get(rsid, False):
                res[rsid] = [np.log10(pval), pos]
    for key in res.keys():
        inDHS = snpannot.get(rsid, False)
        res_list.append(SNPRes(rsid=key, chrom=chrom, pos=res[key][1], logp=-res[key][0], inDHS=inDHS))
    return res_list

def matrixeqtl_fdr(filepath, chrom, snpannot):
    res = dict()
    res_list = list()
    with open(filepath, 'r') as mfile:
        next(mfile)
        for line in mfile:
            arr  = line.strip().split("\t")
            rsid = arr[0]
            pos  = int(arr[6])
            pval = float(arr[5])
            if not res.get(rsid, False):
                res[rsid] = [np.log10(pval), pos]
    for key in res.keys():
        inDHS = snpannot.get(rsid, False)
        res_list.append(SNPRes(rsid=key, chrom=chrom, pos=res[key][1], logp=-res[key][0], inDHS=inDHS))
    return res_list

def read_annot(infile):
    annot_dict = dict()
    with open(infile) as fin:
        _ = fin.readline()
        for line in fin:
            arr = line.rstrip().split()
            annot_dict[arr[1]] = True
    return annot_dict

# Null enrichment

In [3]:
# Sample genotype positions randomly and check enrichment
# generate files from bash
# for CHRM in `seq 1 22`; do mkdir $CHRM; for i in `seq 1 50`; do grep -P "^$CHRM " all_snp_pos| shuf -n 2000 | sort -nk 1 -k 3 | tr " " "\t" |cut -f 2- > ${CHRM}/random_snp_sample_${i}; done; done

basedir = "/cbscratch/franco/datasets/DHS_null_enrichment/"
gtfile = "/cbscratch/franco/datasets/DHS_null_enrichment/{:d}/random_snp_sample_"
dhsfile = "/cbscratch/franco/datasets/multi-tissue.master.ntypes.simple.hg19.bed"
null_annot_outfile = basedir + "null_SNPs_annots_"
for N in range(1,51):
    if not os.path.exists(null_annot_outfile+str(N)+".txt"):
        annotate_snps(dhsfile, gtfile+str(N), null_annot_outfile+str(N)+".txt")

In [4]:
def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1

null_frac_annot = list()
basedir = "/cbscratch/franco/datasets/DHS_null_enrichment/"
null_annot_outfile = basedir + "null_SNPs_annots_"
for N in range(1,51):
    nullfile = null_annot_outfile+str(N)+".txt"
    L = file_len(nullfile)
    frac_annot = L / (22*2000)
    null_frac_annot.append(frac_annot)
    
null_frac_annot = np.array(null_frac_annot)
mean_null_frac_annot = np.mean(null_frac_annot)
std_null_frac_annot = np.std(null_frac_annot)
print(mean_null_frac_annot, std_null_frac_annot)

0.1614268181818182 0.0018046845034589253


In [20]:
dhs_mode = "multi"
if dhs_mode == "multi":
    dhsfile = "/cbscratch/franco/datasets/multi-tissue.master.ntypes.simple.hg19.bed"
    dhs_annot_out = "SNPs_annots.txt"
if dhs_mode == "dhsindex":
    # dhsfile = "/cbscratch/franco/datasets/DHSindex/DHS_Index_downlift_hg19.txt"
    dhsfile = "/cbscratch/franco/datasets/DHSindex/DHS_Index_and_Vocabulary_hg38_WM20190703.txt"
    dhs_annot_out = "dhsindex_SNPs_annots.txt"



# EQTLgen enrichment

In [5]:

basedir = "/cbscratch/franco/datasets/EQTLgen/"
eqtlgen_gtfile = basedir + "signif/trans-eQTLs_CHR{:d}"
               
eqtlgen_annot_outfile = basedir + dhs_annot_out
      
annotate_snps(dhsfile, eqtlgen_gtfile, eqtlgen_annot_outfile)

Processing CHRM 1 CHR1 took 0.756557s
Processing CHRM 10 CHR10 took 0.466239s
Processing CHRM 11 CHR11 took 0.364025s
Processing CHRM 12 CHR12 took 0.338149s
Processing CHRM 13 CHR13 took 0.198827s
Processing CHRM 14 CHR14 took 0.224862s
Processing CHRM 15 CHR15 took 0.231487s
Processing CHRM 16 CHR16 took 0.248599s
Processing CHRM 17 CHR17 took 0.300454s
Processing CHRM 18 CHR18 took 0.180515s
Processing CHRM 19 CHR19 took 0.210141s
Processing CHRM 2 CHR2 took 0.596454s
Processing CHRM 20 CHR20 took 0.222431s
Processing CHRM 21 CHR21 took 0.0965607s
Processing CHRM 22 CHR22 took 0.150762s
Processing CHRM 3 CHR3 took 0.46116s
Processing CHRM 4 CHR4 took 0.37988s
Processing CHRM 5 CHR5 took 0.409307s
Processing CHRM 6 CHR6 took 0.42467s
Processing CHRM 7 CHR7 took 0.381395s
Processing CHRM 8 CHR8 took 0.342839s
Processing CHRM 9 CHR9 took 0.304899s
Done DHS file


In [7]:
eqtlgen_annot_outfile = basedir + dhs_annot_out
chrms = np.arange(1,23)

eqtlgen_snp_annot = read_annot(eqtlgen_annot_outfile)
eqtlgen_snp_res = list()
for chrom in chrms:
    eqtlgen_snp_res += eqtlgen(eqtlgen_gtfile.format(chrom), chrom, eqtlgen_snp_annot)
    
eqtlgen_dhs_dict = dict()
for snp in eqtlgen_snp_res:
    eqtlgen_dhs_dict[snp.rsid] = snp.inDHS
    
# Number of EQTLgen signif trans-eQTLS in DHS regions

total_signif_SNPs = len(list(eqtlgen_dhs_dict.keys()))
signif_inDHS      = np.sum(np.array(list(eqtlgen_dhs_dict.values())))
frac = signif_inDHS / total_signif_SNPs
print("Fraction of trans-eQTLs in DHS regions (EQTLgen):",frac, signif_inDHS,"/", total_signif_SNPs)
print("EQTLgen total enrichment: {:g}".format(frac / mean_null_frac_annot))


Fraction of trans-eQTLs in DHS regions (EQTLgen): 0.30753192598384155 1180 / 3837
EQTLgen total enrichment: 1.90509


In [22]:
import time
import numpy as np
import os
# here we only try to annotate the best trans-eQTLs only!

methods = ["tejaas", "tejaas_rand"]
tissues = ["gtex-ms", "gtex-wb", "gtex-sse", "gtex-as"]
sbs = ["0.1_knn30"]
expressions = ["raw"]

for tissue in tissues:
    for expression in expressions:
        for sb in sbs:
            for method in methods:
                print("METHOD:",tissue, expression, sb, method)
                basedir = "/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v6_gtknn/{:s}/{:s}/{:s}/permnull_sb{:s}/".format(expression, tissue, method, sb)
                gtfile = basedir + "chr{:d}/rr.txt" #.ld_prune"
                annot_outfile = basedir + dhs_annot_out
                if not os.path.exists(annot_outfile):
                    annotate_snps(dhsfile, gtfile, annot_outfile)

METHOD: gtex-ms raw 0.1_knn30 tejaas
METHOD: gtex-ms raw 0.1_knn30 tejaas_rand
METHOD: gtex-wb raw 0.1_knn30 tejaas
METHOD: gtex-wb raw 0.1_knn30 tejaas_rand
METHOD: gtex-sse raw 0.1_knn30 tejaas
METHOD: gtex-sse raw 0.1_knn30 tejaas_rand
METHOD: gtex-as raw 0.1_knn30 tejaas
METHOD: gtex-as raw 0.1_knn30 tejaas_rand


In [23]:
import os

datadir = "/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v6_gtknn/"
snp_res_dict = collections.defaultdict(dict)
for sb in sbs:
    for expr in expressions:
        snp_res_dict[sb][expr] = collections.defaultdict(dict)
        for tissue in tissues:
            snp_res_dict[sb][expr][tissue] = collections.defaultdict(dict)
            for method in methods:
                snp_res = list()
                print(tissue, method, expr, end=" ")
                annotfile = os.path.join(datadir,expr,tissue,method,"permnull_sb{:s}".format(sb),dhs_annot_out)
                snp_annot = read_annot(annotfile)
                for chrom in chrms:
                    print(chrom, end=" ")
                    inputfile = os.path.join(datadir,expr,tissue,method,"permnull_sb{:s}".format(sb),"chr"+str(chrom),"rr.txt")
                    snp_res += tejaas(inputfile, chrom, snp_annot)
                print("")
                snp_res_dict[sb][expr][tissue][method] = snp_res


gtex-ms tejaas raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-ms tejaas_rand raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-wb tejaas raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-wb tejaas_rand raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-sse tejaas raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-sse tejaas_rand raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-as tejaas raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-as tejaas_rand raw 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 


In [21]:
from operator import attrgetter

def count_inDHS(top_list, cutoff):
    teqtls = list()
    for x in top_list:
        if x.logp > -np.log10(cutoff):
            teqtls.append(x.inDHS)
        else:
            break
    return teqtls

tejaas_enrich_dict = dict()

for mysb in sbs:
    for mytissue in tissues:
        for mymethod in methods:
            for myexpr in expressions:

                snp_list = snp_res_dict[mysb][myexpr][mytissue][mymethod]
                top_list = sorted(snp_list, key=attrgetter('logp'), reverse=True)

                cutoffs = [1e-11, 1e-10, 1e-9, 1e-8, 5e-8] #, 1e-7, 5e-7, 1e-6, 1e-5, 1e-3, 5e-2]

                # rr_teqtls = [x.inDHS for x in top_list if x.logp > -np.log10(cutoff)]
                fracs_rr = list()
                for cutoff in cutoffs:
                    rr_teqtls = count_inDHS(top_list, cutoff)
                    total_rr_teqtls = len(rr_teqtls)
                    if total_rr_teqtls > 0:
                        rr_inDHS = np.sum(np.array(rr_teqtls))
                        frac_rr = rr_inDHS/total_rr_teqtls
                        fracs_rr.append(frac_rr)
                        print("Fraction of trans-eQTLS in DHS regions @ {:g} ({:s}): {:g}, {:d}/{:d}".format(cutoff, mymethod, frac_rr, rr_inDHS, total_rr_teqtls))
                    else:
                        print("No signif trans-eQTLs found at {:g} cutoff".format(cutoff))
                        fracs_rr.append(None)
                key = "{:s}_{:s}_{:s}_{:s}".format(mysb, myexpr, mytissue, mymethod)
                tejaas_enrich_dict[key] = fracs_rr
            

KeyError: 'tmm_cclm'

In [14]:
# Enrichments
for mysb in sbs:
    for mytissue in tissues:
        for mymethod in methods:
            for myexpr in expressions:
                key = "{:s}_{:s}_{:s}_{:s}".format(mysb, myexpr, mytissue, mymethod)
                print("{:s} total enrichment: {:s}".format(key, str( np.array(tejaas_enrich_dict[key]) / mean_null_frac_annot ) ))

0.1_knn30_raw_gtex-ms_tejaas total enrichment: [1.54868939 1.27539126]
0.1_knn30_raw_gtex-ms_tejaas_rand total enrichment: []
0.1_knn30_raw_gtex-wb_tejaas total enrichment: []
0.1_knn30_raw_gtex-wb_tejaas_rand total enrichment: []
0.1_knn30_raw_gtex-sse_tejaas total enrichment: [1.54868939 0.4955806  0.96988628 0.89793717 0.98232498]
0.1_knn30_raw_gtex-sse_tejaas_rand total enrichment: [0.]
0.1_knn30_raw_gtex-as_tejaas total enrichment: [0.88496536 1.03245959 0.96253205 1.04383029 1.00538371]
0.1_knn30_raw_gtex-as_tejaas_rand total enrichment: []


In [15]:
import time
import numpy as np
import os

methods = ["matrixeqtl"] #, "matrixeqtl_rand"]
tissues = ["gtex-ms", "gtex-sse", "gtex-as", "gtex-wb"]
expressions = ["tmm_cclm", "tmm_lasso", "raw_cclm"]

for tissue in tissues:
    for expression in expressions:
        for method in methods:
            print("METHOD:",tissue, expression, method)
            basedir = "/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v6_gtknn/{:s}/{:s}/{:s}/".format(expression, tissue, method)
            gtfile = basedir + "chr{:d}/trans_eqtl.txt.pos"
            annot_outfile = basedir + dhs_annot_out
            if not os.path.exists(annot_outfile):
                annotate_snps(dhsfile, gtfile, annot_outfile, mode="meqtl")

METHOD: gtex-ms tmm_cclm matrixeqtl
Processing CHRM 1 CHR1 took 4.12564s
Processing CHRM 10 CHR10 took 2.59979s
Processing CHRM 11 CHR11 took 2.59018s
Processing CHRM 12 CHR12 took 0.541229s
Processing CHRM 13 CHR13 took 1.99152s
Processing CHRM 14 CHR14 took 1.66439s
Processing CHRM 15 CHR15 took 1.67161s
Processing CHRM 16 CHR16 took 1.55531s
Processing CHRM 17 CHR17 took 1.6461s
Processing CHRM 18 CHR18 took 1.4529s
Processing CHRM 19 CHR19 took 1.34868s
Processing CHRM 2 CHR2 took 4.49375s
Processing CHRM 20 CHR20 took 0.219798s
Processing CHRM 21 CHR21 took 0.756137s
Processing CHRM 22 CHR22 took 0.899945s
Processing CHRM 3 CHR3 took 2.80239s
Processing CHRM 4 CHR4 took 3.84319s
Processing CHRM 5 CHR5 took 3.39421s
Processing CHRM 6 CHR6 took 0.520547s
Processing CHRM 7 CHR7 took 3.40756s
Processing CHRM 8 CHR8 took 2.85502s
Processing CHRM 9 CHR9 took 2.24425s
Done DHS file
METHOD: gtex-ms tmm_lasso matrixeqtl
Processing CHRM 1 CHR1 took 5.02856s
Processing CHRM 10 CHR10 took 3.1

In [16]:

datadir = "/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v6_gtknn/"
chrms = np.arange(1,23)
snp_res_dict_meqtl = collections.defaultdict(dict)
for expr in expressions:
    for tissue in tissues:
        snp_res_dict_meqtl[expr][tissue] = collections.defaultdict(dict)
        for method in methods:
            snp_res = list()
            print(tissue, method, expr, end=" ")
            annotfile = os.path.join(datadir,expr,tissue,method,dhs_annot_out)
            snp_annot = read_annot(annotfile)
            for chrom in chrms:
                print(chrom, end=" ")
                inputfile = os.path.join(datadir,expr,tissue,method,"chr"+str(chrom),"trans_eqtl.txt.pos")
                snp_res += matrixeqtl(inputfile, chrom, snp_annot)
            print("")
            snp_res_dict_meqtl[expr][tissue][method] = snp_res

gtex-ms matrixeqtl tmm_cclm 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-sse matrixeqtl tmm_cclm 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-as matrixeqtl tmm_cclm 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-wb matrixeqtl tmm_cclm 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-ms matrixeqtl tmm_lasso 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-sse matrixeqtl tmm_lasso 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-as matrixeqtl tmm_lasso 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-wb matrixeqtl tmm_lasso 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-ms matrixeqtl raw_cclm 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-sse matrixeqtl raw_cclm 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-as matrixeqtl raw_cclm 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 
gtex-wb matrixeqtl raw_cclm 1 2 3 4 5 6 7 8 9 1

In [17]:
meqtl_enrich_dict = dict()

for mytissue in tissues:
    for mymethod in methods:
        for myexpr in expressions:
            snp_list = snp_res_dict_meqtl[myexpr][mytissue][mymethod]
            top_list = sorted(snp_list, key=attrgetter('logp'), reverse=True)

            cutoffs = [1e-11, 1e-10, 1e-9, 1e-8, 5e-8] #, 1e-7, 5e-7, 1e-6, 1e-5, 1e-3, 5e-2]

            fracs_rr = list()
            for cutoff in cutoffs:
                rr_teqtls = count_inDHS(top_list, cutoff)
                total_rr_teqtls = len(rr_teqtls)
                if total_rr_teqtls > 0:
                    rr_inDHS = np.sum(np.array(rr_teqtls))
                    frac_rr = rr_inDHS/total_rr_teqtls
                    fracs_rr.append(frac_rr)
                    print("Fraction of trans-eQTLS in DHS regions @ {:g} ({:s}): {:g}, {:d}/{:d}".format(cutoff, mymethod, frac_rr, rr_inDHS, total_rr_teqtls))
                else:
                    print("No signif trans-eQTLs found at {:g} cutoff".format(cutoff))
            key = "{:s}_{:s}_{:s}".format(myexpr, mytissue, mymethod)
            meqtl_enrich_dict[key] = fracs_rr  

Fraction of trans-eQTLS in DHS regions @ 1e-11 (matrixeqtl): 0, 0/904
Fraction of trans-eQTLS in DHS regions @ 1e-10 (matrixeqtl): 0, 0/1032
Fraction of trans-eQTLS in DHS regions @ 1e-09 (matrixeqtl): 0, 0/1190
Fraction of trans-eQTLS in DHS regions @ 1e-08 (matrixeqtl): 0, 0/1699
Fraction of trans-eQTLS in DHS regions @ 5e-08 (matrixeqtl): 0, 0/3077
Fraction of trans-eQTLS in DHS regions @ 1e-11 (matrixeqtl): 0, 0/987
Fraction of trans-eQTLS in DHS regions @ 1e-10 (matrixeqtl): 0, 0/1114
Fraction of trans-eQTLS in DHS regions @ 1e-09 (matrixeqtl): 0, 0/1346
Fraction of trans-eQTLS in DHS regions @ 1e-08 (matrixeqtl): 0, 0/1999
Fraction of trans-eQTLS in DHS regions @ 5e-08 (matrixeqtl): 0, 0/3551
Fraction of trans-eQTLS in DHS regions @ 1e-11 (matrixeqtl): 0, 0/584
Fraction of trans-eQTLS in DHS regions @ 1e-10 (matrixeqtl): 0, 0/768
Fraction of trans-eQTLS in DHS regions @ 1e-09 (matrixeqtl): 0, 0/1141
Fraction of trans-eQTLS in DHS regions @ 1e-08 (matrixeqtl): 0, 0/2959
Fraction o

In [18]:
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(cutoffs, tejaas_enrich_dict)

{'tmm_cclm_gtex-ms_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'tmm_lasso_gtex-ms_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'raw_cclm_gtex-ms_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'tmm_cclm_gtex-sse_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'tmm_lasso_gtex-sse_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'raw_cclm_gtex-sse_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'tmm_cclm_gtex-as_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'tmm_lasso_gtex-as_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'raw_cclm_gtex-as_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'tmm_cclm_gtex-wb_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'tmm_lasso_gtex-wb_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0],
 'raw_cclm_gtex-wb_matrixeqtl': [0.0, 0.0, 0.0, 0.0, 0.0]}

## Old code

In [None]:
from operator import attrgetter

MAX_TOP = 200
TOP_W = 100
mytissue = "gtex-ms"
mysb = "0.1_knn30"
myexpr = "raw"
mymethod = "tejaas"
snp_list = snp_res_dict[mysb][myexpr][mytissue][mymethod]
top_list = sorted(snp_list, key=attrgetter('logp'), reverse=True)

eqtlgen_top_list = sorted(eqtlgen_snp_res, key=attrgetter('logp'), reverse=True)

x = np.array([])
y = np.array([])
ygen = np.array([])

wx = np.array([])
wy = np.array([])
wygen = np.array([])

eqtlgen_true = 0
true_eqtls = 0
rand_eqtls = 0

prev = 0
for TOPN in range(TOP_W, MAX_TOP, TOP_W):
    
    window_true_eqtls = np.sum([x.inDHS for x in top_list[prev:TOPN]])
    true_eqtls += window_true_eqtls

    rand_distrib = []
    # myrandmethods = ["tejaas_rand"] # + randmethods
    # for mymethod in myrandmethods:
    myrandmethod = "tejaas_rand"
    snp_list_rand = snp_res_dict[mysb][myexpr][mytissue][myrandmethod]
    top_list_rand = sorted(snp_list_rand, key=attrgetter('logp'), reverse=True)
      
    window_eqtlgen_true = np.sum([x.inDHS for x in eqtlgen_top_list[prev:TOPN]])
    eqtlgen_true += window_eqtlgen_true
    
    window_rand_eqtls = np.sum([x.inDHS for x in top_list_rand[prev:TOPN]])
    rand_eqtls += window_rand_eqtls

    print(true_eqtls, rand_eqtls, true_eqtls/rand_eqtls, "||", eqtlgen_true, eqtlgen_true/rand_eqtls, window_eqtlgen_true, window_eqtlgen_true/window_rand_eqtls)
    x = np.append(x, TOPN)
    y = np.append(y, true_eqtls/rand_eqtls)
    ygen = np.append(ygen, eqtlgen_true/rand_eqtls)
    
    wx = np.append(wx, TOPN)
    wy = np.append(wy, window_true_eqtls/window_rand_eqtls)
    wygen = np.append(wygen, window_eqtlgen_true/window_rand_eqtls)
    prev += TOP_W


In [None]:
import matplotlib.pyplot as plt
outplotfile = "test.dhs.png"

fig = plt.figure(figsize=(16,8))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
ax1.plot(x, y, label="RR")
ax1.plot(x, ygen, label="EQTLgen")
ax1.set_title("Cumulative enrichment")
ax1.axhline(y=1, color='red')

ax2.bar(wx, wy, 100, alpha=0.3, label="RR")
ax2.bar(wx, wygen, 100, alpha=0.3, label="EQTLgen")
ax2.axhline(y=1, color='red')
ax2.set_title("Windowed enrichment")

ax1.set_xlabel("First N SNPS")
ax2.set_xlabel("First N SNPS")
ax1.set_ylabel("Enrichment")
ax2.set_ylabel("Enrichment")

fig.suptitle('Main title')
ax1.legend()
ax2.legend()

# fig.tight_layout()
# fig.subplots_adjust(top=0.88)
plt.savefig(outplotfile, bbox_inches='tight')
plt.show()