In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import time
from statsmodels.distributions.empirical_distribution import ECDF
sys.path.append('../')
sys.path.append('/usr/users/fsimone/tejaas')
from utils import readgtf
from utils import utils
import mpmath
import collections
from operator import attrgetter
import gzip

# gene_info = readgtf.gencode_v12("/cbscratch/franco/datasets/GENCODE/gencode.v26.annotation.gtf.gz", trim=True)
# gene_info_dict = collections.defaultdict(dict)
# for gene in gene_info:
#     gene_info_dict[gene.chrom][gene.ensembl_id] = gene.typ

# Work with 50 significant decimal digits in every mpmath computation below.
mpmath.mp.dps = 50
def pvalue(x):
    """Return log10 of the upper-tail probability of a standard normal at ``x``.

    Parameters
    ----------
    x : float
        Standardised score (z-value).

    Returns
    -------
    float
        ``log10(1 - Phi(x))`` where ``Phi`` is the standard normal CDF.

    Notes
    -----
    The tail is evaluated as ``0.5 * erfc(x / sqrt(2))``. The algebraically
    equal form ``1 - 0.5 * (1 + erf(x / sqrt(2)))`` subtracts two numbers that
    agree to more than 50 digits once ``x`` is large, so it collapses to 0 and
    the logarithm becomes ``-inf`` -- exactly for the extreme scores this helper
    is used for in ``tejaas`` (rows whose stored p-value is 0).
    """
    return float(mpmath.log10(0.5 * mpmath.erfc(x / mpmath.sqrt(2))))

SNPRES_FIELDS = [
    'rsid',
    'chrom',
    'pos',
    'logp',
    'target',
    'maf',
]
class SNPRes(collections.namedtuple('_SNPRes', SNPRES_FIELDS)):
    """Immutable record describing one SNP association.

    The readers in this notebook store ``logp`` as ``-log10(p)``; ``target`` is
    the associated gene id for records built by ``read_cis`` and ``None`` for
    records built by ``tejaas``.
    """
    __slots__ = ()
    
CT_FIELDS = [
    'tissue',
    'ncis',
    'ntrans',
    'ncistrans',
    'randtrans',
    'enrichment',
    'pval',
]
class CisTrans(collections.namedtuple('_CisTrans', CT_FIELDS)):
    """Immutable per-tissue summary of the cis/trans overlap analysis.

    Holds the tissue label, the counts of cis, trans and shared (cis-trans)
    SNPs, the random-background value, the enrichment and its p-value.
    """
    __slots__ = ()
    
CT_TYPE_FIELDS = [
    'tissue',
    'genetype',
    'hits',
    'enrichment',
    'pval',
    'frac',
]
class CisTrans_type(collections.namedtuple('_CisTransType', CT_TYPE_FIELDS)):
    """Immutable per-tissue, per-gene-type enrichment record.

    Holds the tissue label, the gene type, the number of hits, the enrichment
    with its p-value, and the null fraction the enrichment was computed against.
    """
    __slots__ = ()

def load_snp_maf(filepath, tissue):
    """Collect the per-SNP MAF values of chromosomes 1-22 for one tissue.

    Parameters
    ----------
    filepath : str
        Format template that is filled with ``(tissue, chromosome)``.
    tissue : str
        Tissue label substituted into the template.

    Returns
    -------
    collections.defaultdict
        Maps the first tab-separated column of every line to the second column
        converted to ``float``. Looking up an unknown key yields ``False``.
    """
    maf_by_snp = collections.defaultdict(lambda: False)
    for chrom_number in np.arange(1, 23):
        maf_path = filepath.format(tissue, chrom_number)
        with open(maf_path) as handle:
            for fields in (row.strip().split("\t") for row in handle):
                maf_by_snp[fields[0]] = float(fields[1])
    return maf_by_snp

def tejaas(filepath, mafcutoff=0.01):
    """Parse a tab-separated TEJAAS result table into ``SNPRes`` records.

    The first line is treated as a header. Columns are read by position:
    rsid, chrom, pos, maf, q, mu, sigma, p.

    Parameters
    ----------
    filepath : str
        Path of the result file.
    mafcutoff : float, optional
        Rows with ``maf < mafcutoff`` or ``maf > 1 - mafcutoff`` are dropped.

    Returns
    -------
    list of SNPRes
        One record per kept row, with ``logp = -log10(p)`` and ``target=None``.
        Rows with ``sigma == 0`` are dropped. An empty file yields ``[]``.
    """
    res = list()
    with open(filepath, 'r') as mfile:
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(mfile, None)
        for line in mfile:
            stripped = line.strip()
            if not stripped:
                # Blank (e.g. trailing) lines carry no record.
                continue
            arr   = stripped.split("\t")
            rsid  = arr[0]
            chrom = int(arr[1])
            pos   = int(arr[2])
            maf   = float(arr[3])
            if maf < mafcutoff or maf > (1-mafcutoff):
                continue
            q     = float(arr[4])
            mu    = float(arr[5])
            sigma = float(arr[6])
            p     = float(arr[7])
            if sigma == 0:
                continue
            # A stored p of exactly 0 is recomputed from the standardised score.
            logp  = np.log10(p) if p != 0 else pvalue( (q - mu) / sigma)
            res.append(SNPRes(rsid=rsid, chrom=chrom, pos=pos, logp=-logp, maf=maf, target=None))
    return res

def read_cis(filepath):
    """Read a gzipped, tab-separated significant-pairs table into ``SNPRes`` records.

    The first line is skipped as a header. Column 0 is the variant id
    (``chr<N>_<pos>_...``), column 1 holds the target (the text after the last
    ``:``, up to the first ``.``, is kept), column 5 the MAF and column 6 the
    p-value. Variants whose id starts with ``chrX`` are skipped.

    Returns
    -------
    list of SNPRes
        Records with ``logp = -log10(p)``. An empty list (after printing a
        message) when the file is missing or has size 0.
    """
    records = list()
    file_missing = not os.path.exists(filepath)
    if file_missing or os.stat(filepath).st_size == 0:
        print("File empty or does not exist")
        return records
    with gzip.open(filepath, 'r') as handle:
        next(handle)  # header line
        for raw_line in handle:
            columns = raw_line.decode().strip().split("\t")
            variant_id = columns[0]
            if variant_id.startswith("chrX"):
                continue
            position = int(variant_id.split("_")[1])
            chromosome = int(variant_id.split("_")[0][3:])
            target_gene = columns[1].split(":")[-1].split(".")[0]
            minor_af = float(columns[5])
            log10_p = np.log10(float(columns[6]))
            records.append(
                SNPRes(rsid=variant_id, chrom=chromosome, pos=position,
                       logp=-log10_p, target=target_gene, maf=minor_af)
            )
    return records


In [2]:
# Filter by allowed snps according to MAF
# Input locations and output folders for the analysis at the chosen MAF threshold.
basepath = "/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v8_lncRNA_freeze/"
baseoutdir = os.path.join(basepath, "GTExPortal_sqtl_analysis")
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the existence test and the makedirs call.
os.makedirs(baseoutdir, exist_ok=True)

MAF = 0.01
title = "maf{:g}".format(MAF*100)
# Template filled with (tissue, chromosome) by load_snp_maf.
maffile = "/cbscratch/franco/datasets/gtex_v8/genotypes/snpMAFs/{:s}_snp_CHR{:d}.maf"
# Folder holding the random SNP sets read by the sample_background_* helpers.
randompath = "/usr/users/fsimone/vcfs_{:.2f}/".format(MAF)

print(title, maffile, randompath)

outdir = os.path.join(baseoutdir, title)
os.makedirs(outdir, exist_ok=True)

# NTOT_SNPS = 0
# snp_maf_dict = collections.defaultdict(lambda:False)
# with open(maffile) as instream:
#     for line in instream:
#         snp_maf_dict[line.rstrip().split()[1]] = True
#         NTOT_SNPS += 1
# print("Total Number of SNPs at MAF {:g}%: {:d}".format(MAF*100, NTOT_SNPS))

maf1 /cbscratch/franco/datasets/gtex_v8/genotypes/snpMAFs/{:s}_snp_CHR{:d}.maf /usr/users/fsimone/vcfs_0.01/


In [3]:
# Restrict a set of eQTLs to those whose target belongs to one gene type.
def cis_typespecific_eqtls(cistrans_target_eqtls, genetype_dict):
    """Select the eQTLs whose target is flagged in ``genetype_dict``.

    Parameters
    ----------
    cistrans_target_eqtls : iterable
        Records exposing ``rsid`` and ``target`` attributes.
    genetype_dict : mapping
        Indexed with each record's ``target``; a truthy value keeps the record.

    Returns
    -------
    tuple
        ``(kept records, list of unique rsids, list of unique targets)``.
    """
    selected = list()
    for eqtl in cistrans_target_eqtls:
        if genetype_dict[eqtl.target]:
            selected.append(eqtl)

    snp_ids = list(set(eqtl.rsid for eqtl in selected))
    target_ids = list(set(eqtl.target for eqtl in selected))
    return selected, snp_ids, target_ids

# for a set of cis and trans-eQTLs, return the cis-trans ids and the cis-eQTLs with its targets
def cross_ref_cis_trans(trans_ids, cis_eqtls):
    """Cross-reference trans-eQTL ids with a list of cis-eQTL records.

    Parameters
    ----------
    trans_ids : iterable of hashable
        SNP ids of the trans-eQTLs (duplicates are allowed).
    cis_eqtls : iterable
        Records exposing an ``rsid`` attribute.

    Returns
    -------
    tuple
        ``(ids, records)`` where ``ids`` is a list of the unique ids present in
        both inputs (in no particular order) and ``records`` are the cis-eQTL
        records carrying one of those ids, in their original order.
    """
    # Intersection between the cis-eQTL ids and the trans-eQTL ids. A set gives
    # constant-time membership directly, so no helper dict of True values and no
    # list/set round trips are needed.
    shared_ids = set(trans_ids).intersection(x.rsid for x in cis_eqtls)

    # List of cis-trans-eqtls with its target gene
    cis_target_eqtls = [x for x in cis_eqtls if x.rsid in shared_ids]

    return list(shared_ids), cis_target_eqtls

def crossref_trans_tejaas(transeqtls, cis_eqtls):
    """Cross-reference trans-eQTL records (by their ``rsid``) with cis-eQTLs.

    Returns the pair produced by ``cross_ref_cis_trans``: the shared ids and the
    cis-eQTL records that carry one of them.
    """
    shared_ids, shared_cis = cross_ref_cis_trans(
        [snp.rsid for snp in transeqtls], cis_eqtls
    )
    return shared_ids, shared_cis

def get_cistype_fractions(ciseqtls, valid_types, alltypes_dict):
    """Fraction of unique cis SNPs that hit each gene type.

    Parameters
    ----------
    ciseqtls : sequence
        Records exposing ``rsid`` and ``target`` attributes.
    valid_types : iterable
        Gene types to evaluate.
    alltypes_dict : mapping
        ``alltypes_dict[gene_type][target]`` is truthy when the target belongs
        to that gene type.

    Returns
    -------
    dict
        Gene type -> (unique rsids with a target of that type) / (all unique rsids).
    """
    n_unique_cis = len({eqtl.rsid for eqtl in ciseqtls})
    fractions = {}
    for gtype in valid_types:
        typed_snps = {
            eqtl.rsid for eqtl in ciseqtls if alltypes_dict[gtype][eqtl.target]
        }
        fractions[gtype] = len(typed_snps) / n_unique_cis
    return fractions

def sample_background_50000_simple(ciseqtls, randompath):
    """Average overlap between random SNP sets and the given cis-eQTLs.

    Reads the ten folders ``random_50000_001`` ... ``random_50000_010`` found by
    appending their name to ``randompath``. In each folder the files
    ``chr1.txt`` ... ``chr22.txt`` are read, one SNP id per line.

    Returns
    -------
    numpy.float64
        Mean, over the ten sets, of the number of unique random ids that are
        also cis-eQTL ids.
    """
    n_random = "50000"
    autosomes = [str(c) for c in np.arange(1, 23)]
    overlap_counts = list()
    for replicate in np.arange(1, 11):
        replicate_dir = randompath + "random_" + n_random + "_" + "{:03d}".format(replicate)

        random_ids = list()
        for chrom in autosomes:
            chrom_file = os.path.join(replicate_dir, "chr{:s}.txt".format(chrom))
            with open(chrom_file) as handle:
                random_ids.extend(row.rstrip() for row in handle)

        shared_ids, _shared_eqtls = cross_ref_cis_trans(random_ids, ciseqtls)
        overlap_counts.append(len(shared_ids))
    return np.mean(overlap_counts)

def sample_background_50000(ciseqtls, randompath, valid_types = [], alltypes_dict = dict()):
    """Overlap of random SNP sets with cis-eQTLs, overall and per gene type.

    Reads the ten folders ``random_50000_001`` ... ``random_50000_010`` found by
    appending their name to ``randompath``. In each folder the files
    ``chr1.txt`` ... ``chr22.txt`` are read, one SNP id per line.

    Parameters
    ----------
    ciseqtls : sequence
        cis-eQTL records exposing ``rsid`` and ``target``.
    randompath : str
        Prefix to which the folder names are appended.
    valid_types : sequence, optional
        Gene types for which per-type counts are collected.
    alltypes_dict : mapping, optional
        ``alltypes_dict[gene_type]`` is passed to ``cis_typespecific_eqtls``.

    Returns
    -------
    tuple
        ``(mean overlap count, defaultdict)``; the dict maps each gene type to
        a list with, per random set, the number of unique overlapping SNPs that
        target that type.
    """
    n_random = "50000"
    autosomes = [str(c) for c in np.arange(1, 23)]
    per_type_counts = collections.defaultdict(list)
    overlap_counts = list()
    for replicate in np.arange(1, 11):
        replicate_dir = randompath + "random_" + n_random + "_" + "{:03d}".format(replicate)

        random_ids = list()
        for chrom in autosomes:
            chrom_file = os.path.join(replicate_dir, "chr{:s}.txt".format(chrom))
            with open(chrom_file) as handle:
                random_ids.extend(row.rstrip() for row in handle)

        shared_ids, shared_eqtls = cross_ref_cis_trans(random_ids, ciseqtls)
        overlap_counts.append(len(shared_ids))
        if len(valid_types) > 0:
            for gtype in valid_types:
                _eqtls, type_snps, _targets = cis_typespecific_eqtls(shared_eqtls, alltypes_dict[gtype])
                # keep only how many unique cis-trans SNPs hit this gene type
                per_type_counts[gtype].append(len(type_snps))
    return np.mean(overlap_counts), per_type_counts


def sample_binomial(n, p, NTIMES):
    """Draw ``NTIMES`` samples from a Binomial(``n``, ``p``) distribution.

    Parameters
    ----------
    n : int
        Number of trials per draw.
    p : float
        Success probability of each trial.
    NTIMES : int
        Number of draws.

    Returns
    -------
    list of int
        Number of successes in each draw; empty when ``NTIMES`` is not positive.
    """
    if NTIMES <= 0:
        return []
    # One vectorised call replaces NTIMES separate Python-level calls into the
    # generator (callers in this notebook request 10,000,000 draws).
    return np.random.binomial(n, p, size=NTIMES).tolist()

In [4]:
# GENEINFO_FIELDS = ['name', 'ensembl_id', 'chrom', 'start', 'end', 'typ']
# class GeneInfo(collections.namedtuple('_GeneInfo', GENEINFO_FIELDS)):
#     __slots__ = ()
    
# def read_TFannot(infile):
#     TF_list = list()
#     with open(infile) as instream:
#         next(instream)
#         for line in instream:
#             arr = line.rstrip().split()
#             TF_list.append(GeneInfo(ensembl_id=arr[0], chrom=int(arr[1]), start=int(arr[2]), end=int(arr[3]), name=arr[4], typ="TF"))
#     return TF_list

# base_dir = "/cbscratch/franco/datasets"
# TF_annot = read_TFannot("../TF_annotation.txt")

# TF_dict = collections.defaultdict(dict)
# for g in TF_annot:
#     TF_dict[g.chrom][g.ensembl_id] = "TF"

# # Reformat genetype dict, we can add as many gene annotations as we want here
# alltypes_dict = collections.defaultdict(dict)
# genetypes = []
# for chrm in range(1,23):
#     gene_info_dict[chrm]
#     for k in gene_info_dict[chrm].keys():
#         genetype = gene_info_dict[chrm][k]
#         if genetype not in alltypes_dict:
#             alltypes_dict[genetype] = collections.defaultdict(lambda:False)
#             genetypes.append(genetype)
#         alltypes_dict[genetype][k] = True
#     # Add TF dictionary
#     for k in TF_dict[chrm].keys():
#         genetype = "TF"
#         if genetype not in alltypes_dict:
#             alltypes_dict[genetype] = collections.defaultdict(lambda:False)
#             genetypes.append(genetype)
#         alltypes_dict[genetype][k] = True

In [5]:
import json
# GTEx metadata, keyed by the full tissue name with spaces replaced by underscores.
json_file = "../gtex_v8_metadata.json"
with open(json_file) as instream:
    gtex_meta = json.load(instream)

tissue_file = "/usr/users/fsimone/trans-eqtl-pipeline/main/tissues.txt"
tissues, descriptions = utils.read_tissues(tissue_file)

# Lookup tables keyed by the short tissue label.
tissue_names   = dict()
tissue_colors  = dict()
tissue_samples = dict()
for tshort, tfull in zip(tissues, descriptions):
    tissue_names[tshort] = tfull
    tissue_meta = gtex_meta[tfull.replace(" ", "_")]
    tissue_colors[tshort] = "#" + tissue_meta["colorHex"]
    tissue_samples[tshort] = tissue_meta["rnaSeqAndGenotypeSampleCount"]

In [None]:
# Per-tissue enrichment of TEJAAS trans-eQTLs among the SNPs listed in the GTEx
# sQTL significant-pairs files, overall and split by target gene type, each
# compared against the random-SNP background read from `randompath`.
# NOTE(review): `genetypes` and `alltypes_dict` are only assigned inside
# commented-out code in this notebook, so the `for genetype in genetypes` loop
# below raises NameError unless they are defined by code not visible here.
gtexportal_dir = "/cbscratch/franco/datasets/gtex_v8/expression/gtex_portal/eQTLs/GTEx_Analysis_v8_sQTL/"
dataset = "gtex_v8"
# meqtl_expr  = "tmm_cclm"
tejaas_expr = "raw"
K = 30
pcutoff = 5e-8
# Tissues with fewer trans-eQTLs / cis records than these minimums are skipped.
MIN_TRANS = 1
MIN_CIS   = 1

# Results keyed by tissue; the two *type dicts are keyed by tissue, then gene type.
res_dict = dict()
res_dict_cistype = collections.defaultdict(dict)
res_dict_randomtype = collections.defaultdict(dict)
for tissue in tissues:
    gtex_t = "-".join([dataset, tissue])
    
    # NOTE(review): within this cell only the commented-out filter below reads snp_maf_dict.
    snp_maf_dict = load_snp_maf(maffile, tissue)
    
    tejaas_file = os.path.join(basepath, tejaas_expr, gtex_t, "tejaas", "permnull_sb0.1_knn{:d}".format(K), "trans_eqtls_{:g}.txt".format(pcutoff))
    if not os.path.exists(tejaas_file):
        print("{:s} has no trans-eqtl results".format(tissue))
        continue
    transeqtls = tejaas(tejaas_file)
    # transeqtls = [x for x in transeqtls if snp_maf_dict[x.rsid]]
    
    if len(transeqtls) < MIN_TRANS:
        print("{:s} has less than {:d} trans-eqtls".format(tissue, MIN_TRANS))
        continue
    
    signif_cisfile = os.path.join(gtexportal_dir, "{:s}.v8.sqtl_signifpairs.txt.gz".format(tissue_names[tissue].replace(" ", "_")))
    if not os.path.exists(signif_cisfile) or os.stat(signif_cisfile).st_size == 0:
        print("{:s} has no cis-file in GTEx!".format(tissue_names[tissue]))
        continue
    ciseqtls = read_cis(signif_cisfile)
    cis_ids = list(set([x.rsid for x in ciseqtls]))
    
    if len(ciseqtls) < MIN_CIS:
        print("{:s} has less than {:d} cis-eqtls".format(tissue, MIN_CIS))
        continue
    
    # Ids shared by the trans and cis sets, plus the cis records (with targets) carrying them.
    cis_trans_eqtls_ids, cistrans_target_eqtls = crossref_trans_tejaas(transeqtls, ciseqtls)

    
    # Keep only the gene types hit by at least one cis-trans SNP in this tissue.
    valid_types = []
    genetarget_counts = dict()
    for genetype in genetypes:
        cis_types_eqtls, uniq_cis_snps, unique_targets = cis_typespecific_eqtls(cistrans_target_eqtls, alltypes_dict[genetype])
        if len(uniq_cis_snps) > 0:
            # print(genetype, len(uniq_cis_snps), len(unique_targets))
            valid_types.append(genetype)
            genetarget_counts[genetype] = [cis_types_eqtls, uniq_cis_snps, unique_targets]
    
    # randtrans: contains avg number of random snps that are cis-trans-eQTLs
    # rand_res_dict: contains, for each genetype, a list with the number of those random cis-trans-eQTLs targeting that genetype
    randtrans, rand_res_dict = sample_background_50000(ciseqtls, randompath, valid_types, alltypes_dict)
    FRAC_CISTRANS = len(cis_trans_eqtls_ids) / len(transeqtls)
    # NOTE(review): if no random SNP overlaps a cis SNP this fraction is 0 and the
    # enrichment ratio below divides by zero.
    FRAC_RANDOM_GWCISTRANS = randtrans / 50000 
    
    enrichment = FRAC_CISTRANS / FRAC_RANDOM_GWCISTRANS
   
    ncis = len(cis_ids)
    ntrans = len(transeqtls)
    ncistrans = len(cis_trans_eqtls_ids)
    
    # Null distribution: 10,000,000 binomial draws of ntrans trials with success
    # probability FRAC_RANDOM_GWCISTRANS, converted to enrichments like the observed one.
    # No random seed is set in this cell, so the simulated values differ between runs.
    randtrans1k_bin = sample_binomial(ntrans, FRAC_RANDOM_GWCISTRANS, 10000000)
    num_null = np.array(randtrans1k_bin) /  ntrans

    null_enrichments = num_null / FRAC_RANDOM_GWCISTRANS
    ecdf = ECDF(null_enrichments)
    # Empirical p-value as one minus the ECDF at the observed enrichment.
    # NOTE(review): presumably the ECDF is P(X <= x), which would exclude ties
    # with the observed value from the tail -- confirm against statsmodels.
    pval = 1 - ecdf(enrichment)
    
    
    res_dict[tissue] = CisTrans(tissue=tissue, ncis=ncis, ntrans=ntrans, 
                                ncistrans=ncistrans, randtrans=FRAC_RANDOM_GWCISTRANS,
                                enrichment=enrichment, pval=pval)
    
    print(f"########## Tissue: {tissue} - {ntrans} trans-eqtls - {ncistrans} cis-trans-eqtls #########")
    print(f"{tissue:>20}        Enrichment: {enrichment:>g} - pval: {pval:>g}")
    
    # Fraction of all unique cis SNPs that target each valid gene type (the "CIS" null).
    cistype_fracs_dict = get_cistype_fractions(ciseqtls, valid_types, alltypes_dict)
    
    for vt in valid_types:
        # from the 50000 random SNPs, randtrans is the nº that are cistrans
        FRAC_RANDOM_TYPE_NULL = np.mean(np.array(rand_res_dict[vt])) / 50000 #randtrans
        FRAC_CIS_TYPE_NULL    = cistype_fracs_dict[vt]
        uniq_snps_targettype  = len(genetarget_counts[vt][1]) # contains --> genetarget_counts[genetype] = [cis_types_eqtls, uniq_cis_snps, unique_targets]
        # Observed fraction of trans-eQTLs that are cis SNPs targeting this gene type.
        FRAC_CISTYPE =  uniq_snps_targettype / ntrans
        
        # When every cis SNP targets this type the CIS comparison is skipped and
        # both the enrichment and the p-value are set to 1.0.
        if FRAC_CIS_TYPE_NULL != 1.0:        
            cis_type_enrichment    = FRAC_CISTYPE / FRAC_CIS_TYPE_NULL
            # Calculate pvalue
            randtrans1k_bin = sample_binomial(ntrans, FRAC_CIS_TYPE_NULL, 10000000)
            num_null = np.array(randtrans1k_bin) /  ntrans
            null_enrichments = num_null / FRAC_CIS_TYPE_NULL
            ecdf = ECDF(null_enrichments)
            cis_pval = 1 - ecdf(cis_type_enrichment)
        else:
            cis_pval = 1.0
            cis_type_enrichment = 1.0
          
        # Same test against the random-SNP background for this gene type.
        random_type_enrichment = FRAC_CISTYPE / FRAC_RANDOM_TYPE_NULL
        # Calculate pvalue
        randtrans1k_bin = sample_binomial(ntrans, FRAC_RANDOM_TYPE_NULL, 10000000)
        num_null = np.array(randtrans1k_bin) /  ntrans
        null_enrichments = num_null / FRAC_RANDOM_TYPE_NULL
        ecdf = ECDF(null_enrichments)
        random_pval = 1 - ecdf(random_type_enrichment)
                  
        print(f"{vt:>20}    CIS Enrichment: {cis_type_enrichment:>g} - pval: {cis_pval:>g}")
        print(f"{vt:>20} RANDOM Enrichment: {random_type_enrichment:>g} - pval: {random_pval:>g}")
    
        res_dict_cistype[tissue][vt]    = CisTrans_type(tissue=tissue, genetype=vt,
                                                        hits=uniq_snps_targettype,
                                                        frac=FRAC_CIS_TYPE_NULL,
                                                        enrichment=cis_type_enrichment, pval=cis_pval)
        res_dict_randomtype[tissue][vt] = CisTrans_type(tissue=tissue, genetype=vt, 
                                                        hits=uniq_snps_targettype,
                                                        frac=FRAC_RANDOM_TYPE_NULL,
                                                        enrichment=random_type_enrichment, pval=random_pval)

In [8]:
# Per-tissue enrichment of TEJAAS trans-eQTLs among the SNPs listed in the GTEx
# sQTL significant-pairs files, compared against the random-SNP background read
# from `randompath`. Fills `res_dict` (summary per tissue) and `res_target_dict`
# (the overlapping cis records, with their targets, per tissue).
gtexportal_dir = "/cbscratch/franco/datasets/gtex_v8/expression/gtex_portal/eQTLs/GTEx_Analysis_v8_sQTL/"
dataset = "gtex_v8"
# meqtl_expr  = "tmm_cclm"
tejaas_expr = "raw"
K = 30
pcutoff = 5e-8
# Tissues with fewer trans-eQTLs / cis records than these minimums are skipped.
MIN_TRANS = 1
MIN_CIS   = 1

# res_dict is re-created here, so any res_dict built by an earlier cell is replaced.
res_dict = dict()
res_target_dict = dict()
for tissue in tissues:
    gtex_t = "-".join([dataset, tissue])
    
    # NOTE(review): within this cell only the commented-out filter below reads snp_maf_dict.
    snp_maf_dict = load_snp_maf(maffile, tissue)
    
    tejaas_file = os.path.join(basepath, tejaas_expr, gtex_t, "tejaas", "permnull_sb0.1_knn{:d}".format(K), "trans_eqtls_{:g}.txt".format(pcutoff))
    if not os.path.exists(tejaas_file):
        print("{:s} has no trans-eqtl results".format(tissue))
        continue
    transeqtls = tejaas(tejaas_file)
    # transeqtls = [x for x in transeqtls if snp_maf_dict[x.rsid]]
    
    if len(transeqtls) < MIN_TRANS:
        print("{:s} has less than {:d} trans-eqtls".format(tissue, MIN_TRANS))
        continue
    
    signif_cisfile = os.path.join(gtexportal_dir, "{:s}.v8.sqtl_signifpairs.txt.gz".format(tissue_names[tissue].replace(" ", "_")))
    if not os.path.exists(signif_cisfile) or os.stat(signif_cisfile).st_size == 0:
        print("{:s} has no cis-file in GTEx!".format(tissue_names[tissue]))
        continue
    ciseqtls = read_cis(signif_cisfile)
    cis_ids = list(set([x.rsid for x in ciseqtls]))
    
    if len(ciseqtls) < MIN_CIS:
        print("{:s} has less than {:d} cis-eqtls".format(tissue, MIN_CIS))
        continue
    
    # Ids shared by the trans and cis sets, plus the cis records (with targets) carrying them.
    cis_trans_eqtls_ids, cistrans_target_eqtls = crossref_trans_tejaas(transeqtls, ciseqtls)
    
    # Average number of random SNPs (out of 50000 per set) that are also cis SNPs.
    randtrans = sample_background_50000_simple(ciseqtls, randompath)
    FRAC_CISTRANS = len(cis_trans_eqtls_ids) / len(transeqtls)
    # NOTE(review): if no random SNP overlaps a cis SNP this fraction is 0 and the
    # enrichment ratio below divides by zero.
    FRAC_RANDOM_GWCISTRANS = randtrans / 50000 
    
    enrichment = FRAC_CISTRANS / FRAC_RANDOM_GWCISTRANS
   
    ncis = len(cis_ids)
    ntrans = len(transeqtls)
    ncistrans = len(cis_trans_eqtls_ids)
    
    # Null distribution: 10,000,000 binomial draws of ntrans trials with success
    # probability FRAC_RANDOM_GWCISTRANS, converted to enrichments like the observed one.
    # No random seed is set in this cell, so the simulated values differ between runs.
    randtrans1k_bin = sample_binomial(ntrans, FRAC_RANDOM_GWCISTRANS, 10000000)
    num_null = np.array(randtrans1k_bin) /  ntrans

    null_enrichments = num_null / FRAC_RANDOM_GWCISTRANS
    ecdf = ECDF(null_enrichments)
    # Empirical p-value as one minus the ECDF at the observed enrichment.
    # NOTE(review): presumably the ECDF is P(X <= x), which would exclude ties
    # with the observed value from the tail -- confirm against statsmodels.
    pval = 1 - ecdf(enrichment)
    
    res_dict[tissue] = CisTrans(tissue=tissue, ncis=ncis, ntrans=ntrans, 
                                ncistrans=ncistrans, randtrans=FRAC_RANDOM_GWCISTRANS,
                                enrichment=enrichment, pval=pval)
    
    res_target_dict[tissue] = cistrans_target_eqtls
    
    print(f"########## Tissue: {tissue} - {ntrans} trans-eqtls - {ncistrans} cis-trans-eqtls #########")
    print(f"{tissue:>20}        Enrichment: {enrichment:>g} - pval: {pval:>g}")



########## Tissue: as - 1280 trans-eqtls - 26 cis-trans-eqtls #########
                  as        Enrichment: 0.40798 - pval: 1
########## Tissue: av - 671 trans-eqtls - 29 cis-trans-eqtls #########
                  av        Enrichment: 1.14276 - pval: 0.199263
########## Tissue: ag - 349 trans-eqtls - 99 cis-trans-eqtls #########
                  ag        Enrichment: 14.4272 - pval: 0
########## Tissue: aa - 2615 trans-eqtls - 182 cis-trans-eqtls #########
                  aa        Enrichment: 2.06536 - pval: 0
########## Tissue: ac - 3479 trans-eqtls - 90 cis-trans-eqtls #########
                  ac        Enrichment: 1.54426 - pval: 3.93e-05
########## Tissue: at - 920 trans-eqtls - 14 cis-trans-eqtls #########
                  at        Enrichment: 0.324506 - pval: 1
########## Tissue: bam - 2598 trans-eqtls - 59 cis-trans-eqtls #########
                 bam        Enrichment: 3.93857 - pval: 0
########## Tissue: ban - 2064 trans-eqtls - 4 cis-trans-eqtls #########
    

In [11]:
# Per-tissue enrichment summary: one tab-separated row for every tissue with a result.
outcisfilename = os.path.join(outdir, "CisSQTL_enrichment_results_" + title + ".txt")
# if os.path.exists(outcisfilename):
#     print("Warning! File exists")
#     raise
with open(outcisfilename, 'w') as outstream:
    for tissue in tissues:
        if tissue not in res_dict:
            continue
        tissue_result = res_dict[tissue]
        line = f"{tissue}\t{tissue_result.ncis}\t{tissue_result.ntrans}\t{tissue_result.ncistrans}\t{tissue_result.randtrans}\t{tissue_result.enrichment}\t{tissue_result.pval}\n"
        outstream.write(line)

# Overlapping cis records with their targets: one tab-separated row per record.
targets_outfile = os.path.join(outdir, "CistransSQTL_targets.txt")
# if os.path.exists(targets_outfile):
#     print("Warning! File exists")
#     raise
with open(targets_outfile, 'w') as outstream:
    for tissue in tissues:
        if tissue not in res_target_dict:
            continue
        for snp in res_target_dict[tissue]:
            line = f"{tissue}\t{snp.rsid}\t{snp.logp}\t{snp.target}\t{snp.maf}\n"
            outstream.write(line)
            
# outcistypefilename = os.path.join(outdir,"CisSQTL_target_enrichment_results_"+title+".txt")
# if os.path.exists(outcistypefilename):
#     print("Warning! File exists")
#     raise
# with open(outcistypefilename, 'w') as outstream:
#     for tissue in tissues:
#         if tissue in res_dict_cistype:
#             for genetype in res_dict_cistype[tissue].keys():
#                 cis_hits = res_dict_cistype[tissue][genetype].hits
#                 cis_frac = res_dict_cistype[tissue][genetype].frac
#                 cis_e    = res_dict_cistype[tissue][genetype].enrichment
#                 cis_pval = res_dict_cistype[tissue][genetype].pval
#                 random_hits = res_dict_randomtype[tissue][genetype].hits
#                 random_frac = res_dict_randomtype[tissue][genetype].frac
#                 random_e    = res_dict_randomtype[tissue][genetype].enrichment
#                 random_pval = res_dict_randomtype[tissue][genetype].pval
#                 cisline    = f"{tissue}\t{genetype}\tCIS\t{cis_hits}\t{cis_frac}\t{cis_e}\t{cis_pval}\n"
#                 randomline = f"{tissue}\t{genetype}\tRANDOM\t{random_hits}\t{random_frac}\t{random_e}\t{random_pval}\n"
#                 outstream.write(cisline)
#                 outstream.write(randomline)

In [None]:
target_data_dict = dict()
targets_outfile = os.path.join(outdir,"CistransSQTL_targets.txt")
with open(targets_outfile) as instream: