In [6]:
import os, sys
sys.path.append('../')
from utils import utils
import json
import numpy as np

# Short tissue identifiers and their full descriptions, as returned by the
# project helper utils.read_tissues.
tissue_file = "/usr/users/fsimone/trans-eqtl-pipeline/main/tissues.txt"
tissues, descriptions = utils.read_tissues(tissue_file)

# Metadata JSON; entries are looked up below by the tissue description with
# spaces replaced by underscores.
json_file = "../gtex_v8_metadata.json"
with open(json_file) as instream:
    gtex_meta = json.load(instream)

# Per-tissue lookup tables, all keyed by the short tissue identifier.
tissue_colors = {}
tissue_names = {}
tissue_samples = {}

for tshort, tfull in zip(tissues, descriptions):
    tissue_names[tshort] = tfull
    # Both remaining tables read from the same metadata entry.
    tissue_meta = gtex_meta[tfull.replace(" ", "_")]
    tissue_colors[tshort] = "#" + tissue_meta["colorHex"]
    tissue_samples[tshort] = tissue_meta["rnaSeqAndGenotypeSampleCount"]


In [9]:
import mpmath
import collections
# Working precision (decimal digits) for every mpmath computation below.
mpmath.mp.dps = 50


def pvalue(x):
    """Return log10 of the upper-tail standard-normal probability P(Z > x).

    The tail is evaluated directly as ``0.5 * erfc(x / sqrt(2))``. The
    algebraically equivalent ``1 - 0.5 * (1 + erf(x / sqrt(2)))`` cancels to
    exactly zero as soon as the tail is smaller than the working precision
    (about 1e-50 with ``mp.dps = 50``), which made the logarithm ``-inf`` for
    the very large ``x`` this helper is called with.

    Parameters
    ----------
    x : float
        Standardised statistic (z-score).

    Returns
    -------
    float
        ``log10(P(Z > x))``; this is <= 0, so callers negate it to obtain
        ``-log10(p)``.
    """
    upper_tail = 0.5 * mpmath.erfc(x / mpmath.sqrt(2))
    return float(mpmath.log10(upper_tail))

SNPRES_FIELDS = [
    'rsid',
    'chrom',
    'pos',
    'logp',
    'target',
    'maf',
]


class SNPRes(collections.namedtuple('_SNPRes', SNPRES_FIELDS)):
    """Immutable per-SNP result record with the fields in SNPRES_FIELDS."""

    # Empty __slots__ keeps instances free of a per-instance __dict__.
    __slots__ = ()

def tejaas(filepath, mafcutoff=0.01):
    """Read a tab-separated tejaas result file into a list of SNPRes records.

    The first line is treated as a header and skipped. Each remaining line is
    split on tabs and read positionally: 0 = rsid, 1 = pos, 2 = q, 3 = mu,
    4 = sigma, 5 = p, 6 = chrom, 7 = maf.

    A row is dropped when its maf lies outside ``[mafcutoff, 1 - mafcutoff]``
    or when sigma is 0. Empty files and blank lines are tolerated instead of
    raising.

    Parameters
    ----------
    filepath : str
        Path of the result file.
    mafcutoff : float, optional
        Minor-allele-frequency cutoff, applied symmetrically at both ends.

    Returns
    -------
    list of SNPRes
        One record per retained row. ``logp`` holds ``-log10(p)`` and
        ``target`` is always None.
    """
    res = list()
    with open(filepath, 'r') as mfile:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(mfile, None)
        for line in mfile:
            stripped = line.strip()
            if not stripped:
                # Blank (e.g. trailing) lines carry no fields to index.
                continue
            arr   = stripped.split("\t")
            rsid  = arr[0]
            pos   = int(arr[1])
            p     = float(arr[5])
            chrom = int(arr[6])
            q     = float(arr[2])
            mu    = float(arr[3])
            sigma = float(arr[4])
            maf   = float(arr[7])
            if maf < mafcutoff or maf > (1 - mafcutoff):
                continue
            if sigma == 0:
                # (q - mu) / sigma would be undefined.
                continue
            # When p is stored as exactly 0, log10(p) is unusable, so the
            # value is recomputed from the standardised statistic instead.
            logp = np.log10(p) if p != 0 else pvalue((q - mu) / sigma)
            res.append(SNPRes(rsid=rsid, chrom=chrom, pos=pos, logp=-logp, target=None, maf=maf))
    return res

# Load the tejaas trans-eQTLs of every tissue: transeqtl_dict keeps them per
# tissue, alltranseqtls is the concatenation over all tissues.
alltranseqtls = list()
transeqtl_dict = dict()
# Path template; "{:s}" is filled with the short tissue identifier.
tejaas_file = "/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v8_lncRNA_freeze/summary_5e-08/{:s}/tejaas/trans_eqtls.txt"
for tissue in tissues:
    # Use the template defined above; the previous reference to `tejaas_dir`
    # is never assigned in this cell and fails on a fresh kernel.
    tissue_tejaas_file = tejaas_file.format(tissue)
    tissue_trans_eqtls = tejaas(tissue_tejaas_file)
    transeqtl_dict[tissue] = tissue_trans_eqtls
    alltranseqtls += tissue_trans_eqtls
    

In [10]:
# rsid of every collected trans-eQTL; an rsid repeats when it was collected
# more than once.
alltranseqtls_ids = [snp.rsid for snp in alltranseqtls]
# Number of distinct rsids.
len({*alltranseqtls_ids})

21610

In [22]:
gtex_file = "/cbscratch/franco/datasets/gtex_v8/expression/gtex_portal/eQTLs/GTEx_Analysis_v8_trans_eGenes_fdr05.txt"
gtex_trans_dict = collections.defaultdict(list)
with open(gtex_file) as instream:
    next(instream)  # skip the header line
    # Keep the 7th tab-separated field of each row; it is matched against the
    # tejaas rsids below.
    gtex_trans_list = [line.strip().split("\t")[6] for line in instream]

# Total rows read, then the number of distinct identifiers among them.
print(len(gtex_trans_list))
print(len(set(gtex_trans_list)))

# Identifiers present in both the GTEx list and the tejaas results.
gtex_tejaas_intersection = set(gtex_trans_list) & set(alltranseqtls_ids)
for tissue in tissues:
    res = [x for x in transeqtl_dict[tissue] if x.rsid in gtex_tejaas_intersection]
    if res:
        print(tissue, [x.rsid for x in res])

# chr17_61209908_C_T_b38 originally in Nerve Tibial
# chr2_218830602_T_C_b38 originally in Cell Cultured fibroblasts
# chr14_87900975_C_T_b38 originally in Whole Blood
# chr18_7175423_G_A_b38 originally in Testis

162
142
bam ['chr2_218830602_T_C_b38']
ban ['chr2_218830602_T_C_b38', 'chr14_87900975_C_T_b38']
ms ['chr2_218830602_T_C_b38', 'chr17_61209908_C_T_b38']
pro ['chr18_7175423_G_A_b38']
