In [1]:
import os, sys
sys.path.append('../')
from utils import utils
import json
import numpy as np

# Tissue table parsed by the project helper; it yields three parallel sequences.
# NOTE(review): presumably short ids, full descriptions and GTEx abbreviation
# strings, in that order -- confirm against utils.read_tissues_str.
tissue_file = "/usr/users/fsimone/trans-eqtl-pipeline/analysis/plots/tissue_table.txt"
tissues, descriptions, tstrings = utils.read_tissues_str(tissue_file)

# GTEx v8 metadata, keyed by the tissue description with spaces replaced by "_".
json_file = "../gtex_v8_metadata.json"
with open(json_file) as instream:
    gtex_meta = json.load(instream)

# Per-tissue lookups keyed by the short tissue id.
tissue_names = {}
tissue_colors = {}
tissue_samples = {}
for tshort, tfull in zip(tissues, descriptions):
    tissue_names[tshort] = tfull
    meta_entry = gtex_meta[tfull.replace(" ", "_")]
    tissue_colors[tshort] = "#" + meta_entry["colorHex"]
    tissue_samples[tshort] = meta_entry["rnaSeqAndGenotypeSampleCount"]

# Maps each entry of tstrings to its short tissue id.
tshorst_dict = dict(zip(tstrings, tissues))

# Tissues whose results are read from a different results directory further down.
special_tissues = ['ag', 'haa', 'liv', 'msg', 'pan', 'pit', 'si', 'spl', 'va', 'wb']

In [3]:
import mpmath
import collections
# Work with 50 significant decimal digits in mpmath.
mpmath.mp.dps = 50


def pvalue(x):
    """Return log10 of the upper-tail standard-normal probability P(Z > x).

    The tail is evaluated as 0.5 * erfc(x / sqrt(2)) rather than
    1 - 0.5 * (1 + erf(x / sqrt(2))). The two forms are mathematically equal,
    but the subtraction loses every significant digit once the tail
    probability falls below the working precision (about 1e-50 here), which
    is exactly the regime where a caller needs this function.

    Parameters
    ----------
    x : float
        Standardised score.

    Returns
    -------
    float
        log10 of the one-sided p-value (a negative number for x > 0).
    """
    upper_tail = 0.5 * mpmath.erfc(x / mpmath.sqrt(2))
    return float(mpmath.log10(upper_tail))

# Field names of one SNP result record, in positional order.
SNPRES_FIELDS = [
    'rsid',
    'chrom',
    'pos',
    'logp',
    'target',
    'maf',
]


class SNPRes(collections.namedtuple('_SNPRes', SNPRES_FIELDS)):
    """Immutable record holding the result for a single SNP.

    Fields follow SNPRES_FIELDS. The empty __slots__ keeps instances free of
    a per-instance __dict__, so they stay as compact as a plain tuple.
    """

    __slots__ = ()

def tejaas(filepath, mafcutoff=0.01):
    """Read a TEJAAS result table and return the SNPs that pass the filters.

    The file is tab-separated and its first line is discarded (assumed to be
    a header). Columns are read by position:
    0 rsid, 1 chrom, 2 pos, 3 maf, 4 q, 5 mu, 6 sigma, 7 p.

    Parameters
    ----------
    filepath : str
        Path to the result file.
    mafcutoff : float
        Rows with maf < mafcutoff or maf > 1 - mafcutoff are dropped.

    Returns
    -------
    list of SNPRes
        One record per retained row, with ``logp`` stored as -log10(p) and
        ``target`` set to None. An empty file yields an empty list.

    Raises
    ------
    ValueError
        If a numeric column of a non-blank row cannot be parsed.
    IndexError
        If a non-blank row has fewer than eight columns.
    """
    res = list()
    with open(filepath, 'r') as mfile:
        # Skip the first line; the default keeps an empty file from raising
        # StopIteration.
        next(mfile, None)
        for line in mfile:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            arr   = stripped.split("\t")
            rsid  = arr[0]
            chrom = int(arr[1])
            pos   = int(arr[2])
            maf   = float(arr[3])
            q     = float(arr[4])
            mu    = float(arr[5])
            sigma = float(arr[6])
            p     = float(arr[7])
            if maf < mafcutoff or maf > (1 - mafcutoff):
                continue
            if sigma == 0:
                # (q - mu) / sigma would be undefined for this row.
                continue
            # A p-value stored as exactly 0 is recomputed from the
            # standardised statistic with the high-precision helper.
            logp = np.log10(p) if p != 0 else pvalue((q - mu) / sigma)
            res.append(SNPRes(rsid=rsid, chrom=chrom, pos=pos, logp=-logp, target=None, maf=maf))
    return res

# Per-tissue TEJAAS results, plus one flat list pooled over all tissues.
transeqtl_dict = {}
alltranseqtls = []
for tissue in tissues:
    # Tissues listed in special_tissues are read from the gamma0006 results
    # directory; every other tissue is read from the gamma01 directory.
    tejaas_file = (
        "/cbscratch/franco/trans-eqtl/protein_coding_lncRNA_gamma0006_knn30_cut5e-8/{:s}/trans_eqtls.txt"
        if tissue in special_tissues
        else "/cbscratch/franco/trans-eqtl/protein_coding_lncRNA_gamma01_knn30_cut5e-8/{:s}/trans_eqtls.txt"
    )
    tissue_tejaas_file = tejaas_file.format(tissue)
    tissue_trans_eqtls = tejaas(tissue_tejaas_file)
    transeqtl_dict[tissue] = tissue_trans_eqtls
    alltranseqtls.extend(tissue_trans_eqtls)
    

In [4]:
# rsids pooled over every tissue; an rsid found in several tissues appears
# once per tissue here.
alltranseqtls_ids = [snp.rsid for snp in alltranseqtls]
# Number of distinct rsids across all tissues.
len({*alltranseqtls_ids})

85108

In [5]:
# GTEx v8 portal table of trans-eGenes (file name indicates FDR 0.05).
gtex_file = "/cbscratch/franco/datasets/gtex_v8/expression/gtex_portal/eQTLs/GTEx_Analysis_v8_trans_eGenes_fdr05.txt"
gtex_trans_list = list()                          # column 6 of every row, duplicates kept
gtex_trans_dict = collections.defaultdict(list)   # short tissue id -> column-6 ids
with open(gtex_file) as instream:
    next(instream)  # first line is discarded (assumed to be the header)
    for line in instream:
        arr = line.strip().split("\t")
        # Column 0 is a key of gtex_meta; its abbreviation maps to the short
        # tissue id through tshorst_dict.
        tstring = gtex_meta[arr[0]]['tissueSiteDetailAbbr']
        gtex_trans_list.append(arr[6])
        gtex_trans_dict[tshorst_dict[tstring]].append(arr[6])
print(f"GTEx nº trans-eQTLs: {len(gtex_trans_list)}")
print(f"GTEx nº unique trans-eQTLs: {len(set(gtex_trans_list))}")

# Ids reported by both GTEx and TEJAAS, irrespective of tissue.
gtex_tejaas_intersection = set.intersection(set(gtex_trans_list), set(alltranseqtls_ids))
all_global_matches = list()   # one entry per (TEJAAS tissue, id) hit, so ids can repeat
all_tissue_matches = list()   # hits where GTEx reports the id in the same tissue
for tissue in tissues:
    res = [x for x in transeqtl_dict[tissue] if x.rsid in gtex_tejaas_intersection]
    if len(res) > 0:
        print(f"TEJAAS tissue: {tissue}")
        print(f"Global matches {[x.rsid for x in res]}")
        all_global_matches += [x.rsid for x in res]
        # Only the GTEx entry for this same tissue can give a tissue-level
        # match, so look it up directly. .get() avoids inserting an empty
        # list into the defaultdict for tissues GTEx does not report.
        same_tissue_ids = set(gtex_trans_dict.get(tissue, ()))
        for snp in res:
            if snp.rsid in same_tissue_ids:
                print(f"--->{tissue} - {snp.rsid}")
                all_tissue_matches += [snp.rsid]

# GTEx tissue in which some of the matched variants were originally reported:
# chr17_61209908_C_T_b38 originally in Nerve Tibial
# chr2_218830602_T_C_b38 originally in Cell Cultured fibroblasts
# chr14_87900975_C_T_b38 originally in Whole Blood
# chr18_7175423_G_A_b38 originally  in Testis
# The first count below is len(all_global_matches), i.e. it counts an id once
# per TEJAAS tissue in which it was found.
print(f"\n\nTotal Replicated trans-eQTLs: {len(all_global_matches)}")
print(f"Tissue Replicated trans-eQTLs: {len(all_tissue_matches)}")

GTEx nº trans-eQTLs: 162
GTEx nº unique trans-eQTLs: 142
TEJAAS tissue: ag
Global matches ['chr9_25575432_A_G_b38', 'chr10_15169740_A_G_b38', 'chr18_52052163_G_A_b38']
TEJAAS tissue: ac
Global matches ['chr18_52052163_G_A_b38']
TEJAAS tissue: bam
Global matches ['chr2_218830602_T_C_b38']
TEJAAS tissue: ban
Global matches ['chr2_218830602_T_C_b38', 'chr14_87900975_C_T_b38']
TEJAAS tissue: ms
Global matches ['chr2_218830602_T_C_b38']
TEJAAS tissue: pro
Global matches ['chr9_25575432_A_G_b38', 'chr18_7175423_G_A_b38']
TEJAAS tissue: si
Global matches ['chr9_25575432_A_G_b38']
TEJAAS tissue: tes
Global matches ['chr17_61209908_C_T_b38']
TEJAAS tissue: thy
Global matches ['chr9_97826557_A_G_b38']
--->thy - chr9_97826557_A_G_b38
TEJAAS tissue: wb
Global matches ['chr6_28324929_T_C_b38', 'chr17_61209908_C_T_b38']


Total Replicated trans-eQTLs: 15
Tissue Replicated trans-eQTLs: 1


In [None]:
# GTEx nº trans-eQTLs: 162
# GTEx nº unique trans-eQTLs: 142
# TEJAAS tissue: ag
# Global matches ['chr9_25575432_A_G_b38', 'chr10_15169740_A_G_b38', 'chr18_52052163_G_A_b38']
# TEJAAS tissue: ac
# Global matches ['chr18_52052163_G_A_b38']
# TEJAAS tissue: bam
# Global matches ['chr2_218830602_T_C_b38']
# TEJAAS tissue: ban
# Global matches ['chr2_218830602_T_C_b38', 'chr14_87900975_C_T_b38']
# TEJAAS tissue: liv
# Global matches ['chr9_25575432_A_G_b38']
# TEJAAS tissue: ms
# Global matches ['chr2_218830602_T_C_b38']
# TEJAAS tissue: pro
# Global matches ['chr9_25575432_A_G_b38', 'chr18_7175423_G_A_b38']
# TEJAAS tissue: si
# Global matches ['chr9_25575432_A_G_b38', 'chr17_61209908_C_T_b38', 'chr18_47702814_T_C_b38']
# TEJAAS tissue: spl
# Global matches ['chr17_61209908_C_T_b38']
# TEJAAS tissue: tes
# Global matches ['chr17_61209908_C_T_b38']
# TEJAAS tissue: thy
# Global matches ['chr9_97826557_A_G_b38']
# --->thy - chr9_97826557_A_G_b38
# TEJAAS tissue: wb
# Global matches ['chr6_28324929_T_C_b38', 'chr17_61209908_C_T_b38']


# Total Replicated trans-eQTLs: 19
# Tissue Replicated trans-eQTLs: 1

In [6]:
# Number of TEJAAS records loaded for each tissue, in tissue-table order.
for tissue in tissues:
    tissue_records = transeqtl_dict[tissue]
    print(tissue, len(tissue_records))

as 2011
av 1664
ag 17983
aa 5760
ac 7551
at 1463
bam 4780
ban 3890
bca 27
bceh 270
bce 96
bco 113
bfr 58
bhi 170
bhy 325
bnu 1451
bpu 14
bsp 186
bsu 202
br 1764
ebv 1241
fib 767
cols 320
colt 5282
esog 71
esom 343
esomu 68
haa 340
hlv 953
kc 39
liv 8566
lu 1975
msg 1570
ms 11347
nt 2457
ov 321
pan 624
pit 1605
pro 4193
snse 3010
sse 1548
si 13439
spl 5706
sto 905
tes 5452
thy 4097
ut 8563
va 665
wb 33706
