In [34]:
%load_ext autoreload
%autoreload 2
import os, sys
import numpy as np
import collections
sys.path.append('../../')
sys.path.append('/usr/users/fsimone/tejaas')

# Field names of one SNP record, in positional order.
SNPRES_FIELDS = ['rsid', 'chrom', 'pos', 'logp', 'maf']

# Plain namedtuple that supplies the fields; SNPRes below is the public name.
_SNPResBase = collections.namedtuple('_SNPRes', SNPRES_FIELDS)


class SNPRes(_SNPResBase):
    """Immutable record holding the fields listed in ``SNPRES_FIELDS``.

    The empty ``__slots__`` keeps instances free of a per-instance
    ``__dict__``, so each record stays as small as the underlying tuple.
    """
    __slots__ = ()

def tejaas(filepath, zero_pval_floor=10e-30):
    """Read a tab-separated trans-eQTL result file into a list of SNPRes.

    The first line is treated as a header and skipped. Every remaining
    non-blank line is split on tabs and its columns are read as:
    0 -> rsid (str), 1 -> chrom (int), 2 -> pos (int), 3 -> maf (float),
    7 -> p-value (float).

    Parameters
    ----------
    filepath : str
        Path of the file to read.
    zero_pval_floor : float, optional
        Value substituted for a p-value of exactly 0 before taking the
        logarithm, because log10(0) is undefined. The default ``10e-30``
        (numerically 1e-29) is the constant that was previously hard-coded.

    Returns
    -------
    list of SNPRes
        One record per data line, in file order, with ``logp`` holding
        ``-log10(p)``. The list is empty when the file has no data lines,
        including when the file is completely empty.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than 8 tab-separated columns.
    ValueError
        If a numeric column cannot be converted.
    """
    res = list()
    with open(filepath, 'r') as mfile:
        # Skip the header. The default argument keeps a zero-byte file from
        # raising StopIteration.
        next(mfile, None)
        for line in mfile:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            arr   = stripped.split("\t")
            rsid  = arr[0]
            chrom = int(arr[1])
            pos   = int(arr[2])
            maf   = float(arr[3])
            p     = float(arr[7])
            # Only an exact zero is replaced by the floor; any other value is
            # passed to log10 unchanged.
            logp  = np.log10(p) if p != 0 else np.log10(zero_pval_floor)
            res.append(SNPRes(rsid=rsid, chrom=chrom, pos=pos, logp=-logp, maf=maf))
    return res
    
# Template for a configuration name; the `{:s}` placeholder is filled with one
# entry of `gammas` via `basename.format(gamma)` in the cells below.
basename = "protein_coding_lncRNA_{:s}_knn30_cut5e-8_fst_high"
# Labels substituted into `basename`, one configuration per label.
gammas = ["gamma01", "gamma0006"]

# NOTE(review): the loading cells use `basepath` (assigned the same string in
# a later cell); `basedir` itself is not referenced in the visible cells —
# confirm whether it is still needed.
basedir = "/cbscratch/franco/trans-eqtl"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# Project-local helper module; presumably resolved through the sys.path
# entries appended in the first cell — TODO confirm which one provides it.
from utils import utils
# Alternative tissue list, currently disabled:
# tissue_file = "/usr/users/fsimone/trans-eqtl-pipeline/analysis/plots/tissues.txt"
tissue_file = "/usr/users/fsimone/trans-eqtl-pipeline/analysis/plots/tissue_table.txt"
# read_tissues yields two values that are unpacked into the tissue identifiers
# (used below as directory names) and their descriptions.
tissuenames, descriptions = utils.read_tissues(tissue_file)
# Map each tissue identifier to its description, paired by position.
tissue_names = dict(zip(tissuenames, descriptions))

# Tissue identifiers that the trans_counter cell skips when counting rsid
# occurrences; brain tissues are kept in their own list.
special_tissues = ['ag', 'haa', 'liv', 'msg', 'pan', 'pit', 'si', 'spl', 'va', 'wb']
brain_tissues = ['bam', 'ban', 'bca', 'bceh', 'bce', 'bco', 'bfr', 'bhi', 'bhy', 'bnu', 'bpu', 'bsp', 'bsu']

In [36]:
# Root directory; results are read from <basepath>/<config>/<tissue>/.
basepath = "/cbscratch/franco/trans-eqtl"

# LD-pruned results: trans_dict[config][tissue] -> list of SNPRes records.
# Tissues without a result file get no entry at all.
trans_dict = dict()
for gamma in gammas:
    config = basename.format(gamma)
    trans_dict[config] = dict()

    for tissue in tissuenames:
        tejaas_file = os.path.join(basepath, config, tissue, "trans_eqtls_ldpruned.txt")
        if os.path.exists(tejaas_file):
            # Progress line is completed by the count printed after loading.
            print("Loading ", tissue, end="")
            transeqtls = tejaas(tejaas_file)
            # An empty result is stored as an empty list and reported as 0.
            trans_dict[config][tissue] = transeqtls
            print(" has {:d} trans-eqtls".format(len(transeqtls)))
        else:
            print("{:s} has no trans-eqtl results".format(tissue))

Loading  as has 166 trans-eqtls
Loading  av has 421 trans-eqtls
Loading  ag has 184 trans-eqtls
Loading  aa has 1217 trans-eqtls
Loading  ac has 1509 trans-eqtls
Loading  at has 19 trans-eqtls
Loading  bam has 1059 trans-eqtls
Loading  ban has 835 trans-eqtls
Loading  bca has 1 trans-eqtls
Loading  bceh has 7 trans-eqtls
Loading  bce has 1 trans-eqtls
Loading  bco has 2 trans-eqtls
Loading  bfr has 1 trans-eqtls
Loading  bhi has 35 trans-eqtls
Loading  bhy has 76 trans-eqtls
Loading  bnu has 349 trans-eqtls
Loading  bpu has 2 trans-eqtls
Loading  bsp has 52 trans-eqtls
Loading  bsu has 28 trans-eqtls
Loading  br has 432 trans-eqtls
Loading  ebv has 288 trans-eqtls
Loading  fib has 172 trans-eqtls
Loading  cols has 72 trans-eqtls
Loading  colt has 1158 trans-eqtls
Loading  esog has 5 trans-eqtls
Loading  esom has 3 trans-eqtls
Loading  esomu has 1 trans-eqtls
Loading  haa has 0 trans-eqtls
Loading  hlv has 234 trans-eqtls
Loading  kc has 10 trans-eqtls
Loading  liv has 37 trans-eqtls
Lo

In [37]:
# Directory that receives the summary tables written by this notebook.
outdir = "summary_transeqtls_fst_high"
# exist_ok=True creates the directory only when needed, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(outdir, exist_ok=True)

# One file per gamma label; each line is "<tissue>\t<number of records>"
# taken from the LD-pruned results in trans_dict.
for gamma in gammas:
    config = basename.format(gamma)
    with open(os.path.join(outdir, f"transeqtls_per_tissue_{gamma}_cut5e-8.txt"), 'w') as outstream:
        for tname, snps in trans_dict[config].items():
            outstream.write(f"{tname}\t{len(snps)}\n")

In [38]:
# Results from "trans_eqtls.txt" (the file without the "_ldpruned" suffix):
# trans_dict_full[config][tissue] -> list of SNPRes records.
# Tissues without a result file get no entry at all.
trans_dict_full = dict()
for gamma in gammas:
    config = basename.format(gamma)
    trans_dict_full[config] = dict()

    for tissue in tissuenames:
        tejaas_file = os.path.join(basepath, config, tissue, "trans_eqtls.txt")
        if os.path.exists(tejaas_file):
            # Progress line is completed by the count printed after loading.
            print("Loading ", tissue, end="")
            transeqtls = tejaas(tejaas_file)
            # An empty result is stored as an empty list and reported as 0.
            trans_dict_full[config][tissue] = transeqtls
            print(" has {:d} trans-eqtls".format(len(transeqtls)))
        else:
            print("{:s} has no trans-eqtl results".format(tissue))

Loading  as has 527 trans-eqtls
Loading  av has 1561 trans-eqtls
Loading  ag has 695 trans-eqtls
Loading  aa has 5454 trans-eqtls
Loading  ac has 7457 trans-eqtls
Loading  at has 55 trans-eqtls
Loading  bam has 4353 trans-eqtls
Loading  ban has 3587 trans-eqtls
Loading  bca has 1 trans-eqtls
Loading  bceh has 15 trans-eqtls
Loading  bce has 3 trans-eqtls
Loading  bco has 2 trans-eqtls
Loading  bfr has 1 trans-eqtls
Loading  bhi has 114 trans-eqtls
Loading  bhy has 234 trans-eqtls
Loading  bnu has 1397 trans-eqtls
Loading  bpu has 3 trans-eqtls
Loading  bsp has 172 trans-eqtls
Loading  bsu has 201 trans-eqtls
Loading  br has 1523 trans-eqtls
Loading  ebv has 1198 trans-eqtls
Loading  fib has 647 trans-eqtls
Loading  cols has 309 trans-eqtls
Loading  colt has 5215 trans-eqtls
Loading  esog has 25 trans-eqtls
Loading  esom has 47 trans-eqtls
Loading  esomu has 4 trans-eqtls
Loading  haa has 0 trans-eqtls
Loading  hlv has 921 trans-eqtls
Loading  kc has 33 trans-eqtls
Loading  liv has 188 

In [39]:
# trans_counter[rsid] = number of records carrying that rsid, summed over all
# retained tissues of every gamma configuration in trans_dict_full.
# NOTE(review): an rsid present for the same tissue under both gamma labels is
# counted twice — confirm that this is the intended notion of "overlap".
trans_counter = collections.defaultdict(int)

# Tissues left out of the count.
excluded_tissues = set(brain_tissues) | set(special_tissues)

for gamma in gammas:
    config = basename.format(gamma)
    for tissue in tissuenames:
        # Skip excluded tissues and tissues for which nothing was loaded.
        if tissue in excluded_tissues or tissue not in trans_dict_full[config]:
            continue
        for snp in trans_dict_full[config][tissue]:
            trans_counter[snp.rsid] += 1

In [40]:
# count_counter[n] = number of distinct rsids whose trans_counter value is n.
count_counter = collections.defaultdict(int)
for n_hits in trans_counter.values():
    count_counter[n_hits] += 1

overlap_path = os.path.join(outdir, "overlapping_transeqtls_nobrain_alt_haa_pan_spl_wb.txt")
with open(overlap_path, 'w') as outstream:
    # One line per occurrence count, ascending: "<number of rsids> <count>".
    for n_hits in sorted(count_counter):
        outstream.write("{:d} {:d}\n".format(count_counter[n_hits], n_hits))