In [5]:
import numpy as np
import pygtrie
from statsmodels.stats import multitest
import collections
import os

import matplotlib.pyplot as plt
#plt.switch_backend('agg')
import matplotlib
import sys
sys.path.append('../../')
from utils import utils
import json

# from utils import mpl_stylesheet
# mpl_stylesheet.banskt_presentation(fontfamily = 'latex-clearsans', fontsize = 24, colors = 'banskt', dpi = 72)


# Input tables: the tissue listing and the GTEx v8 metadata JSON.
tissue_file = "../../plots/tissue_table.txt"
json_file   = "../../gtex_v8_metadata.json"

# read_tissues_str returns three parallel sequences; from the names these look
# like short IDs, full GTEx names and display strings -- TODO confirm in utils.
tshorts, tfulls, tstrings = utils.read_tissues_str(tissue_file)
with open(json_file) as instream:
    gtex_meta = json.load(instream)

# Per-tissue lookup tables, all keyed by the entries of `tshorts`.
tissue_colors = dict()
tissue_names = dict()
tissue_nsamples = dict()

# The former `if tshort in tshorts` guard was always true (tshort is drawn
# from tshorts itself), so every tissue is registered unconditionally.
for tshort, tfull, tstring in zip(tshorts, tfulls, tstrings):
    tissue_names[tshort] = tstring
    # Metadata stores the colour without the leading '#'.
    tissue_colors[tshort] = "#" + gtex_meta[tfull]["colorHex"]
    tissue_nsamples[tshort] = gtex_meta[tfull]["rnaSeqSampleCount"]

In [31]:
def read_tejaas(filename, pval_limit):
    """Collect the IDs of variants whose p-value is at or above a threshold.

    The file is read as whitespace-delimited text. The first line is treated
    as a header and skipped. For every remaining non-blank line, column 0 is
    taken as the variant ID and column 7 as the p-value; the other columns
    are not read.

    Parameters
    ----------
    filename : str
        Path of the result file to read.
    pval_limit : float
        Variants with ``pval >= pval_limit`` are kept.

    Returns
    -------
    list of str
        IDs of the retained variants, in file order. Empty when the file is
        empty or contains only a header.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than 8 columns.
    ValueError
        If the p-value column cannot be parsed as a float.
    """
    rsidlist = list()
    with open(filename, 'r') as instream:
        # A default of None keeps an empty file from raising StopIteration.
        next(instream, None)
        for line in instream:
            linesplit = line.split()
            if not linesplit:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if float(linesplit[7]) >= pval_limit:
                rsidlist.append(linesplit[0])
    return rsidlist

def read_tejaas_all_chrom(filefmt, chrmlist, pval_limit=0.1):
    """Gather the variant IDs returned by ``read_tejaas`` over several files.

    Parameters
    ----------
    filefmt : str
        Path template with one ``str.format`` placeholder; it is filled with
        each entry of ``chrmlist`` in turn.
    chrmlist : iterable
        Values substituted into ``filefmt``, one file per value.
    pval_limit : float, optional
        Threshold passed through to ``read_tejaas`` (default 0.1).

    Returns
    -------
    list
        Concatenation of the per-file ID lists, in ``chrmlist`` order.
    """
    gwrsids = list()
    for chrm in chrmlist:
        gwrsids.extend(read_tejaas(filefmt.format(chrm), pval_limit))
    return gwrsids

def write_snp_list(outfilename, snplist):
    """Write variant IDs to a text file, one per line, under an ``ID`` header.

    Any existing file at ``outfilename`` is overwritten.
    """
    rows = (f"{snp}\n" for snp in snplist)
    with open(outfilename, 'w') as handle:
        handle.write("ID\n")
        handle.writelines(rows)

In [32]:
# Tissues whose results live under the sb0.006 variant instead of sb0.1.
altsb_tissues = ['haa', 'pan', 'spl', 'wb']

# NOTE(review): absolute cluster path; adjust when running elsewhere.
resdir  = "/cbscratch/franco/from_saikat/gtex_v8_202003"

rrfile = "rr.txt"
chrmlist = np.arange(1,23)  # chromosomes 1..22; use e.g. [10,11] for a quick run
pval_lim = 0.1
NSNPs = 1000
SEED = 2020  # fixed so the sampled SNP sets are reproducible across runs
outdir = "non_eqtls"

# A dedicated generator keeps the sampling reproducible without touching the
# global numpy random state.
rng = np.random.RandomState(SEED)

# The output directory must exist before the first file is written.
os.makedirs(outdir, exist_ok=True)

for tissue in tshorts:
    if tissue in altsb_tissues:
        sb_variant = "permnull_sb0.006_knn30"
    else:
        sb_variant = "permnull_sb0.1_knn30"
    # "{:d}" is left unformatted here; it is filled in per chromosome later.
    filefmt = f'{resdir}/{tissue}/tejaas/raw_std/{sb_variant}/chr' + "{:d}" + f'/{rrfile}'
    print(filefmt)
    gwrsids = read_tejaas_all_chrom(filefmt, chrmlist, pval_lim)
    # Sampling without replacement fails if more items are requested than
    # exist, so cap the sample size at the number of candidates.
    nchoose = min(NSNPs, len(gwrsids))
    if nchoose < NSNPs:
        print(f"Warning: {tissue} has only {len(gwrsids)} candidate SNPs, fewer than {NSNPs}")
    # Sorting the indices keeps the chosen SNPs in their original file order.
    choose_ix = np.sort(rng.choice(len(gwrsids), size = nchoose, replace = False))
    chosen_snps = [gwrsids[x] for x in choose_ix]
    outfilename = f"{outdir}/{tissue}_non_eqtls.txt"
    write_snp_list(outfilename, chosen_snps)

/cbscratch/franco/from_saikat/gtex_v8_202003/as/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/av/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/ag/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/aa/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/ac/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/at/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/bam/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/ban/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/bca/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbscratch/franco/from_saikat/gtex_v8_202003/bceh/tejaas/raw_std/permnull_sb0.1_knn30/chr{:d}/rr.txt
/cbsc