In [8]:
import numpy as np
import collections
import os
import time
import matplotlib.pyplot as plt
#plt.switch_backend('agg')
import matplotlib
import sys
sys.path.append('../')
import pandas as pd
# from utils import mpl_stylesheet
# mpl_stylesheet.banskt_presentation(fontfamily = 'latex-clearsans', fontsize = 24, colors = 'banskt', dpi = 72)

In [9]:
def read_tejaas(filename):
    """Parse one whitespace-delimited result file that starts with a header line.

    From every data row the following columns are extracted (0-based):
    column 0 as the variant ID, column 2 as an integer base-pair position,
    column 3 as a float MAF and column 7 as a float p-value.

    Blank lines are skipped and an empty file yields five empty lists.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    tuple of lists
        ``(rsidlist, pvallist, bplist, qlist, maflist)``. ``qlist`` is always
        empty because q-value parsing is disabled; it is kept so the shape of
        the return value stays stable for callers.

    Raises
    ------
    IndexError
        If a non-blank row has fewer than eight columns.
    ValueError
        If a position, MAF or p-value field cannot be converted to a number.
    """
    rsidlist = list()
    bplist   = list()
    pvallist = list()
    qlist    = list()
    maflist  = list()
    with open(filename, 'r') as instream:
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(instream, None)
        for line in instream:
            linesplit = line.split()
            if not linesplit:
                # Blank line (for example a trailing newline at end of file).
                continue
            rsidlist.append(linesplit[0])
            bplist.append(int(linesplit[2]))
            pvallist.append(float(linesplit[7]))
            maflist.append(float(linesplit[3]))
    return rsidlist, pvallist, bplist, qlist, maflist

def read_tejaas_old(filename):
    """Parse one tab-delimited result file in the older column layout.

    The first line is treated as a header. From every data row the following
    columns are extracted (0-based): column 0 as the variant ID, column 1 as
    an integer base-pair position, column 5 as a float p-value and column 7
    as a float MAF.

    Blank lines are skipped and an empty file yields five empty lists.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    tuple of lists
        ``(rsidlist, pvallist, bplist, qlist, maflist)``. ``qlist`` is always
        empty because q-value parsing is disabled; it is kept so the return
        value has the same shape as ``read_tejaas``.

    Raises
    ------
    IndexError
        If a non-blank row has fewer than eight tab-separated fields.
    ValueError
        If a position, MAF or p-value field cannot be converted to a number.
    """
    rsidlist = list()
    bplist   = list()
    pvallist = list()
    qlist    = list()
    maflist  = list()
    with open(filename, 'r') as instream:
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(instream, None)
        for line in instream:
            stripped = line.strip()
            if not stripped:
                # Blank line (for example a trailing newline at end of file).
                continue
            linesplit = stripped.split("\t")
            rsidlist.append(linesplit[0])
            bplist.append(int(linesplit[1]))
            pvallist.append(float(linesplit[5]))
            maflist.append(float(linesplit[7]))
    return rsidlist, pvallist, bplist, qlist, maflist

def read_tejaas_all_chrom(filefmt, chrmlist):
    gwrsids = list()
    gwpvals = list()
    gwnsnps = dict()
    gwbppos = dict()
    gwqvals = list()
    gwmafs  = list()
    
    for chrm in chrmlist:
        filepath = filefmt.format(chrm)
        rsids, pvals, bplist, qlist, maflist = read_tejaas(filepath)
        gwrsids += rsids
        gwpvals += pvals
        gwqvals += qlist
        gwmafs  += maflist
        gwnsnps[chrm] = len(rsids)
        gwbppos[chrm] = np.array(bplist)
        
    gwpvals = np.array(gwpvals)
    pmax  = np.max(gwpvals[np.where(gwpvals > 0)])
    nonzero_pvals = gwpvals.copy()
    nonzero_pvals[np.where(gwpvals == 0)] = pmax
    
    return gwrsids, gwnsnps, gwbppos, nonzero_pvals


In [5]:
# Project-local helper module; resolving it relies on the '../' entry added to sys.path in the import cell.
from utils import utils
# Tissue table; `tshorts` supplies the tissue identifiers iterated over by the compilation cells below.
tissue_file = "/usr/users/fsimone/trans-eqtl-pipeline/analysis/plots/tissue_table.txt"
# presumably short names, descriptions and display strings per tissue -- verify against utils.read_tissues_str
tshorts, tdescs, tstrings = utils.read_tissues_str(tissue_file)

In [40]:
# Root directory of the results compiled by this cell.
resdir  = "/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v8_lncRNA_freeze/"
# Tissue directory names in this layout carry a "gtex_v8-" prefix.
tissuelist = ["gtex_v8-"+t for t in tshorts]

# Single-element lists; only element [0] of each is used when building paths below.
tejaas_preproc = ["raw"]
tejaas_method_variant  = ["permnull_sb0.1_knn30"]

# Name of the result file expected inside every chr<N> directory.
rrfile = "rr.txt"
# Chromosomes 1 through 22.
chrmlist = np.arange(1,23)

# Build one table: rows keyed by variant ID, one column of -log10(p) per tissue.
compilation = pd.DataFrame([])
for tissue in tissuelist:
    print(f"{tissue} .. ", end="")
    st = time.time()
    # Path template; the "{:d}" placeholder is filled with the chromosome by read_tejaas_all_chrom.
    filefmt = f'{resdir}/{tejaas_preproc[0]}/{tissue}/tejaas/{tejaas_method_variant[0]}/chr' + "{:d}" + f'/{rrfile}'
    gwrsids, gwnsnps, gwbppos, gwpvals = read_tejaas_all_chrom(filefmt, chrmlist)
    gwlog10pvals = -np.log10(gwpvals)
    df = pd.DataFrame(gwlog10pvals, index=gwrsids, columns=[tissue])
    if compilation.shape[0] == 0:
        # First tissue: nothing to merge with yet.
        compilation = df
    else:
        # Outer join on the index keeps variants that are missing in some tissues (NaN there).
        compilation = pd.merge(compilation, df, how='outer', left_index=True, right_index=True)
    endt = time.time()
    print(f"took {endt - st}")
# NOTE(review): the index holds variant IDs but is labelled "tissues"; the label ends up as the
# first header cell when the table is written out -- confirm this is the intended header.
compilation.index.name = "tissues"    


gtex_v8-as .. took 8.610471487045288
gtex_v8-av .. took 19.227826356887817
gtex_v8-ag .. took 32.07252049446106
gtex_v8-aa .. took 32.85236406326294
gtex_v8-ac .. took 29.209378242492676
gtex_v8-at .. took 28.964735746383667
gtex_v8-bl .. took 22.996384859085083
gtex_v8-bam .. took 29.1875159740448
gtex_v8-ban .. took 26.892094612121582
gtex_v8-bca .. took 26.00601887702942
gtex_v8-bceh .. took 26.045687913894653
gtex_v8-bce .. took 25.931861877441406
gtex_v8-bco .. took 25.806033611297607
gtex_v8-bfr .. took 26.66654658317566
gtex_v8-bhi .. took 24.95831823348999
gtex_v8-bhy .. took 26.02777123451233
gtex_v8-bnu .. took 27.509639024734497
gtex_v8-bpu .. took 26.556803226470947
gtex_v8-bsp .. took 28.262579441070557
gtex_v8-bsu .. took 28.234033823013306
gtex_v8-br .. took 28.405027866363525
gtex_v8-ebv .. took 28.48702383041382
gtex_v8-fib .. took 27.016149520874023
gtex_v8-cols .. took 31.52826952934265
gtex_v8-colt .. took 26.844249963760376
gtex_v8-esog .. took 26.15397000312805
gt

In [46]:
# Write the compiled table as tab-separated text, keeping both the header row and the index column.
compilation.to_csv("/cbscratch/franco/trans-eqtl/dev-pipeline/gtex_v8_lncRNA_freeze/raw/all_variants_pvalues_tejaas.txt", sep="\t", header=True, index=True)

# Compile new SHAPEIT2 results

In [11]:
# Root directory of the results compiled by this cell; overwrites any earlier `resdir`.
resdir  = "/cbscratch/franco/from_saikat/gtex_v8_202003/"
# Tissue directories are named by the bare short identifiers here.
tissuelist = tshorts

# Single-element lists; only element [0] of each is used when building paths below.
tejaas_preproc = ["raw_std"]
tejaas_method_variant  = ["permnull_sb0.1_knn30"]

# Name of the result file expected inside every chr<N> directory.
rrfile = "rr.txt"
# Chromosomes 1 through 22.
chrmlist = np.arange(1,23)

# Build one table: rows keyed by variant ID, one column of -log10(p) per tissue.
compilation = pd.DataFrame([])
for tissue in tissuelist:
    # Tissues without a result directory are reported and skipped instead of aborting the run.
    if not os.path.exists(f'{resdir}/{tissue}'):
        print(f"{tissue} folder does not exist")
        continue
    print(f"{tissue} .. ", end="")
    st = time.time()
    # Path template (tissue directory first, then tejaas/<preproc>/<variant>); the "{:d}"
    # placeholder is filled with the chromosome by read_tejaas_all_chrom.
    filefmt = f'{resdir}/{tissue}/tejaas/{tejaas_preproc[0]}/{tejaas_method_variant[0]}/chr' + "{:d}" + f'/{rrfile}'
    gwrsids, gwnsnps, gwbppos, gwpvals = read_tejaas_all_chrom(filefmt, chrmlist)
    gwlog10pvals = -np.log10(gwpvals)
    df = pd.DataFrame(gwlog10pvals, index=gwrsids, columns=[tissue])
    if compilation.shape[0] == 0:
        # First available tissue: nothing to merge with yet.
        compilation = df
    else:
        # Outer join on the index keeps variants that are missing in some tissues (NaN there).
        compilation = pd.merge(compilation, df, how='outer', left_index=True, right_index=True)
    endt = time.time()
    print(f"took {endt - st}")
# NOTE(review): the index holds variant IDs but is labelled "tissues"; the label ends up as the
# first header cell when the table is written out -- confirm this is the intended header.
compilation.index.name = "tissues"

as .. took 25.31270694732666
av .. took 52.735108852386475
ag .. took 47.4991934299469
aa .. took 46.318761587142944
ac .. took 46.30136847496033
at .. took 46.92723536491394
bl folder does not exist
bam .. took 42.96416473388672
ban .. took 51.66261100769043
bca .. took 48.55023670196533
bceh .. took 46.57441163063049
bce .. took 45.93611764907837
bco .. took 46.010725259780884
bfr .. took 46.59201431274414
bhi .. took 47.28931021690369
bhy .. took 46.750786542892456
bnu .. took 44.6730682849884
bpu .. took 46.37399935722351
bsp .. took 46.6786732673645
bsu .. took 47.55704474449158
br .. took 48.69854211807251
ebv .. took 50.978925704956055
fib .. took 53.724297761917114
cols .. took 68.87605142593384
colt .. took 50.18138670921326
esog .. took 48.45770192146301
esom .. took 51.92563533782959
esomu .. took 48.274253606796265
haa .. took 48.03645634651184
hlv .. took 47.32708263397217
kc .. took 46.9708936214447
liv .. took 46.978625774383545
lu .. took 46.97243785858154
msg .. took 5

In [12]:
# Write the compiled table next to the results (under the current `resdir`) as tab-separated text,
# keeping both the header row and the index column.
compilation.to_csv(f"{resdir}/all_variants_pvalues_tejaas.txt", sep="\t", header=True, index=True)