In [99]:
from tqdm import tqdm
import limix
import pickle as pkl
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from numpy import asarray
import xarray as xr
from copy import deepcopy
import os
from os.path import join
import simplejson as json
import hashlib
import pandas as pd
import pdb
from glob import glob
import h5py
from numpy import log10
import re
from plot import plot_poisson_ordered, plot_distplots, plot_distplot
from util import get_amelie_selection
import qgrid
import h5py
import dask
import dask.dataframe
import dask.array
from urllib.request import urlopen
try:
    import ipfsapi
    has_ipfs = True
except ModuleNotFoundError:
    has_ipfs = False


# Try to connect to an IPFS API endpoint on localhost:5001, but only when the
# optional `ipfsapi` package could be imported.
if has_ipfs:
    try:
        ipfs = ipfsapi.connect('127.0.0.1', 5001)
    except Exception:
        # Any failure to connect simply disables IPFS use; `ipfs` is then
        # left undefined, so later code must check `has_ipfs` first.
        has_ipfs = False

# NOTE(review): `only_hash` is not read in any of the visible cells --
# presumably consumed by IPFS-related code elsewhere; confirm.
only_hash = True

In [100]:
# Load the traits/kinship pickle from the local cache when present, otherwise
# download it through the public IPFS HTTP gateway.
# NOTE(review): unpickling executes arbitrary code. The remote fallback trusts
# whatever the gateway returns over plain HTTP -- confirm the source is trusted.
local_pickle = "data/traits-kinship-hannah-amelie.pkl"
if os.path.exists(local_pickle):
    # Context manager so the file handle is closed once the pickle is read.
    with open(local_pickle, "rb") as fh:
        data = pkl.load(fh)
else:
    url = "http://ipfs.io/ipfs/QmXtFuoA3JTkhhpUpqh4dyu3FJLUG7DWAmXwUk1N2TMbAh"
    with urlopen(url) as response:
        data = pkl.load(response)

# Output folder layout: <dst_folder>/phenotype and <dst_folder>/kinship.
# makedirs(..., exist_ok=True) also creates dst_folder itself and is a no-op
# when the folders already exist, so the cell can be re-run safely.
dst_folder = "3"
for subfolder in ("phenotype", "kinship"):
    os.makedirs(join(dst_folder, subfolder), exist_ok=True)

In [101]:
# Marker positions for chromosomes 1 through 21, keyed by chromosome number
# and read from the genotype HDF5 file.
# NOTE(review): absolute, user-specific path -- this cell only runs on a
# machine that has the file at this location.
hdf5_path = "/Users/horta/arrayexpress/HS.hdf5"
pos_dataset = "/imputed_genotypes/chr{}/col_header/pos"

pos = {}
for chrom in range(1, 22):
    pos[chrom] = limix.io.hdf5.fetch(hdf5_path, pos_dataset.format(chrom))

In [102]:
# Filename patterns of the scan result files found in dst_folder.
# Group 1 captures the trait name, group 2 the chromosome identifier.
patts = ["scan_ipheno_normal_(.*)_mean_standardize_chr(.*).json",
         "scan_ipheno_normal_(.*)_quantile_gaussianize_chr(.*).json",
         "scan_measures_poisson_(.*)_chr(.*).json",
         "scan_measures_normal_(.*)_mean_standardize_chr(.*).json",
         "scan_measures_normal_(.*)_quantile_gaussianize_chr(.*).json"]

# Short model label used in the "model" column for each filename pattern.
patt2name = {'scan_ipheno_normal_(.*)_mean_standardize_chr(.*).json':'INormalStd',
             'scan_ipheno_normal_(.*)_quantile_gaussianize_chr(.*).json':'INormalGau',
             'scan_measures_poisson_(.*)_chr(.*).json':'MPoisson',
             'scan_measures_normal_(.*)_mean_standardize_chr(.*).json':'MNormalStd',
             'scan_measures_normal_(.*)_quantile_gaussianize_chr(.*).json':'MNormalGau'}

# Parsed scan results grouped by pattern and keyed by "<trait>_chr<chrom>".
# NOTE: this rebinds `data`, replacing whatever an earlier cell stored there.
data = {patt:{} for patt in patts}
for path in glob(join(dst_folder, "*.json")):
    # basename() instead of split("/")[1]: independent of the path separator
    # and of how many components dst_folder has.
    filename = os.path.basename(path)
    for patt in patts:
        match = re.match(patt, filename)
        if match:
            trait_name, chrom = match.group(1), match.group(2)
            # Close each result file as soon as it has been parsed.
            with open(path, "r") as fh:
                data[patt][trait_name + '_chr' + chrom] = json.load(fh)
            # First matching pattern wins.
            break

# One row per (trait, chromosome, marker position, model) with its p-value.
rows = []
for patt in patts:
    model_name = patt2name[patt]
    for name, d0 in data[patt].items():
        # Split on the LAST "_chr", mirroring the greedy first regex group, so
        # a trait name that itself contains "_chr" is still parsed correctly.
        trait_name, chrom_str = name.rsplit('_chr', 1)
        chrom = int(chrom_str)
        print(trait_name, chrom)

#         y = pd.read_pickle(open(join(dst_folder, 'phenotype', d0['phenotype'] + ".series.pkl"), "rb"))

        chrom_pos = pos[chrom]
        for i, pv in enumerate(d0['pv']):
            rows.append([trait_name, chrom, chrom_pos[i], model_name, pv])

# Sort by trait, then chromosome. The key must read the chromosome from the row
# itself (row[1]); reading the loop variable `chrom` would use one stale value
# for every row and effectively sort by trait only.
rows.sort(key=lambda row: (row[0], row[1]))
df = pd.DataFrame(data=rows, columns=["trait", "chrom", "pos", "model", "pv"])
# Disabled clean-up step kept for reference:
# df.loc[df['pv'] == 'exception', 'pv'] = np.nan
# df = df.dropna()

Distance15_20 19
PLT 19
InterTrialCross1_40 19
Crossings 21
TimeOpenSection 20
time_freezing 19
died_of_EAE 20
IPGTT_test_worked 21
BW_week17 20
Rearing10_15 21
IL_nb 19
PosturesClosedToOpen 19
IL_score 19
Avoidances1_20 20
Boli 20
blood_pressure_time 20
AA_IL_nb 19
InterTrialCross31_40 20
Rearing20_25 20
InterTrialCross11_20 21
Distance5_10 21
nb_trials_freezing 19
is.albino 20
is.albino 21
Distance5_10 20
InterTrialCross11_20 20
Rearing20_25 21
InterTrialCross31_40 21
blood_pressure_time 21
Avoidances1_20 21
Boli 21
InterTrialCross21_30 19
Rearing10_15 20
BW_week17 21
IPGTT_test_worked 20
died_of_EAE 21
ALP 19
TimeOpenSection 21
Crossings 20
HeadDips 19
Distance5 19
PltClumps 19
InterTrialCross1_10 19
EAE_score_at_sacrifice 20
InterTrialCross1_20 19
EntriesOpenSection 21
Avoidances1_5 21
Rearing5_10 21
AA_IL_score 19
Rearing5 20
Has1kidney 20
Avoidances6_10 20
Avoidances31_40 20
Avoidances1_10 21
Rearing0_30 20
Distance10_15 20
Sodium 20
Avoidances11_20 21
AA_nb 19
LineCrossings 19
A

In [103]:
# Preview the first rows of the assembled scan table.
df.head()

Unnamed: 0,trait,chrom,pos,model,pv
0,AA_IL_nb,19,32111.0,MPoisson,0.567695
1,AA_IL_nb,19,38109.0,MPoisson,0.354547
2,AA_IL_nb,19,38565.0,MPoisson,0.354547
3,AA_IL_nb,19,38599.0,MPoisson,0.567695
4,AA_IL_nb,19,46625.0,MPoisson,0.354547


In [104]:
# Trait selection returned by util.get_amelie_selection(); the plotting loop
# tests membership in it to decide which plot titles are drawn in red.
amelie_selection = get_amelie_selection()

In [105]:
# Draw one Manhattan plot per (trait, model) pair and save it as
# <dst_folder>/fig/scan/<model>/<trait>/manhattan.png.
pv_reference = 1e-7  # p-value at which the horizontal reference line is drawn

for trait, df0 in df.groupby("trait"):
    for model, df1 in df0.groupby("model"):
        ax = limix.plot.manhattan(df1, pv='pv', pos='pos', chr='chrom')

        # Traits in amelie_selection get a red title; others use the default colour.
        title_kwargs = {'color': 'red'} if trait in amelie_selection else {}
        ax.set_title(f"{trait} - {model}", **title_kwargs)

        # Draw on the returned axes explicitly instead of relying on pyplot's
        # notion of the "current" axes.
        ax.axhline(-log10(pv_reference), color='red')

        # Build the path from dst_folder rather than repeating the literal "3";
        # exist_ok=True makes re-running the cell a no-op for existing folders.
        folder = join(dst_folder, "fig", "scan", model, trait)
        os.makedirs(folder, exist_ok=True)

        fig = ax.figure
        fig.savefig(join(folder, "manhattan.png"))
        # Close each figure so a long loop does not accumulate open figures.
        plt.close(fig)