In [1]:
import pandas as pd
import numpy as np


# Load the pickled results table and index it by (gene, intron), sorted.
# NOTE(review): unpickling can execute code embedded in the file; only load
# 'chrom_null.pkl' from a trusted source.
# Chained calls return new frames rather than mutating one in place, so `df`
# is bound exactly once to its final, indexed and sorted form.
df = (
    pd.read_pickle('chrom_null.pkl')
    .set_index(['gene', 'intron'])
    .sort_index()
)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,chrom,lmm-pval,lmm-rank-pval,pos,qep-pval,snp_id
gene,intron,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ENSG00000000419,5,20,0.477324,0.406138,42131442,0.386107,snp_19_42131442
ENSG00000000419,5,20,0.039244,0.014254,42131753,0.009565,indel:1I_19_42131753
ENSG00000000419,5,20,0.573356,0.590113,42132193,0.654554,snp_19_42132193
ENSG00000000419,5,20,0.825621,0.94535,42132273,0.963257,snp_19_42132273
ENSG00000000419,5,20,0.91033,0.857315,42132314,0.891363,snp_19_42132314


In [2]:
from bokeh.io import push_notebook, output_notebook
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.plotting import figure, show, output_file
# Configure Bokeh so that later show() calls render inline in the notebook.
output_notebook()

import numpy as np
import colour
from scipy.special import betaincinv
from limix_plot import cycler_ as cycler
from collections import OrderedDict
from numpy import asarray as asa

def expected(n):
    """Return ``n`` reference -log10 p-values in ascending order.

    The evenly spaced quantiles ``k / (n + 1)`` for ``k = 1..n`` are mapped
    to the ``-log10`` scale; the result is reversed so that the smallest
    value (largest quantile) comes first.
    """
    quantiles = np.linspace(1 / (n + 1), n / (n + 1), n, endpoint=True)
    neg_log10 = -np.log10(quantiles)
    return neg_log10[::-1]

def xy(pv):
    """Unimplemented placeholder: ignores ``pv`` and returns ``None``."""
    return None

def rank_confidence_band(nranks):
    """Return the quantile ``k / (nranks + 1)`` for each rank ``k = 1..nranks``.

    Despite its name, this function only produces the centre line; no lower
    or upper band limits are computed.

    Parameters
    ----------
    nranks : int
        Number of ranks.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``nranks`` holding ``k / (nranks + 1)`` in
        ascending order (empty when ``nranks`` is 0).
    """
    ranks = np.arange(1, nranks + 1)
    return ranks / (nranks + 1)

def qqplot(p, method, color, df0, thr=1e-1, fill_alpha=0.2):
    """Add a QQ scatter of one p-value column, plus a diagonal, to ``p``.

    Parameters
    ----------
    p : figure-like
        Object providing ``circle``, ``line`` and ``legend`` (the notebook
        passes a Bokeh figure). It is modified in place.
    method : str
        Column prefix; p-values are read from ``df0['<method>-pval']``. Also
        used as the legend entry.
    color : str
        Marker colour.
    df0 : pandas.DataFrame
        Must contain the columns ``'<method>-pval'``, ``'snp_id'`` and
        ``'pos'``. The first two index levels are exported as ``gene`` and
        ``intron``.
    thr : float, optional
        Only points whose p-value is ``<= thr`` are drawn. The reference
        values are still computed over all rows, so thresholding does not
        shift the points that remain.
    fill_alpha : float, optional
        Marker fill opacity.

    Returns
    -------
    The same ``p`` that was passed in.
    """
    pv = df0['%s-pval' % method].values
    lpv = -np.log10(pv)
    order = np.argsort(lpv)
    expected_lpv = expected(len(lpv))

    # `ok` masks the *sorted* sequence; `selected` holds the matching row
    # positions in df0, so every column below is taken with one positional
    # lookup on a plain array (no label-vs-position ambiguity on a Series).
    ok = pv[order] <= thr
    selected = order[ok]

    source = ColumnDataSource(data=dict(
        xname=expected_lpv[ok],
        yname=lpv[selected],
        gene=df0.index.get_level_values(0).values[selected],
        intron=df0.index.get_level_values(1).values[selected],
        snp_id=df0['snp_id'].values[selected],
        pval=pv[selected],
        pos=df0['pos'].values[selected],
    ))

    # NOTE(review): newer Bokeh releases name this keyword `legend_label`;
    # `legend=` is kept to match the version this notebook was written for.
    p.circle('xname', 'yname', source=source, color=color,
             fill_alpha=fill_alpha, line_width=0, line_color=None,
             legend=method)

    # Diagonal reference line between the extreme reference quantiles. Only
    # the two end points are needed, so convert just those.
    mean = rank_confidence_band(len(lpv))
    if len(mean) > 0:
        first, last = -np.log10(mean[[0, -1]])
        p.line([first, last], [first, last], color='black')
    p.legend.location = 'top_left'
    return p

In [3]:
# One shared figure holding the QQ series of every p-value column.
p = figure(title="All chromosomes",
           tools=['hover,zoom_in,zoom_out,box_zoom,save,pan,reset'], width=900)

# (column prefix, marker colour) for each series; only p-values <= 1e-7 are
# drawn for each of them.
for method, color in [('qep', 'red'), ('lmm', 'blue'), ('lmm-rank', 'green')]:
    qqplot(p, method, color, df, thr=1e-7)

# Hover fields refer to the data-source columns that qqplot attaches to each
# scatter series.
p.select_one(HoverTool).tooltips = [
    ('gene', '@gene'),
    ('intron', '@intron'),
    ('snp_id', '@snp_id'),
    ('pos', '@pos'),
    ('p-value', '@pval'),
]

show(p)