In [1]:
import numpy as np
import pandas as pd

In [163]:
from bokeh.io import push_notebook, output_notebook
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.plotting import show, output_file
import bokeh
output_notebook()

import numpy as np
import colour
from scipy.special import betaincinv
from limix_plot import cycler_ as cycler
from collections import OrderedDict
from numpy import asarray as asa

def expected(n):
    lnpv = np.linspace(1/(n+1), n/(n+1), n, endpoint=True)
    return np.flipud(-np.log10(lnpv))

from scipy.special import betaincinv

def rank_confidence_band(nranks):
    alpha = 0.01
    n = nranks

    k0 = np.arange(1, n+1)
    k1 = np.flipud(k0).copy()

    top = betaincinv(k0, k1, 1-alpha)
    mean = k0 / (n + 1)
    bottom = betaincinv(k0, k1, alpha)

    return (bottom, mean, top)

def _qqplot(p, method, color, df0, thr=1e-1, fill_alpha=0.2):
    
    pv = df0['%s-pval' % method].values[:]
    lpv = -np.log10(pv)
    lpv_sort = np.argsort(lpv)
    expected_lpv = expected(len(lpv))

    ok = pv[lpv_sort] <= thr
    
    gene = asa([i[0] for i in df0.index.values[lpv_sort]])
    intron = asa([i[1] for i in df0.index.values[lpv_sort]])
    
    source = ColumnDataSource(data=dict(
        xname=expected_lpv[ok],
        yname=lpv[lpv_sort][ok],
        gene=gene[ok],
        intron=intron[ok],
        snp_id=df0['snp_id'][lpv_sort][ok],
        pval=pv[lpv_sort][ok],
        pos=df0['pos'][lpv_sort][ok]
    ))
    
    p.circle('xname', 'yname', source=source, color=color,
             fill_alpha=fill_alpha, line_width=0, line_color=None,
            legend=method)
    
    mean = rank_confidence_band(len(lpv))
    me = [-np.log10(m) for m in mean]
    p.line([me[0], me[-1]], [me[0], me[-1]], color='black')
    p.legend.location = 'top_left'
    return p

def qqplot(df, figure=None, colors=None, threshold=1.0):
    import bokeh
    import bokeh.palettes
    
    if figure is None:
        figure = bokeh.plotting.figure(tools=['hover,box_zoom,save,pan,reset'], width=900)
    
    labels = list(df.index.get_level_values(0).unique())
    if colors is None:
        colors = dict()
        colors_iter = iter(bokeh.palettes.brewer['Spectral'][11])
        for label in labels:
            colors[label] = next(colors_iter)
    
    for label in labels:
        df0 = df.loc[(label,),:]
        
        pv = df0['p-value'].values
        lpv = -np.log10(pv)
        lpv_sort = np.argsort(lpv)
        expected_lpv = expected(len(lpv))

        ok = pv[lpv_sort] <= threshold

        source = ColumnDataSource(data=dict(
            xname=expected_lpv[ok],
            yname=lpv[lpv_sort][ok],
            pval=pv[lpv_sort][ok]
        ))

        fill_alpha = 0.8
        figure.circle('xname', 'yname', source=source, color=colors[label],
                 fill_alpha=fill_alpha, line_width=0, line_color=None,
                 legend=label)

    (bo, mean, to) = rank_confidence_band(len(lpv))
    me = [-np.log10(m) for m in mean]
    figure.line([me[0], me[-1]], [me[0], me[-1]], color='black')
    figure.legend.location = 'top_left'
    
    bo = np.flipud(-np.log10(bo))
    me = np.flipud(-np.log10(mean))
    to = np.flipud(-np.log10(to))

    band_x = np.append(me, me[::-1])
    band_y = np.append(bo, to[::-1])
    figure.patch(band_x, band_y, line_color='black', fill_color='black', fill_alpha=0.15, line_alpha=0.5)

    return figure

show(qqplot(df))

In [10]:
pvals0 = np.random.rand(100)
pvals1 = np.random.rand(100)

In [30]:
series0 = pd.Series(pvals0, name='qep')
series1 = pd.Series(pvals1, name='lmm')
df = pd.concat([series0, series1], axis=1)
df.head()

Unnamed: 0,qep,lmm
0,0.563736,0.682113
1,0.596962,0.84234
2,0.00514,0.01527
3,0.947496,0.178559
4,0.803048,0.779216


In [47]:
show(qqplot(df))

In [116]:
marker = np.arange(100)

df = pd.DataFrame(columns=['label', 'marker', 'p-value']).set_index(['label', 'marker'])
df['p-value'] = df['p-value'].astype(float)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,p-value
label,marker,Unnamed: 2_level_1


In [117]:
for (i, m) in enumerate(marker):
    df.loc[('qep', m), 'p-value'] = pvals0[i]

for (i, m) in enumerate(marker):
    df.loc[('lmm', m), 'p-value'] = pvals1[i]

In [118]:
df.sort_index(inplace=True)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,p-value
label,marker,Unnamed: 2_level_1
qep,0,0.563736
qep,1,0.596962
qep,2,0.00514
qep,3,0.947496
qep,4,0.803048


TypeError: 'NoneType' object does not support item assignment