In [1]:
import pandas as pd
import numpy as np


# Chromosome number: selects the input pickle and is shown in the plot titles.
chrom = 22

# NOTE(review): unpickling can execute arbitrary code -- only load files
# from a trusted source.
# Load the per-chromosome table, key it by (gene, intron) and sort on that key.
df = (
    pd.read_pickle('chrom_%d.pkl' % chrom)
    .set_index(['gene', 'intron'])
    .sort_index()
)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,chrom,lmm-pval,lmm-rank-pval,pos,qep-pval,snp_id
gene,intron,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ENSG00000015475,7,22,0.190543,0.08322,18207220,0.235661,snp_22_18207220
ENSG00000015475,7,22,0.990685,0.813105,18207251,0.639112,snp_22_18207251
ENSG00000015475,7,22,0.36729,0.777673,18207730,0.86764,snp_22_18207730
ENSG00000015475,7,22,0.524754,0.607386,18207780,0.418764,indel:1I_22_18207780
ENSG00000015475,7,22,0.836786,0.175274,18207927,0.75866,snp_22_18207927


# Bonferroni correction

In [2]:
# Bonferroni correction: multiply each p-value by the number of tests (one
# test per row of `df`) and cap the result at 1.
#
# NOTE: the p-value columns of `df` are overwritten, so running this cell a
# second time compounds the correction. Re-run the loading cell first if this
# cell has to be executed again.
ntests = len(df)
print("Number of tests: %d" % ntests)

# All three methods are plotted further down with the same p-value cut-off,
# so all three columns get the same correction ('lmm-rank-pval' included).
for pval_col in ('lmm-pval', 'lmm-rank-pval', 'qep-pval'):
    df[pval_col] = np.clip(df[pval_col] * ntests, 0, 1)

Number of tests: 32981


In [3]:
from bokeh.io import push_notebook, output_notebook
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.plotting import figure, show, output_file
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()


def manhattan(method, color, df0, threshold=1e-1):
    """Show an interactive Manhattan plot for one method's p-values.

    Rows of ``df0`` whose ``'<method>-pval'`` value is strictly below
    ``threshold`` are drawn as circles at x = ``pos`` and
    y = -log10(p-value). Hovering over a point shows its gene, intron,
    SNP id, position and p-value.

    Parameters
    ----------
    method : str
        Prefix of the p-value column; e.g. ``'lmm'`` selects ``'lmm-pval'``.
        The prefix is lower-cased before the column lookup and upper-cased
        in the plot title.
    color : str
        Colour passed to the circle glyph.
    df0 : pandas.DataFrame
        Needs the columns ``'pos'``, ``'snp_id'`` and the selected p-value
        column. Index level 0 is reported as the gene and level 1 as the
        intron.
    threshold : float, optional
        Upper bound (exclusive) on the p-values that are plotted.
        Defaults to 0.1.

    Notes
    -----
    The title uses the module-level ``chrom``. The figure is displayed via
    ``show``; nothing is returned.
    """
    # Resolve the column name once so every lookup uses the same spelling.
    pval_col = '%s-pval' % method.lower()
    hits = df0[df0[pval_col] < threshold]

    p = figure(title="%s :: chromosome %d" % (method.upper(), chrom),
               tools='hover,zoom_in,zoom_out,box_zoom,save,pan,reset',
               x_axis_label='position',
               y_axis_label='-log10(p-value)',
               width=900)
    source = ColumnDataSource(data=dict(
        xname=hits['pos'],
        # A p-value of exactly 0 maps to +inf here.
        yname=-np.log10(hits[pval_col]),
        gene=hits.index.get_level_values(0).tolist(),
        intron=hits.index.get_level_values(1).tolist(),
        snp_id=hits['snp_id'],
        pval=hits[pval_col],
        pos=hits['pos']
    ))
    p.circle('xname', 'yname', source=source, color=color,
             fill_alpha=0.2, line_width=0, line_color=None)
    # The 'hover' entry in `tools` created the HoverTool configured here.
    p.select_one(HoverTool).tooltips = [
        ('gene', '@gene'),
        ('intron', '@intron'),
        ('snp_id', '@snp_id'),
        ('pos', '@pos'),
        ('p-value', '@pval'),
    ]
    show(p)

In [4]:
# Manhattan plot of the 'lmm-pval' column of `df`, drawn in blue.
manhattan('lmm', 'blue', df)

In [5]:
# Manhattan plot of the 'lmm-rank-pval' column of `df`, drawn in green.
manhattan('lmm-rank', 'green', df)

In [6]:
# Manhattan plot of the 'qep-pval' column of `df`, drawn in red.
manhattan('qep', 'red', df)