In [1]:
import pandas as pd
import numpy as np


# Chromosome under analysis: selects which pickle is loaded below.
chrom = 22

# Load the per-chromosome results, key them by (gene, intron) and sort the
# index, all in one chained expression.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# come from a trusted source.
df = (
    pd.read_pickle('chrom_%d_perm.pkl' % chrom)
    .set_index(['gene', 'intron'])
    .sort_index()
)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,chrom,lmm-pval,pos,qep-pval,snp_id
gene,intron,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ENSG00000015475,6,22,0.48816,18183494,0.442632,snp_22_18183494
ENSG00000015475,6,22,0.512587,18183623,0.703575,snp_22_18183623
ENSG00000015475,6,22,0.047945,18183724,0.059372,snp_22_18183724
ENSG00000015475,6,22,0.513477,18184011,0.493602,snp_22_18184011
ENSG00000015475,6,22,0.379372,18184169,0.429686,snp_22_18184169


# Bonferroni correction

In [2]:
# Bonferroni: multiply every p-value by the number of tests and cap at 1.
ntests = len(df)
print("Number of tests: %d" % ntests)

# The correction overwrites the p-value columns of `df`, so running this cell
# twice would multiply by `ntests` twice. Remember which frame object has
# already been corrected and skip it on a re-run; reloading `df` creates a new
# object, which re-arms the correction.
if globals().get('_bonferroni_applied_to') is not df:
    for pval_col in ('lmm-pval', 'qep-pval'):
        df[pval_col] = np.clip(df[pval_col] * ntests, 0, 1)
    _bonferroni_applied_to = df

Number of tests: 815240


In [3]:
# Show the first rows of df again; at this point the p-value columns hold the
# values written by the correction cell (scaled by ntests and capped at 1).
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,chrom,lmm-pval,pos,qep-pval,snp_id
gene,intron,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ENSG00000015475,6,22,1.0,18183494,1.0,snp_22_18183494
ENSG00000015475,6,22,1.0,18183623,1.0,snp_22_18183623
ENSG00000015475,6,22,1.0,18183724,1.0,snp_22_18183724
ENSG00000015475,6,22,1.0,18184011,1.0,snp_22_18184011
ENSG00000015475,6,22,1.0,18184169,1.0,snp_22_18184169


In [4]:
from bokeh.io import push_notebook, output_notebook
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.plotting import figure, show, output_file
# Configure Bokeh to render plots inline in the notebook when show() is called.
output_notebook()


def manhattan(method, color, df0, max_pval=1e-1):
    """Show an interactive Manhattan plot for one method's p-values.

    Parameters
    ----------
    method : str
        Prefix of the p-value column to plot; the column read is
        ``'<method>-pval'``. The prefix is lower-cased before the lookup, so
        ``'qep'`` and ``'QEP'`` select the same column.
    color : str
        Colour handed to the circle glyphs.
    df0 : pandas.DataFrame
        Frame with a two-level index (level 0 is shown as "gene", level 1 as
        "intron" in the hover tool) and the columns ``pos``, ``snp_id`` and
        the selected p-value column.
    max_pval : float, optional
        Only rows whose p-value is strictly below this threshold are drawn.
        Defaults to ``1e-1``.

    Raises
    ------
    KeyError
        If ``df0`` has no ``'<method>-pval'`` column.

    Notes
    -----
    The figure title uses the module-level ``chrom``. Nothing is returned;
    the figure is displayed through ``show``.
    """
    # Resolve the column name once so that filtering, the y axis and the
    # hover value always refer to the same column.
    pval_col = '%s-pval' % method.lower()
    if pval_col not in df0.columns:
        raise KeyError("no column %r in the data frame" % pval_col)

    hits = df0[df0[pval_col] < max_pval]
    pvals = hits[pval_col]

    p = figure(title="%s :: chromosome %d" % (method.upper(), chrom),
               tools=['hover,zoom_in,zoom_out,box_zoom,save,pan,reset'], width=900)
    source = ColumnDataSource(data=dict(
        xname=hits['pos'],
        # NOTE: a p-value of exactly 0 maps to +inf on this axis.
        yname=-np.log10(pvals),
        gene=hits.index.get_level_values(0).tolist(),
        intron=hits.index.get_level_values(1).tolist(),
        snp_id=hits['snp_id'],
        pval=pvals,
        pos=hits['pos'],
    ))
    p.circle('xname', 'yname', source=source, color=color,
             fill_alpha=0.2, line_width=0, line_color=None)
    # Each tooltip row pulls its value from the matching source column.
    p.select_one(HoverTool).tooltips = [
        ('gene', '@gene'),
        ('intron', '@intron'),
        ('snp_id', '@snp_id'),
        ('pos', '@pos'),
        ('p-value', '@pval'),
    ]
    show(p)

In [5]:
# Plot the 'qep-pval' column of df in red.
manhattan('qep', 'red', df)