In [1]:
import pandas as pd
import numpy as np


# Chromosome number; it selects which pickle file is loaded.
chrom = 22

# Read the per-chromosome table, index it by (gene, intron) and sort the
# rows on that index.
df = (
    pd.read_pickle('chrom_%d.pkl' % chrom)
    .set_index(['gene', 'intron'])
    .sort_index()
)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,chrom,lmm-pval,lmm-rank-pval,pos,qep-pval,snp_id
gene,intron,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ENSG00000015475,7,22,0.190543,0.08322,18207220,0.235661,snp_22_18207220
ENSG00000015475,7,22,0.990685,0.813105,18207251,0.639112,snp_22_18207251
ENSG00000015475,7,22,0.36729,0.777673,18207730,0.86764,snp_22_18207730
ENSG00000015475,7,22,0.524754,0.607386,18207780,0.418764,indel:1I_22_18207780
ENSG00000015475,7,22,0.836786,0.175274,18207927,0.75866,snp_22_18207927


# Bonferroni correction

In [2]:
# Bonferroni: multiply each p-value by the number of tests (one per row of
# df) and cap the result to the [0, 1] range.
# NOTE: the p-value columns are overwritten in place, so running this cell
# a second time applies the correction again.
ntests = len(df)
print("Number of tests: %d" % ntests)
for pval_column in ('lmm-pval', 'lmm-rank-pval', 'qep-pval'):
    df[pval_column] = np.clip(df[pval_column] * ntests, 0, 1)

Number of tests: 32981


In [16]:
from bokeh.io import push_notebook, output_notebook
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.plotting import figure, show, output_file
from bokeh.charts import BoxPlot, Bar
# Configure bokeh so that show() renders figures inline in the notebook.
output_notebook()

In [73]:
# Empty long-format table: one row per p-value, tagged with the method that
# produced it and a display colour.
data = pd.DataFrame(
    columns=['pval', 'method', 'color'],
)

def append_df(data, df, name):
    """Return `data` with the p-values of one method stacked underneath it.

    Parameters
    ----------
    data : pandas.DataFrame
        Table accumulated so far; it is not modified.
    df : pandas.DataFrame
        Source table; the column ``'<name>-pval'`` is read from it.
    name : str
        Method name, used both to pick the source column and as the value
        of the ``method`` column in the appended rows.

    Returns
    -------
    pandas.DataFrame
        New frame made of `data` followed by one row per source value, with
        columns ``pval``, ``method`` (set to `name`) and ``color`` (set to
        the placeholder ``'white'``). The appended rows carry a fresh
        0..N-1 index, so labels repeat across successive calls.
    """
    source_column = df['%s-pval' % name]
    n_rows = len(source_column)
    # Converting to a plain array drops the source index.
    new_rows = pd.DataFrame({
        'pval': np.asarray(source_column),
        'method': [name] * n_rows,
        'color': ['white'] * n_rows,
    })
    return pd.concat([data, new_rows])

# Stack the p-values of every method into the long-format table.
for method_name in ('lmm', 'lmm-rank', 'qep'):
    data = append_df(data, df, method_name)

# Swap the placeholder colour for the one assigned to each method.
for method_name, method_color in (('lmm', 'blue'),
                                  ('lmm-rank', 'pink'),
                                  ('qep', 'red')):
    data.loc[data['method'] == method_name, 'color'] = method_color


# Count, per method, how many p-values fall below the 1e-3 threshold.
# Grouping uses the 'method' Series rather than the column name, which keeps
# the 'method' column available as an aggregation target.
grouped = data.groupby(data['method']).agg({
    # All rows of a group share one method name; pick the first one by
    # position so the result does not depend on the row labels.
    'method': lambda x: x.iloc[0],
    # Vectorised count of hits (instead of Python's built-in sum()).
    'pval': lambda x: (x < 1e-3).sum(),
})

# Bar colour for each method.
grouped.loc['lmm', 'color'] = 'blue'
grouped.loc['lmm-rank', 'color'] = 'pink'
grouped.loc['qep', 'color'] = 'red'

# Bar chart with one bar per method, its height being the number of hits.
p = Bar(grouped, values='pval', color='color', label='method', legend=False,
        title="Chromosome %d" % (chrom,), tools=['save,reset'], width=900)
p.yaxis.axis_label = 'Number of hits'

show(p)
