In [1]:
import pandas as pd
import numpy as np
from limix_genetics import qqplot
import bokeh
from bokeh.io import output_notebook
from bokeh.io import output_file
from bokeh.charts import Histogram, show
from bokeh.layouts import gridplot
from horta_exp.introns.fetch_data import get_intron_events
# Path of the pickled results table; it is read with pd.read_pickle in the
# loading cell.
FILE = 'chrom_null_chrom22.pkl'
# Tell Bokeh to render figures inline in the notebook output.
output_notebook()

In [2]:
# Fetch the intron events table.
introns = get_intron_events()

# Read the pickled results, key them by (gene, intron) and order the rows by
# that key -- done as one chained expression instead of in-place mutation.
df = (
    pd.read_pickle(FILE)
    .set_index(['gene', 'intron'])
    .sort_index()
)

In [3]:
# Index labels present in both `introns` and the (gene, intron)-indexed `df`.
ii = introns.index.intersection(df.index)
# Restrict `introns` to those shared labels and put the rows back in sorted order.
introns = introns.loc[ii,:].sort_index()

In [4]:
# Empty long-format template: a float 'p-value' column indexed by
# (label, marker).
DF = (
    pd.DataFrame(columns=['label', 'marker', 'p-value'])
    .astype({'p-value': float})
    .set_index(['label', 'marker'])
)

def _create_df(label, pvals):
    df_ = pd.DataFrame(columns=['label', 'marker', 'p-value'])
    df_['p-value'] = df_['p-value'].astype(float)
    df_['p-value'] = pvals
    df_['label'] = label
    df_['marker'] = np.arange(len(pvals))
    df_.set_index(['label', 'marker'], inplace=True)
    return df_

# Flatten the (gene, intron) index and drop it together with the SNP
# annotation columns; the '<label>-pval' columns read below are left in place.
df_ = df.reset_index().drop(['gene', 'intron', 'snp_id', 'pos', 'chrom'], axis=1)

# One long-format block per method, stacked with a single concat.
# DataFrame.append in a loop re-copies the accumulated frame on every
# iteration and no longer exists in pandas >= 2.0.
labels = ['lmm', 'lmm-rank', 'qep']
DFn = pd.concat(
    [_create_df(label, df_['%s-pval' % label].values) for label in labels]
)

In [5]:
# Summarise 'ntri' over the first two index levels of `introns`.
# A plain list of reducer names replaces the nested-dict renaming spec, which
# pandas deprecated in 0.20 and rejects with SpecificationError from 1.0 on.
# The columns are still addressed the same way, e.g. group['ntri']['median'].
group = (
    introns
    .groupby(level=[0, 1])
    .agg({'ntri': ['max', 'min', 'median', 'var']})
    .sort_index()
)

In [7]:
def plot_according(introns, group, expression):
    """Show a QQ-plot and two trial-count histograms for a selection of rows.

    The selection is applied to the module-level ``df`` (for the p-values),
    to ``introns`` and to ``group`` via ``.loc[expression, :]``.

    Parameters
    ----------
    introns : pandas.DataFrame
        Frame with an ``'ntri'`` column.
    group
        Object supporting ``len()`` and ``.loc[expression, :]``; the callers
        in this notebook pass ``group['ntri']['median']``.
    expression
        Row selector handed to ``.loc``; the callers in this notebook pass a
        boolean comparison against a quantile of ``group``.

    Returns
    -------
    None
        The figures are displayed as a side effect.
    """
    # NOTE(review): this reports the size of the whole `group`, not of the
    # subset picked by `expression`, so it prints the same number for every
    # threshold -- confirm that is the intended figure.
    print("Number of gene-intron pairs: %d" % len(group))

    # Only the '<label>-pval' columns are read, so the selection is used
    # directly instead of resetting the index and deleting other columns.
    selected = df.loc[expression, :]

    # Stack one long-format block per method in a single concat;
    # DataFrame.append in a loop is quadratic and was removed in pandas 2.0.
    DFn = pd.concat([
        _create_df(label, selected['%s-pval' % label].values)
        for label in ['lmm', 'lmm-rank', 'qep']
    ])

    qqplot(DFn, colors={'lmm-rank':'green', 'qep':'#E24A33', 'lmm':'#348ABD'},
           atleast_points=0.01, tools=['save'])

    selected_introns = introns.loc[expression, :]

    def choice(vals):
        # Fixed seed keeps the drawn sample identical between runs.
        # NOTE(review): RandomState.choice samples WITH replacement by
        # default, so inputs shorter than 5000 are resampled rather than
        # passed through unchanged -- confirm this is intended.
        return np.random.RandomState(0).choice(vals, min(5000, len(vals)))

    ntris = choice(selected_introns['ntri'].values)
    h0 = Histogram(ntris, title="Number of trials - across samples",
                   tools=['save'], xlabel='number of trials',
                   width=400, height=400)

    values = choice(group.loc[expression, :].values)
    h1 = Histogram(values, title="Number of trials - across gene-intron pairs",
                   tools=['save'], xlabel='number of trials', width=400, height=400)

    # Both histograms side by side in one row.
    show(gridplot([[h0, h1]]))

In [8]:
# Pairs whose median number of trials is strictly above the 0.0 quantile.
median_ntri = group['ntri']['median']
plot_according(introns, median_ntri, median_ntri > median_ntri.quantile(0.0))

In [9]:
# Pairs whose median number of trials is strictly above the 0.25 quantile.
median_ntri = group['ntri']['median']
plot_according(introns, median_ntri, median_ntri > median_ntri.quantile(0.25))

In [10]:
# Pairs whose median number of trials is strictly above the 0.5 quantile.
median_ntri = group['ntri']['median']
plot_according(introns, median_ntri, median_ntri > median_ntri.quantile(0.5))

In [11]:
# Pairs whose median number of trials is strictly above the 0.75 quantile.
median_ntri = group['ntri']['median']
plot_according(introns, median_ntri, median_ntri > median_ntri.quantile(0.75))

In [12]:
# Pairs whose median number of trials is strictly above the 0.9 quantile.
median_ntri = group['ntri']['median']
plot_according(introns, median_ntri, median_ntri > median_ntri.quantile(0.9))