In [21]:
import numpy as np
import scipy.stats as st
import scipy.special

import bebi103

import pandas as pd
import bokeh.io
import bokeh.plotting
bokeh.io.output_notebook()
import pystan

We estimate the prior for the parameter θ, the probability of reversal for each strain. We choose the beta distribution to model this as it is useful for modeling rare events and probabilities of probabilities. We will varry the a and b values in the beta distribution based on our estimates of how likely it is for a strain to reverse. 

We estimate that the AVA strain is twice as likely to reverse as the ASH strain as it is sensitive to the input of ASH and PLM while ASH is only sensitve to chemosensory stimuli such as toxins.

We estimate that wild type will reverse very rarely as it is not influenced by light since it has no means of detecting light.

In [2]:
sigma = np.linspace(0, 1, 200)
p = bokeh.plotting.figure(width=300, height=200, 
                          x_axis_label='θ', 
                          y_axis_label='g(θ)')
p.line(sigma, st.beta.pdf(sigma, 1, 10, loc=0, scale=1), line_width=2)
bokeh.io.show(p)

The ASH strain will still reverse with probability < 0.5

In [3]:
sigma = np.linspace(0, 1, 200)
p = bokeh.plotting.figure(width=300, height=200, 
                          x_axis_label='θ', 
                          y_axis_label='g(θ)')
p.line(sigma, st.beta.pdf(sigma, 1.5, 7, loc=0, scale=1), line_width=2)
bokeh.io.show(p)

The AVA strain is twice as likely to reverse so we predict the distribution will be shifted to the right and more widely peaked.

In [4]:
sigma = np.linspace(0, 1, 200)
p = bokeh.plotting.figure(width=300, height=200, 
                          x_axis_label='θ', 
                          y_axis_label='g(θ)')
p.line(sigma, st.beta.pdf(sigma, 3, 7, loc=0, scale=1), line_width=2)
bokeh.io.show(p)

Now we plot the posterior probability density function for each of the three strains.

In [14]:
n_ppc_samples = 1000

# Draw parameters out of the prior
theta = np.random.beta(3, 7, size=n_ppc_samples)

# Draw data sets out of the likelihood for each set of prior params
# ell = np.array([the * n for the, n in zip(theta, N)])
bi = np.array([np.random.binomial(124, t, size=1000) for t in theta])

In [9]:
p = bebi103.viz.ecdf(bi[0], 
                     x_axis_label='probability of reversal', 
                     alpha=0.01, 
                     line_alpha=0)
for ell_vals in bi[9::10]:
    p = bebi103.viz.ecdf(ell_vals, alpha=0.02, p=p, line_alpha=0)

bokeh.io.show(p)

In [17]:
data = np.hstack((np.expand_dims(theta, 1), bi))
columns = ['phi'] + [f'[{i+1}]' for i in range(len(bi[0]))]


# Make data frame
df_ppc = pd.DataFrame(data=data, columns=columns)

df_ppc['warmup'] = 0
df_ppc['chain'] = 0
df_ppc['chain_idx'] = np.arange(1, n_ppc_samples+1)

In [16]:
df_ppc.head()

Unnamed: 0,phi,[1],[2],[3],[4],[5],[6],[7],[8],[9],...,[991],[992],[993],[994],[995],[996],[997],[998],[999],[1000]
0,0.139738,14.0,19.0,10.0,21.0,12.0,14.0,20.0,19.0,16.0,...,19.0,14.0,23.0,13.0,17.0,17.0,20.0,15.0,27.0,12.0
1,0.360465,49.0,42.0,41.0,47.0,52.0,51.0,44.0,47.0,43.0,...,45.0,45.0,45.0,50.0,46.0,42.0,40.0,34.0,45.0,42.0
2,0.324865,39.0,38.0,42.0,49.0,36.0,30.0,41.0,36.0,42.0,...,47.0,35.0,36.0,40.0,42.0,37.0,35.0,43.0,43.0,48.0
3,0.17921,24.0,32.0,22.0,17.0,20.0,23.0,15.0,29.0,24.0,...,23.0,18.0,30.0,25.0,18.0,22.0,21.0,26.0,24.0,19.0
4,0.209372,33.0,22.0,29.0,27.0,22.0,26.0,27.0,25.0,32.0,...,19.0,22.0,25.0,31.0,22.0,25.0,36.0,32.0,34.0,15.0


In [22]:
bokeh.io.show(
    bebi103.viz.predictive_ecdf(df_ppc, 
                                'n', 
                                x_axis_label='Number of reversals'))

NameError: name 'stan' is not defined