In [127]:
import numpy as np
import scipy.stats as st
import scipy.special

import bebi103

import pandas as pd
import bokeh.io
from bokeh.charts import Line, output_file, show
import bokeh.plotting
bokeh.io.output_notebook()
import pystan

We estimate the prior for the parameter θ, the probability of reversal for each strain. We choose the beta distribution to model this because it is defined on the interval [0, 1], which makes it useful for modeling rare events and probabilities of probabilities. We will vary the a and b parameters of the beta distribution based on our estimates of how likely it is for a strain to reverse.

We estimate that the AVA strain is twice as likely to reverse as the ASH strain, because AVA is sensitive to input from both ASH and PLM, while ASH is only sensitive to chemosensory stimuli such as toxins.

We estimate that wild type will reverse very rarely, because it has no means of detecting light and is therefore not influenced by it.

In [134]:
# Prior density Beta(1, 8) for the reversal probability θ
# (per the preceding markdown, this is the wild-type prior).
# `sigma` holds the grid of θ values on [0, 1] at which the density is evaluated.
sigma = np.linspace(0, 1, num=200)
prior_pdf = st.beta.pdf(sigma, 1, 8)  # loc=0, scale=1 are the scipy defaults

p = bokeh.plotting.figure(
    width=300,
    height=200,
    x_axis_label='θ',
    y_axis_label='g(θ)',
)
p.line(sigma, prior_pdf, line_width=2)
bokeh.io.show(p)

The ASH strain will still reverse with probability < 0.5

In [3]:
# Prior density Beta(1.5, 7) for the reversal probability θ
# (per the preceding markdown, this is the ASH prior).
# `sigma` holds the grid of θ values on [0, 1] at which the density is evaluated.
sigma = np.linspace(0, 1, num=200)
prior_pdf = st.beta.pdf(sigma, 1.5, 7)  # loc=0, scale=1 are the scipy defaults

p = bokeh.plotting.figure(
    width=300,
    height=200,
    x_axis_label='θ',
    y_axis_label='g(θ)',
)
p.line(sigma, prior_pdf, line_width=2)
bokeh.io.show(p)

The AVA strain is twice as likely to reverse so we predict the distribution will be shifted to the right and more widely peaked.

In [4]:
# Prior density Beta(3, 7) for the reversal probability θ
# (per the preceding markdown, this is the AVA prior).
# `sigma` holds the grid of θ values on [0, 1] at which the density is evaluated.
sigma = np.linspace(0, 1, num=200)
prior_pdf = st.beta.pdf(sigma, 3, 7)  # loc=0, scale=1 are the scipy defaults

p = bokeh.plotting.figure(
    width=300,
    height=200,
    x_axis_label='θ',
    y_axis_label='g(θ)',
)
p.line(sigma, prior_pdf, line_width=2)
bokeh.io.show(p)

Now we plot the posterior probability density function for each of the three strains. We put our priors for a and b and the data points we have for n number of reversals in N trials into arrays to make performing calculations for all three strains easier.

In [142]:
# Per-strain inputs; index i refers to the same strain in every list.
# The (a, b) pairs match the priors plotted above, in the same order:
# Beta(1, 8), Beta(1.5, 7), Beta(3, 7) -- presumably wild type, ASH, AVA.

# Beta prior parameters.
a_array, b_array = [1, 1.5, 3], [8, 7, 7]

# Observed data: n reversals out of N trials.
n_array, N_array = [13, 39, 91], [126, 124, 124]

Now we can do our prior predictive check.

In [163]:
# Prior predictive check: for each strain, simulate n_ppc_samples experiments
# by first drawing a reversal probability from the prior and then drawing a
# reversal count from the binomial likelihood. The ECDFs of the simulated
# reversal fractions for all strains are overlaid on one figure.
n_ppc_samples = 1000

# Figure shared by all strains; None makes the first ecdf() call create it.
prior = None

for i in range(3):
    # Draw one parameter value out of the prior per simulated experiment
    theta = np.random.beta(a_array[i], b_array[i], size=n_ppc_samples)

    # Draw one data set out of the likelihood for each prior draw, and convert
    # counts to fractions using this strain's own number of trials (the trial
    # counts in N_array are not the same for every strain).
    n = np.random.binomial(N_array[i], theta) / N_array[i]

    prior = bebi103.viz.ecdf(n,
                             x_axis_label='probability of reversal',
                             p=prior,
                             alpha=0.01,
                             line_alpha=0)
bokeh.io.show(prior)

**B)**

In [153]:
# Evaluate the posterior density of θ for each strain on a grid.
# `output[i]` is the posterior PDF of strain i at the points of `theta_array`,
# normalized so that it integrates to one over the grid.
theta_array = np.linspace(0.01, 0.99, num=200)
output = []
for i in range(3):
    # Log prior on theta, evaluated on the whole grid at once
    log_prior = st.beta.logpdf(theta_array, a_array[i], b_array[i])

    # The likelihood is just the binomial distribution
    log_like = st.binom.logpmf(n_array[i], N_array[i], theta_array)

    # Unnormalized log posterior
    log_post = log_prior + log_like

    # Subtract the maximum before exponentiating to avoid underflow; the
    # constant factor is removed again by the normalization below.
    post = np.exp(log_post - np.max(log_post))

    # Normalize with the trapezoidal rule over the grid
    area = np.sum(0.5 * (post[1:] + post[:-1]) * np.diff(theta_array))
    output.append(post / area)

In [152]:
# Overlay the three per-strain curves stored in `output` against the θ grid.
# Curves follow the order of a_array/n_array (presumably wild type, ASH, AVA)
# and are drawn in blue, orange and green respectively so they can be told
# apart. The y label is left generic; it shows whatever `output` holds.
p = bokeh.plotting.figure(width=300, height=200,
                          x_axis_label='θ',
                          y_axis_label='y')

for curve, color in zip(output, ['blue', 'orange', 'green']):
    p.line(theta_array, curve, line_width=2, color=color)

bokeh.io.show(p)
