In [127]:
import numpy as np
import scipy.stats as st
import scipy.special

import bebi103

import pandas as pd
import bokeh.io
from bokeh.charts import Line, output_file, show
import bokeh.plotting
bokeh.io.output_notebook()
import pystan

We estimate the prior for the parameter θ, the probability of reversal for each strain. We choose the beta distribution to model this because it is defined on the interval [0, 1], which makes it useful for modeling probabilities (including probabilities of rare events). We will vary the a and b parameters of the beta distribution based on our estimates of how likely it is for each strain to reverse.

We estimate that the AVA strain is twice as likely to reverse as the ASH strain, because AVA is sensitive to input from both ASH and PLM, while ASH is only sensitive to chemosensory stimuli such as toxins.

We estimate that wild type will reverse very rarely as it is not influenced by light since it has no means of detecting light.

In [134]:
# Prior on θ for the first strain (a=1, b=8): evaluate the Beta(1, 8) density
# on a uniform grid over [0, 1] and draw it as a single curve.
sigma = np.linspace(0, 1, num=200)
prior_density = st.beta(1, 8, loc=0, scale=1).pdf(sigma)

p = bokeh.plotting.figure(
    width=300,
    height=200,
    x_axis_label='θ',
    y_axis_label='g(θ)',
)
p.line(sigma, prior_density, line_width=2)
bokeh.io.show(p)

The ASH strain will still reverse with probability < 0.5

In [229]:
# Prior on θ for the ASH strain (a=2, b=7): evaluate the Beta(2, 7) density
# on a uniform grid over [0, 1] and draw it as a single curve.
sigma = np.linspace(0, 1, num=200)
prior_density = st.beta(2, 7, loc=0, scale=1).pdf(sigma)

p = bokeh.plotting.figure(
    width=300,
    height=200,
    x_axis_label='θ',
    y_axis_label='g(θ)',
)
p.line(sigma, prior_density, line_width=2)
bokeh.io.show(p)

The AVA strain is twice as likely to reverse so we predict the distribution will be shifted to the right and more widely peaked.

In [230]:
# Prior on θ for the AVA strain (a=5, b=6): evaluate the Beta(5, 6) density
# on a uniform grid over [0, 1] and draw it as a single curve.
sigma = np.linspace(0, 1, num=200)
prior_density = st.beta(5, 6, loc=0, scale=1).pdf(sigma)

p = bokeh.plotting.figure(
    width=300,
    height=200,
    x_axis_label='θ',
    y_axis_label='g(θ)',
)
p.line(sigma, prior_density, line_width=2)
bokeh.io.show(p)

Now we plot the posterior probability density function for each of the three strains. To make the calculations for all three strains easier, we store the prior parameters a and b and the observed data (n reversals out of N trials) in arrays, with one entry per strain in the order wild type, ASH, AVA.

In [231]:
# One row per strain, in the order wild type, ASH, AVA:
# (prior a, prior b, observed reversals n, number of trials N).
_strain_rows = [
    (1, 8, 13, 126),
    (2, 7, 39, 124),
    (5, 6, 91, 124),
]

# Transpose the table into the four parallel lists used by later cells.
a_array, b_array, n_array, N_array = (list(column) for column in zip(*_strain_rows))

Now we can do our prior predictive check.

In [232]:
# Prior predictive check: for each strain, draw θ from its beta prior, simulate
# a reversal count from the binomial likelihood for every draw, and overlay the
# ECDFs of the simulated reversal fractions on one figure.

# Number of prior draws (and therefore simulated data sets) per strain.
n_ppc_samples = 1000

# Figure shared by all strains; created by the first ecdf() call below.
prior = None

for i in range(3):
    # Draw parameters out of the prior
    theta = np.random.beta(a_array[i], b_array[i], size=n_ppc_samples)

    # Draw one data set out of the likelihood for each prior draw and express
    # it as the fraction of the N trials that were reversals.
    n = np.random.binomial(N_array[i], theta) / N_array[i]

    ecdf_kwargs = dict(
        x_axis_label='probability of reversal',
        alpha=1,
        line_alpha=0,
    )
    if prior is not None:
        # Later strains are drawn onto the figure returned for the first one.
        ecdf_kwargs['p'] = prior

    prior = bebi103.viz.ecdf(n, **ecdf_kwargs)

bokeh.io.show(prior)

**B) Plot the posterior probability density function for each of the three strains. What can you conclude from this?**

In [233]:
# Grid of θ values on which the posterior is evaluated; the end points 0 and 1
# are excluded.
theta_array = np.linspace(0.01, 0.99, num=200)

# output[k] is a list with one unnormalized log-posterior value per grid point
# for strain k. The values stay on the log scale here; nothing is
# exponentiated in this cell.
output = []

for a, b, n_rev, n_trials in zip(a_array, b_array, n_array, N_array):
    output.append([
        # log prior (beta) + log likelihood (binomial). np.sum is applied to
        # the single binomial term exactly as in the original computation.
        st.beta.logpdf(t, a, b) + np.sum(st.binom.logpmf(n_rev, n_trials, t))
        for t in theta_array
    ])

We can compute the normalization constant (conceptually, the area under the curve of our line). Since we want to plot the probability density, we want the area under the curve to be equal to 1, so we divide our trial values by the normalization constant.

In [234]:
def normalize(data, grid=None):
    """Turn unnormalized log-density values into a density that integrates to 1.

    Parameters
    ----------
    data : array_like
        Unnormalized log-density values, one per grid point.
    grid : array_like, optional
        Points at which `data` was evaluated. Defaults to the notebook-level
        `theta_array`.

    Returns
    -------
    numpy.ndarray
        `exp(data)` rescaled so that its trapezoidal integral over `grid` is 1.
    """
    if grid is None:
        # Fall back to the notebook-level grid.
        grid = theta_array

    log_values = np.asarray(data, dtype=float)

    # Subtract the maximum before exponentiating so the largest term is
    # exp(0) = 1; the constant factor cancels in the normalization below.
    trial = np.exp(log_values - log_values.max())

    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    # Area under the unnormalized curve (the normalization constant).
    norm_constant = integrate(trial, x=grid)
    return trial / norm_constant

In [235]:
# Normalize the log posterior of each strain; entries 0, 1, 2 of `output` are
# assigned to the wild-type, ASH and AVA results respectively.
trial_wt_norm, trial_ash_norm, trial_ava_norm = (
    normalize(output[k]) for k in range(3)
)

In [242]:
# Plot the three normalized posterior densities over the θ grid on one figure.
p = bokeh.plotting.figure(
    width=300,
    height=200,
    x_axis_label='probability of reversal (theta)',
    y_axis_label='p(theta)',
)

# Each curve paired with its extra line styling; the last curve keeps
# bokeh's default line color.
curves = (
    (trial_wt_norm, {'line_color': "#f46d43"}),
    (trial_ash_norm, {'line_color': "#249d43"}),
    (trial_ava_norm, {}),
)
for density, style in curves:
    p.line(theta_array, density, line_width=2, **style)

bokeh.io.show(p)

In [239]:
# Sanity check: compare the log-posterior lists stored at index 1 and index 2
# of `output`. Both are plain Python lists, so `==` yields a single bool.
output[1] == output[2]

False