In [1]:
import pandas as pd
import numpy as np

import numba

import bebi103

import altair as alt
import altair_catplot as altcat

import bokeh.io
import bokeh.layouts
import bokeh.plotting
# Route Bokeh output to the notebook so bokeh.io.show() renders inline
bokeh.io.output_notebook()

# Ten-color categorical palette (hex strings); entry 1 is the default
# `data_color` of hw92_predictive defined further down
color_palette=['#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f', '#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ac']

In [2]:
# Load the Caulobacter growth image-processing results (path is relative to
# the notebook's working directory)
df = pd.read_csv('../data/hw_4.2_caulobacter_growth_image_processing_results.csv')

# Preview the first rows
df.head()

Unnamed: 0,time (min),area (sq um),growth_event,bacterium
0,1.0,1.300624,0,1
1,2.0,1.314144,0,1
2,3.0,1.295216,0,1
3,4.0,1.314144,0,1
4,5.0,1.341184,0,1


Since each growth event lasts a different amount of time, we add another column, `t`, that restarts the time count from 0 at the start of every growth event (i.e., every time there is a division).

In [3]:
# Per-growth-event time counter `t`: 0 on the first row of an event, then
# 1, 2, ... on each following row of the same event.
#
# A new event starts on any row where `growth_event` OR `bacterium` differs
# from the previous row. Checking `bacterium` as well keeps the counter from
# running on across two different cells that happen to carry the same
# growth_event number. The first row is always a boundary because diff() is
# NaN there and NaN != 0.
new_event = (df['growth_event'].diff().ne(0)
             | df['bacterium'].ne(df['bacterium'].shift()))

# cumsum() over the boundary flags labels each contiguous run of rows;
# cumcount() then numbers the rows inside each run starting from 0.
df['t'] = df.groupby(new_event.cumsum()).cumcount()

In [5]:
# Rename for convenience: later cells (plots and Stan data dicts) refer to
# the column simply as 'area'
df = df.rename(columns={'area (sq um)': 'area'})

In [6]:
# Check that the new `t` column and the renamed `area` column are present
df.head()

Unnamed: 0,time (min),area,growth_event,bacterium,t
0,1.0,1.300624,0,1,0
1,2.0,1.314144,0,1,1
2,3.0,1.295216,0,1,2
3,4.0,1.314144,0,1,3
4,5.0,1.341184,0,1,4


Let's start with a subset of the data. We will look at bacterium 1.

In [7]:
# All rows (every growth event) belonging to bacterium 1
df_bacterium1 = df.loc[df['bacterium'] == 1]

In [8]:
# Two alternating shades so that consecutive growth events can be told apart
colors = ['#1f78b4', '#a6cee3']

p = bokeh.plotting.figure(
    x_axis_label='time (min)',
    y_axis_label='cell area (sq µm)',
    plot_width=650,
    plot_height=250,
)

# One set of circle glyphs per growth event, colored by event parity
for event, trace in df_bacterium1.groupby('growth_event'):
    shade = colors[event % 2]
    p.circle(trace['time (min)'], trace['area'], size=3, color=shade)

bokeh.io.show(p)

#### No hierarchy

Let's just model a single growth event for now with no hierarchy using the exponential model.

In [9]:
def data_prior_pred(t, rng=None):
    '''
    Draw one prior predictive data set for the exponential growth model.

    Samples the parameters from their priors,

        a     ~ Normal(1.2, 0.4)
        k     ~ Normal(0.01, 0.003)
        sigma ~ |Normal(0, 0.1)|   (half-normal)

    and then generates data y ~ Normal(a * exp(k * t), sigma) at the
    values given in t.

    Parameters
    ----------
    t : array_like
        Time points at which to generate data.
    rng : object with a ``normal(loc, scale)`` method, optional
        Random number source, e.g. ``np.random.default_rng(seed)`` or
        ``np.random.RandomState(seed)``. When None (default) the global
        ``np.random`` state is used, exactly as before this parameter
        existed.

    Returns
    -------
    ndarray
        Simulated data, same shape as ``t``.
    '''
    # Fall back to NumPy's global generator to stay backward compatible
    if rng is None:
        rng = np.random

    # Sample parameter values according to priors (draw order is fixed so a
    # seeded generator reproduces the same data set)
    a = rng.normal(1.2, 0.4)
    k = rng.normal(0.01, 0.003)
    sigma = np.abs(rng.normal(0, 0.1))

    # Generate random data according to the likelihood
    return rng.normal(a * np.exp(k * t), sigma)

In [12]:
# Frame counter of bacterium 1's first growth event, used as the time axis
t = df_bacterium1.loc[df_bacterium1['growth_event'] == 0, 't'].values

p = bokeh.plotting.figure(
    width=450,
    height=300,
    x_axis_label='time',
    y_axis_label='area',
)

# Overlay 100 independent prior predictive data sets (fresh parameters per draw)
for i in range(100):
    simulated = data_prior_pred(t)
    p.circle(t, simulated, size=3, alpha=0.1)

bokeh.io.show(p)

Now let's try the linear model.

In [13]:
def data_prior_pred_linear(t, rng=None):
    '''
    Draw one prior predictive data set for the linear growth model.

    Samples the parameters from their priors,

        a     ~ Normal(1.2, 0.4)
        b     ~ Normal(0.01, 0.003)
        sigma ~ |Normal(0, 0.1)|   (half-normal)

    and then generates data y ~ Normal(a + b * t, sigma) at the values
    given in t.

    Parameters
    ----------
    t : array_like
        Time points at which to generate data.
    rng : object with a ``normal(loc, scale)`` method, optional
        Random number source, e.g. ``np.random.default_rng(seed)`` or
        ``np.random.RandomState(seed)``. When None (default) the global
        ``np.random`` state is used, exactly as before this parameter
        existed.

    Returns
    -------
    ndarray
        Simulated data, same shape as ``t``.
    '''
    # Fall back to NumPy's global generator to stay backward compatible
    if rng is None:
        rng = np.random

    # Sample parameter values according to priors (draw order is fixed so a
    # seeded generator reproduces the same data set)
    a = rng.normal(1.2, 0.4)
    b = rng.normal(0.01, 0.003)
    sigma = np.abs(rng.normal(0, 0.1))

    # Generate random data according to the likelihood
    return rng.normal(a + b * t, sigma)

In [14]:
# Frame counter of bacterium 1's first growth event, used as the time axis
t = df_bacterium1.loc[df_bacterium1['growth_event'] == 0, 't'].values

p = bokeh.plotting.figure(
    width=450,
    height=300,
    x_axis_label='time',
    y_axis_label='area',
)

# Overlay 100 independent prior predictive data sets from the linear model
for i in range(100):
    simulated = data_prior_pred_linear(t)
    p.circle(t, simulated, size=3, alpha=0.1)

bokeh.io.show(p)

#### One level hierarchical model

Let's try a one-level hierarchical model for bacterium 1, in which each growth event gets its own parameters drawn from shared hyperparameters.

In [17]:
# # Compile
# sm_gen = bebi103.stan.StanModel(model_code=model_code_pri_pred)

In [None]:
# NOTE(review): `sm_gen` is not defined by any active cell above. The compile
# cell is commented out and `model_code_pri_pred` does not appear in this
# notebook, so this cell raises NameError on a fresh kernel (Restart & Run All).

# Number of prior predictive draws requested from Stan
N_iter = 1000

# Inputs for the prior predictive model: sizes (N, J_1) and the locations and
# scales of the hyperpriors. These are fixed constants; nothing is read from df.
data = dict(N=100,
            J_1=3,
            hyper_a0_mu=1.4,
            hyper_a0_sigma=0.3,
            hyper_k0_mu=0.01,
            hyper_k0_sigma=0.002,
            hyper_a0_tau=0.1,
            hyper_k0_tau=0.001)

# Sample with the Fixed_param algorithm, no warmup, a single chain.
# NOTE(review): despite its name, `df_pred` is whatever sm_gen.sampling()
# returns (a fit object), not a data frame.
df_pred = sm_gen.sampling(data=data,
                     algorithm='Fixed_param',
                     warmup=0,
                     chains=1,
                     iter=N_iter)

# Pull out the draws of `a` and `k`; the cells below index these results with
# ['a'] and ['k'] to compute the fraction of negative draws
df_samples_a = bebi103.stan.extract_array(df_pred, name='a')
df_samples_k = bebi103.stan.extract_array(df_pred, name='k')

# Predictive ECDFs of the sampled `a` and `k`
p_a = bebi103.viz.predictive_ecdf(df_pred, 
                                'a', 
                                x_axis_label='a')
p_k = bebi103.viz.predictive_ecdf(df_pred, 
                                'k', 
                                x_axis_label='k')
# Plot side by side
bokeh.io.show(bokeh.layouts.gridplot([p_a, p_k], ncols=2))

Check for negative values:

In [None]:
# Fraction of prior draws of `a` that are negative
len(df_samples_a[df_samples_a['a'] < 0]) / len(df_samples_a)

In [None]:
# Fraction of prior draws of `k` that are negative
len(df_samples_k[df_samples_k['k'] < 0]) / len(df_samples_k)

In [None]:
# Total number of rows of `a` draws (the denominator of the fractions above)
len(df_samples_a)

Prior predictive checks

In [18]:
# a = df_samples_a['a'].values
# k = df_samples_k['k'].values

# p = bokeh.plotting.figure(height=300, width=450,
#                           x_axis_label='time',
#                           y_axis_label='area')

# t = df_bacterium1.loc[df_bacterium1['growth_event'] == 0, 't'].values

# # Plot simulated data
# for i in range(0, len(df_samples_a), 1000):
#     p.circle(t, a[i] * np.exp(k[i] * t), size=3, alpha=0.1)

# # Plot original data
# p.circle(t, df_bacterium1.loc[df_bacterium1['growth_event'] == 0, 'area (sq um)'].values, 
#          color='black', size=4)
# bokeh.io.show(p)

Moving on to modeling.

Noncentered linear model code:

In [20]:
# Stan program for the one-level hierarchical *linear* growth model, written in
# noncentered form: each group's intercept and slope are built as
# a + tau_a * a_1_tilde and k + tau_k * k_1_tilde with standard-normal tildes.
# Likelihood: area[i] ~ normal(a_1[index_1[i]] + k_1[index_1[i]] * t[i], sigma).
# generated quantities returns `area_ppc` (posterior predictive draws) and
# `log_lik` (pointwise log likelihood, used by the model comparison below).
# index_1 must hold 1-based group indices in 1..J_1.
model_code_linear_noncentered = """
data {
  // Total number of data points
  int N;
  
  // Number of entries in each level of the hierarchy
  int J_1;

  //Index arrays to keep track of hierarchical structure
  int index_1[N];
  
  // The measurements
  real area[N];
  
  // Time
  vector[N] t;
}

parameters {
  // Hyperparameters level 0
  real a;
  real k;
  real<lower=0> sigma;

  // How hyperparameters vary
  real<lower=0> tau_a;
  real<lower=0> tau_k;

  // Hyperparameters level 1
  vector[J_1] a_1_tilde;
  vector[J_1] k_1_tilde;
}

transformed parameters {
  // Transformations for noncentered
  vector[J_1] a_1 = a + tau_a * a_1_tilde;
  vector[J_1] k_1 = k + tau_k * k_1_tilde;
  vector[N] area_temp;
  
  for (i in 1:N) {
    area_temp[i] = a_1[index_1[i]] + k_1[index_1[i]] * t[i];
  }
}

model {
  a ~ normal(1.4, 0.3);
  k ~ normal(0.01, 0.002);
  sigma ~ normal(0, 0.1);
  tau_a ~ normal(0, 0.1);
  tau_k ~ normal(0, 0.001);

  a_1_tilde ~ normal(0, 1);
  k_1_tilde ~ normal(0, 1);

  area ~ normal(area_temp, sigma);
}

generated quantities {
  vector[N] area_ppc;
  real log_lik[N];
  
  for (i in 1:N) {
    area_ppc[i] = normal_rng(area_temp[i], sigma);
  }
  
  // Compute pointwise log likelihood
  for (i in 1:N) {
    log_lik[i] = normal_lpdf(area[i] | area_temp[i], sigma);
  }
}
"""

In [21]:
# Compile the linear model (the recorded output shows a cached build was reused)
sm_noncentered = bebi103.stan.StanModel(model_code=model_code_linear_noncentered)

Using cached StanModel.


In [22]:
# Work with two growth events of bacterium 1 (events 1 and 2), stacked in
# that order
event = df_bacterium1['growth_event']
df_sub1 = df_bacterium1.loc[event == 1]
df_sub2 = df_bacterium1.loc[event == 2]

df_sub = pd.concat([df_sub1, df_sub2])

df_sub.head()

Unnamed: 0,time (min),area,growth_event,bacterium,t
98,99.0,1.403376,1,1,0
99,100.0,1.400672,1,1,1
100,101.0,1.373632,1,1,2
101,102.0,1.40608,1,1,3
102,103.0,1.362816,1,1,4


In [24]:
# The Stan model uses index_1 to look up a_1 / k_1 (vectors of length J_1), so
# index_1 must contain 1-based group indices in 1..J_1. Passing the raw
# growth_event labels only works when the chosen events happen to be numbered
# exactly 1..J_1, so map the labels to consecutive codes instead.
# pd.factorize numbers labels 0, 1, ... in order of first appearance; +1 makes
# them 1-based. For events 1 and 2 this yields the same values as before.
event_codes, event_labels = pd.factorize(df_sub['growth_event'])

data = dict(N=len(df_sub),
            J_1=len(event_labels),
            index_1=event_codes + 1,
            area=df_sub['area'].values,
            t=df_sub['t'].values)

In [25]:
# Sample the linear model with a fixed seed for reproducibility. adapt_delta and
# max_treedepth are passed explicitly through `control`.
samples_linear = sm_noncentered.sampling(data=data, 
                                         seed=2389412, 
                                         control=dict(adapt_delta=0.99, max_treedepth=11))

# Convert to data frame for easy use later
df_linear = bebi103.stan.to_dataframe(samples_linear)

# Print the sampler diagnostics (n_eff, Rhat, divergences, tree depth, E-BFMI)
bebi103.stan.check_all_diagnostics(samples_linear)

n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 11.
E-BFMI indicated no pathological behavior.


0

In [26]:
# Trace plots of the top-level hyperparameters a and k
bokeh.io.show(bebi103.viz.trace_plot(samples_linear, pars=['a', 'k'], line_width=2))

In [27]:
# NOTE(review): despite the `_ppc` name this holds the draws of `k_1`, not
# posterior predictive samples. It is not used before In [30] reassigns the
# same name to the `area_ppc` draws, so this cell looks like leftover scratch.
df_linear_ppc = bebi103.stan.extract_array(samples_linear, name='k_1')

In [28]:
# Corner plot of a, k and sigma for the linear model
bokeh.io.show(bebi103.viz.corner(samples_linear, pars=['a', 'k', 'sigma']))

In [29]:
def hw92_predictive(df, x, y=None, namex='index_1', name='F_ppc', perc=[80, 60, 40, 20],
                    x_axis_label=None, y_axis_label=None, title=None, plot_width=350, plot_height=225,
                    color='blue', data_color=color_palette[1], diff=False, p=None):
    '''Plot predictive percentile envelopes against an input variable.

    For every position i of `x`, the predictive samples whose `namex` value
    equals i + 1 (1-based, as in Stan) are summarized by their median and
    by one percentile band per entry of `perc`. Bands are drawn as filled
    regions, the median as circles, and the measured data `y` (if given)
    as circles on top.

    Parameters
    ----------
    df : pandas.DataFrame
        MCMC sampling data frame with one row per draw per data point. Must
        contain the columns `namex` and `name`.
    x : array_like
        Input variable (x-coordinates), one entry per data point.
    y : array_like, optional
        Measured data to overlay, same length as `x`.
    namex : str, default 'index_1'
        Name of the column of `df` holding the 1-based data point index.
    name : str, default 'F_ppc'
        Name of the column of `df` holding the predictive samples.
    perc : list, default [80, 60, 40, 20]
        Percentiles for the colored envelopes: entry q gives the band
        between the (50 - q/2)th and (50 + q/2)th percentile. Envelope j
        is drawn with palette entry j and the median with the last palette
        entry, so at most four can be specified before the colors clash.
    x_axis_label, y_axis_label, title, plot_width, plot_height
        Passed to bokeh.plotting.figure; only used when `p` is None.
    color : str, default 'blue'
        Palette for the envelopes: 'green', 'blue', 'red', 'gray',
        'purple', 'orange' or 'betancourt'.
    data_color : str
        Color of the circles marking the measured data `y`.
    diff : bool, default False
        If True, plot differences between consecutive data points: `x`
        drops its first entry, `y` becomes np.diff(y), and the samples for
        point i + 2 minus those for point i + 1 are summarized.
    p : bokeh figure, optional
        Existing figure to draw on. A new one is created when None.

    Returns
    -------
    bokeh figure
        The figure that was drawn on.

    Raises
    ------
    RuntimeError
        If `color` is not one of the allowed palette names.
    '''
    colors = {'blue': ['#9ecae1','#6baed6','#4292c6','#2171b5','#084594'],
              'green': ['#a1d99b','#74c476','#41ab5d','#238b45','#005a32'],
              'red': ['#fc9272','#fb6a4a','#ef3b2c','#cb181d','#99000d'],
              'orange': ['#fdae6b','#fd8d3c','#f16913','#d94801','#8c2d04'],
              'purple': ['#bcbddc','#9e9ac8','#807dba','#6a51a3','#4a1486'],
              'gray': ['#bdbdbd','#969696','#737373','#525252','#252525'],
              'betancourt': ['#DCBCBC', '#C79999', '#B97C7C',
                             '#A25050', '#8F2727', '#7C0000']}

    # Validate against the palette table itself so the check and the message
    # cannot drift apart from the palettes actually available
    if color not in colors:
        raise RuntimeError("Only allowed colors are 'green', 'blue', 'red', 'gray', "
                           "'purple', 'orange', 'betancourt'")

    if p is None:
        p = bokeh.plotting.figure(plot_width=plot_width,
                                  plot_height=plot_height,
                                  x_axis_label=x_axis_label,
                                  y_axis_label=y_axis_label,
                                  title=title)

    if diff:
        # Differences are defined between consecutive points, so there is one
        # fewer of them than there are points
        x = x[1:]
        if y is not None:
            y = np.diff(y)

    # Percentile levels laid out as [lo_0, hi_0, lo_1, hi_1, ...] so that rows
    # 2j and 2j + 1 of y_ppc bound envelope j; the last row holds the median
    levels = [q for pct in perc for q in (50 - pct / 2, 50 + pct / 2)]

    Nb = len(x)
    y_ppc = np.empty((len(levels) + 1, Nb))
    point_index = df[namex]
    for i in range(Nb):
        draws = df.loc[point_index == i + 1, name].values
        if diff:
            draws = df.loc[point_index == i + 2, name].values - draws
        y_ppc[-1, i] = np.median(draws)
        if levels:
            y_ppc[:-1, i] = np.percentile(draws, levels)

    for j in range(len(perc)):
        bebi103.viz.fill_between(x, y_ppc[j * 2, :],
                     x, y_ppc[j * 2 + 1,:],
                     p=p,
                     show_line=False,
                     fill_color=colors[color][j])

    # Median of the predictive samples
    p.circle(x, y_ppc[-1, :],
           size=4,
           color=colors[color][-1])

    # Measured data, drawn in the caller-selected color (this argument used to
    # be ignored in favor of a hard-coded 'orange')
    if y is not None:
        p.circle(x, y, size=4, color=data_color)

    return p

In [30]:
# Observed data for the modeled subset
time = df_sub['time (min)'].values
val = df_sub['area'].values

# Posterior predictive draws of the area from the linear model
df_linear_ppc = bebi103.stan.extract_array(samples_linear, name='area_ppc')

# Percentile envelopes of the draws with the measurements overlaid
p1 = hw92_predictive(
    df_linear_ppc,
    time,
    val,
    name='area_ppc',
    perc=[99, 75, 50, 25],
    plot_width=500,
    plot_height=400,
)

bokeh.io.show(p1)

In [None]:
# Marginal ECDF of each top-level hyperparameter of the linear model
plots = []
for param in ['a', 'k']:
    ecdf_plot = bebi103.viz.ecdf(df_linear[param],
                                 x_axis_label=param,
                                 plot_height=200,
                                 plot_width=250)
    plots.append(ecdf_plot)

bokeh.io.show(bokeh.layouts.gridplot(plots, ncols=3))

Noncentered exponential model

In [31]:
# Stan program for the one-level hierarchical *exponential* growth model in
# noncentered form. Same data block and structure as the linear model, but the
# mean is a_1[index_1[i]] * exp(k_1[index_1[i]] * t[i] / 100).
# Time is divided by 100 inside the exponent, so k is on a rescaled axis: its
# prior here is normal(1, 0.2) with tau_k ~ normal(0, 0.1), versus
# normal(0.01, 0.002) and normal(0, 0.001) in the linear model.
# generated quantities returns `area_ppc` and the pointwise `log_lik`.
model_code_exp_noncentered = """
data {
  // Total number of data points
  int N;
  
  // Number of entries in each level of the hierarchy
  int J_1;

  //Index arrays to keep track of hierarchical structure
  int index_1[N];
  
  // The measurements
  real area[N];
  
  // Time
  vector[N] t;
}

parameters {
  // Hyperparameters level 0
  real a;
  real k;
  real<lower=0> sigma;

  // How hyperparameters vary
  real<lower=0> tau_a;
  real<lower=0> tau_k;

  // Hyperparameters level 1
  vector[J_1] a_1_tilde;
  vector[J_1] k_1_tilde;
}

transformed parameters {
  // Transformations for noncentered
  vector[J_1] a_1 = a + tau_a * a_1_tilde;
  vector[J_1] k_1 = k + tau_k * k_1_tilde;
  vector[N] area_temp;
  
  for (i in 1:N) {
    area_temp[i] = a_1[index_1[i]] * exp(k_1[index_1[i]] * t[i] / 100);
  }
}

model {
  a ~ normal(1.4, 0.3);
  k ~ normal(1, 0.2);
  sigma ~ normal(0, 0.1);
  tau_a ~ normal(0, 0.1);
  tau_k ~ normal(0, 0.1);

  a_1_tilde ~ normal(0, 1);
  k_1_tilde ~ normal(0, 1);

  area ~ normal(area_temp, sigma);
}

generated quantities {
  vector[N] area_ppc;
  real log_lik[N];
  
  for (i in 1:N) {
    area_ppc[i] = normal_rng(area_temp[i], sigma);
  }
  
  // Compute pointwise log likelihood
  for (i in 1:N) {
    log_lik[i] = normal_lpdf(area[i] | area_temp[i], sigma);
  }
}
"""

In [32]:
# Compile the exponential model (the recorded output shows a cached build was reused)
sm_exp = bebi103.stan.StanModel(model_code=model_code_exp_noncentered)

Using cached StanModel.


In [33]:
# Sample the exponential model on the same `data` dict and seed as the linear
# model.
# NOTE(review): the recorded diagnostics report 8 divergences and 56
# iterations saturating max_treedepth=11, so these draws should be treated
# with some caution until the warnings are resolved.
samples_exp = sm_exp.sampling(data=data, 
                              seed=2389412, 
                              control=dict(adapt_delta=0.99, max_treedepth=11))

# Convert to data frame for easy use later
df_exp = bebi103.stan.to_dataframe(samples_exp)

# Print the sampler diagnostics (n_eff, Rhat, divergences, tree depth, E-BFMI)
bebi103.stan.check_all_diagnostics(samples_exp)



n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
8 of 4000 (0.2%) iterations ended with a divergence.
  Try running with larger adapt_delta to remove divergences.
56 of 4000 (1.4%) iterations saturated the maximum tree depth of 11.
  Try running again with max_treedepth set to a larger value to avoid saturation.
E-BFMI indicated no pathological behavior.


12

In [34]:
# Trace plots of the top-level hyperparameters a and k (exponential model)
bokeh.io.show(bebi103.viz.trace_plot(samples_exp, pars=['a', 'k'], line_width=2))

In [35]:
# Corner plot of a and k for the exponential model
bokeh.io.show(bebi103.viz.corner(samples_exp, pars=['a', 'k']))

In [36]:
# Observed data for the modeled subset
time = df_sub['time (min)'].values
val = df_sub['area'].values

# Posterior predictive draws of the area from the exponential model
df_exp_ppc = bebi103.stan.extract_array(samples_exp, name='area_ppc')

# Percentile envelopes of the draws with the measurements overlaid
p1 = hw92_predictive(
    df_exp_ppc,
    time,
    val,
    name='area_ppc',
    perc=[99, 75, 50, 25],
    plot_width=500,
    plot_height=400,
)

bokeh.io.show(p1)

Compare the two single-level models (linear vs. exponential) using the LOO information criterion.

In [37]:
# Compare the two fits by LOO, using the pointwise log likelihood that both
# Stan programs store in the generated quantity `log_lik`
bebi103.stan.compare({'linear': samples_linear,
                      'exp': samples_exp},
                     log_likelihood='log_lik',
                     ic='loo')

Unnamed: 0,loo,ploo,dloo,weight,se,dse,warning
exp,-1012.47,5.73056,0.0,1.0,25.8899,0.0,0
linear,-718.811,5.30439,293.661,8.29914e-12,18.2026,14.2837,0


#### Two levels

In [224]:
# Stan program for the two-level hierarchical *linear* growth model in
# noncentered form: hyperparameters (a, k) -> level 1 (a_1, k_1; J_1 entries)
# -> level 2 (a_2, k_2; J_2 entries) -> data.
# index_1 (length J_2) maps each level-2 entry to its level-1 parent, and
# index_2 (length N) maps each data point to its level-2 entry; both must be
# 1-based.
# NOTE(review): the same tau_a / tau_k are used for the spread at both levels
# of the hierarchy — confirm that this is intended.
# NOTE(review): unlike the one-level models, generated quantities has no
# `log_lik`, so this fit cannot be passed to the LOO comparison as written.
model_code_linear_2 = """
data {
  // Total number of data points
  int N;
  
  // Number of entries in each level of the hierarchy
  int J_1;
  int J_2;
  
  //Index arrays to keep track of hierarchical structure
  int index_1[J_2];
  int index_2[N];
  
  // The measurements
  real area[N];
  
  // Time
  vector[N] t;
}

parameters {
  // Hyperparameters level 0
  real a;
  real k;
  real<lower=0> sigma;

  // How hyperparameters vary
  real<lower=0> tau_a;
  real<lower=0> tau_k;

  // Hyperparameters level 1
  vector[J_1] a_1_tilde;
  vector[J_1] k_1_tilde;
  
  // Hyperparameters level 2
  vector[J_2] a_2_tilde;
  vector[J_2] k_2_tilde;
}

transformed parameters {
  // Transformations for noncentered
  vector[J_1] a_1 = a + tau_a * a_1_tilde;
  vector[J_1] k_1 = k + tau_k * k_1_tilde;
  
  vector[J_2] a_2 = a_1[index_1] + tau_a * a_2_tilde;
  vector[J_2] k_2 = k_1[index_1] + tau_k * k_2_tilde;
  
  vector[N] area_temp;
  
  for (i in 1:N) {
    area_temp[i] = a_2[index_2[i]] + k_2[index_2[i]] * t[i];
  }
}

model {
  a ~ normal(1.4, 0.3);
  k ~ normal(0.01, 0.002);
  sigma ~ normal(0, 0.1);
  tau_a ~ normal(0, 0.1);
  tau_k ~ normal(0, 0.001);

  a_1_tilde ~ normal(0, 1);
  k_1_tilde ~ normal(0, 1);
  
  a_2_tilde ~ normal(0, 1);
  k_2_tilde ~ normal(0, 1);

  area ~ normal(area_temp, sigma);
}

generated quantities {
  vector[N] area_ppc;
  
  for (i in 1:N) {
    area_ppc[i] = normal_rng(area_temp[i], sigma);
  }
}
"""

In [225]:
# Compile the two-level linear model (the recorded output shows a fresh C++ build)
sm_linear_2 = bebi103.stan.StanModel(model_code=model_code_linear_2)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1ea7e02dd299844a351cb1ab33e1cf83 NOW.
  tree = Parsing.p_module(s, pxd, full_module_name)


In [234]:
# Three growth events spanning two bacteria: events 1 and 2 of bacterium 1
# and event 3 of bacterium 2, stacked in that order
df_sub1 = df.loc[(df['bacterium'] == 1) & (df['growth_event'] == 1)]
df_sub2 = df.loc[(df['bacterium'] == 1) & (df['growth_event'] == 2)]
df_sub3 = df.loc[(df['bacterium'] == 2) & (df['growth_event'] == 3)]

df_sub = pd.concat([df_sub1, df_sub2, df_sub3])

df_sub.head()

Unnamed: 0,time (min),area (sq um),growth_event,bacterium,t
98,99.0,1.403376,1,1,0
99,100.0,1.400672,1,1,1
100,101.0,1.373632,1,1,2
101,102.0,1.40608,1,1,3
102,103.0,1.362816,1,1,4


In [236]:
# Rename for convenience
# NOTE(review): `df` is already renamed in In [5], so on a top-to-bottom run
# this is a no-op. The recorded output of the previous cell still shows
# 'area (sq um)', which suggests the cells were executed out of order.
df_sub = df_sub.rename(columns={'area (sq um)': 'area'})

In [246]:
# Build the hierarchical Stan data dict (sizes and index arrays) from df_sub,
# with bacterium as level 1 and growth_event as level 2.
# NOTE(review): `data` is overwritten by the hand-built dict in In [259], and
# `df_part` is not used in any later cell shown here.
data, df_part = bebi103.stan.df_to_datadict_hier(df_sub,
                                           level_cols=['bacterium', 'growth_event'],
                                           data_cols=['area', 't'])

# Take a look
data

{'N': 288,
 'J_1': 2,
 'J_2': 3,
 'index_1': array([1, 1, 2]),
 'index_2': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 

In [259]:
# Derive the hierarchy sizes and 1-based index arrays from df_sub instead of
# hard-coding them. This gives the same values as before for the current subset
# (J_1=2, J_2=3, index_1=[1, 1, 2], index_2 = 1/2/3 per growth event) but stays
# correct if a different set of bacteria or growth events is selected.
level_cols = ['bacterium', 'growth_event']

# One row per (bacterium, growth_event) group, in order of first appearance
groups = df_sub[level_cols].drop_duplicates()

# Level-1 (bacterium) index of every level-2 group; +1 because Stan is 1-based
bacterium_codes, bacterium_labels = pd.factorize(groups['bacterium'])

# Level-2 group index of every data point. sort=False numbers the groups in
# order of first appearance, matching the row order of `groups`.
index_2 = df_sub.groupby(level_cols, sort=False).ngroup().values + 1

data = dict(N=len(df_sub),
            J_1=len(bacterium_labels),
            J_2=len(groups),
            index_1=bacterium_codes + 1,
            index_2=index_2,
            area=df_sub['area'].values,
            t=df_sub['t'].values)

In [260]:
data

{'N': 288,
 'J_1': 2,
 'J_2': 3,
 'index_1': array([1, 1, 2]),
 'index_2': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 

In [261]:
# Sample the two-level linear model with the same seed and control settings as
# the one-level fits.
# NOTE(review): the recorded diagnostics show 640 of 4000 iterations (16%)
# saturating max_treedepth=11; consider rerunning with a larger max_treedepth,
# as the diagnostic message itself suggests.
samples_linear_2 = sm_linear_2.sampling(data=data, 
                              seed=2389412, 
                              control=dict(adapt_delta=0.99, max_treedepth=11))

# Convert to data frame for easy use later
df_linear_2 = bebi103.stan.to_dataframe(samples_linear_2)

# Print the sampler diagnostics (n_eff, Rhat, divergences, tree depth, E-BFMI)
bebi103.stan.check_all_diagnostics(samples_linear_2)



n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
640 of 4000 (16.0%) iterations saturated the maximum tree depth of 11.
  Try running again with max_treedepth set to a larger value to avoid saturation.
E-BFMI indicated no pathological behavior.


8

In [262]:
# Trace plots of the top-level hyperparameters a and k (two-level model)
bokeh.io.show(bebi103.viz.trace_plot(samples_linear_2, pars=['a', 'k'], line_width=2))

In [263]:
# Corner plot of a and k for the two-level model
bokeh.io.show(bebi103.viz.corner(samples_linear_2, pars=['a', 'k']))

In [264]:
# Observed data for the three modeled growth events
time = df_sub['time (min)'].values
val = df_sub['area'].values

# Posterior predictive draws of the area from the two-level linear model
df_lin2_ppc = bebi103.stan.extract_array(samples_linear_2, name='area_ppc')

# Percentile envelopes of the draws with the measurements overlaid
p1 = hw92_predictive(
    df_lin2_ppc,
    time,
    val,
    name='area_ppc',
    perc=[99, 75, 50, 25],
    plot_width=500,
    plot_height=400,
)

bokeh.io.show(p1)

In [None]:
# Load the watermark extension, which provides the %watermark magic used below
%load_ext watermark

In [None]:
# Record the Python version and the versions of the listed packages
# NOTE(review): pandas, bebi103 and pystan are used above but not listed here
%watermark -v -p numpy,scipy,bokeh,jupyterlab