# Chapter 3 Exercises

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import arviz as az
import pymc3 as pm
# Seed NumPy's global random number generator so random draws are repeatable
np.random.seed(0)



In [2]:
# Apply the "arviz-darkgrid" style sheet to the plots drawn in this notebook
az.style.use("arviz-darkgrid")

## Question 1
***

The priors in this model are

\begin{eqnarray}
\mu &\sim& Normal(0,10) \\
\sigma &\sim& \left|Normal(0,25) \right|
\end{eqnarray}


The likelihood in our model is  
$$ Normal(\mu, \sigma)$$

And the posterior is the joint distribution of the two parameters given the data

$$ p(\mu, \sigma | y) $$

## Question 2
***

There are two parameters in this model, $\mu$ and $\sigma$

## Question 3
***

Without expanding the denominator

$$ p(\mu, \sigma | y) = \frac{\prod_i Normal(y_i| \mu, \sigma) Normal(\mu|0,10) |Normal(\sigma|0,25)|}{p(y)}  $$

Expanding the denominator
$$ p(\mu, \sigma | y) = \frac{\prod_i Normal(y_i| \mu, \sigma) Normal(\mu|0,10) |Normal(\sigma|0,25)|}{\int \int \prod_i Normal(y_i| \mu, \sigma) Normal(\mu|0,10) |Normal(\sigma|0,25)| d\mu d\sigma} $$

## Question 4
***

The linear model is 
\begin{eqnarray}
\mu = \alpha + \beta x
\end{eqnarray}


The likelihood in our model is  
$$ Normal(\mu, \epsilon)$$

The posterior will have three parameters

$$ \alpha, \beta, \epsilon $$

## Question 5
***

For this exercise we will generate 57 datapoints from a distribution of $N(4, 0.5)$

In [3]:
# 57 draws from a Normal distribution with mean 4 and standard deviation 0.5
data = stats.norm(loc=4, scale=0.5).rvs(size=57)

In [4]:
# Model from Question 1: a Normal likelihood over `data`, with a Normal prior
# on the mean and a HalfNormal prior on the standard deviation.
# NOTE(review): the recorded output of this cell is
# "ValueError: Can't pass both tau and sd". The cause is not evident from the
# cell itself -- confirm how the installed PyMC3 version handles these
# positional arguments before relying on the cells below.
with pm.Model() as model:
    # Prior on the mean: Normal(0, 10)
    mu = pm.Normal("mu", 0 ,10)
    # Prior on the standard deviation: HalfNormal(25)
    sd = pm.HalfNormal("sd",25)
    # Likelihood of the observed data.
    # NOTE(review): the variable name "y," ends with a comma, which looks like
    # a typo; later cells index dataset.prior["y,"], so any rename must be
    # applied there as well.
    y = pm.Normal("y,", mu, sd, observed=data)
    
    # Draw samples from the prior and the prior predictive distribution
    prior_predictive = pm.sample_prior_predictive()
    
    # Draw posterior samples using the sampler's default settings
    trace = pm.sample()
    
    # Draw posterior predictive samples based on the posterior trace
    posterior_predictive = pm.sample_posterior_predictive(trace)

ValueError: Can't pass both tau and sd

In [None]:
# Bundle the trace and both sets of predictive samples into one ArviZ object
dataset = az.from_pymc3(
    trace=trace,
    prior=prior_predictive,
    posterior_predictive=posterior_predictive,
)

In [None]:
# Display the object returned by az.from_pymc3 to inspect its contents
dataset

Let's plot the prior distributions to get a sense of the Bayesian model's estimates before it has seen any data

In [None]:
# plot_posterior is not limited to posteriors: here it is pointed at the
# prior group to show the prior distributions of mu and sd
az.plot_posterior(data=dataset.prior, var_names=["mu", "sd"])

We'll also plot the posterior to check the distributions after updating on the data. You'll notice that the posterior for SD is bimodal; this is a result of our model definition, which takes the absolute value of sd.

In [None]:
# Posterior distributions: compare against the prior plot above and against
# the parameters of the distribution the data was generated from
az.plot_posterior(data=dataset)

In [None]:
# Display the prior group of the dataset on its own
dataset.prior

Let's also plot the prior predictive values. We'll need to do some data manipulation to get the data into a format we can use with ArviZ

In [None]:
# Pull the raw array stored under the "y," key of the prior group
prior_y_values = dataset.prior["y,"].values
print(prior_y_values.shape)

# Collapse every dimension into one flat vector for plotting.
# Note: this rebinds `prior_predictive`, which until now held the result of
# pm.sample_prior_predictive().
prior_predictive = prior_y_values.flatten()
prior_predictive.shape

In [None]:
# Kernel density estimate of the flattened prior predictive draws
az.plot_kde(values=prior_predictive)

We can then compare this to the posterior predictive distribution

In [None]:
# Posterior predictive check plot for the dataset
az.plot_ppc(data=dataset)