In [None]:
import pymc as pm
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import arviz as az
import pandas as pd

# Bayes Theorem

## 1. Discrete model's posteriors with PyMC

Below you see a PyMC model that computes and shows the prior distribution of disease. In order to compute the posterior of disease given a positive blood test, you need to **observe** the `bloodTest` variable by changing the 4th line of the code below as follows

~~~python
    bloodTest = pm.Bernoulli('bloodTest', p = positiveProb, observed=1)
~~~

**Tasks**: Add `observed=1` to bloodTest and see how the probability of disease changes.

In [None]:
with pm.Model() as model:
    disease = pm.Bernoulli('disease', p = 0.2)  # prior: P(disease) = 0.2
    positiveProb = pm.math.switch(disease, 0.7, 0.1)  # P(positive test): 0.7 if disease, else 0.1
    bloodTest = pm.Bernoulli('bloodTest', p = positiveProb)
    # Without `observed=...` on bloodTest, the samples of `disease` follow its prior.
    trace = pm.sample()
# Summary statistics of the sampled `disease` values, rounded to 2 decimals
pm.stats.summary(trace, kind="stats", var_names=["disease"]).round(2)

## 2. Continuous model's posteriors with Grid Approximation

$$\begin{align} \overbrace{D_i \sim \text{Bernoulli}(p)}^{\text{likelihood}} \\
\overbrace{p \sim \text{Uniform}(0,1)}^{\text{prior}} \end{align}$$

Below, we compute the posterior distribution of $p$ when we observe 1 sick (disease) and 1 healthy (no disease) person using grid approximation. This code approximates the continuous distribution by using `n = 5` discrete points.

**Tasks**

1. Use `n=50` discrete points to make a finer grid approximation
2. Compute the posterior given 2 sick (disease) and 7 healthy (no disease) people
> Hint: you need to change the numbers in `... ** 1` and `... ** 1` in the likelihood line.

In [None]:
# Grid approximation: evaluate prior * likelihood on n evenly spaced values of p in [0, 1]
n = 5
x = np.linspace(0, 1, n)
prior = stats.uniform.pdf(x)
# Using Bernoulli distribution - 1 disease and 1 healthy observations
# (the exponents are the number of people observed with / without the disease)
likelihood = stats.bernoulli.pmf(1, p = x) ** 1 * stats.bernoulli.pmf(0, p = x) ** 1
unnorm_posterior = likelihood * prior
# Normalize so the posterior probabilities over the grid points sum to 1
posterior = unnorm_posterior / np.sum(unnorm_posterior)


# Plotting the posterior
plt.figure(figsize=(10, 6))
# The label gives plt.legend() below an artist to show
plt.plot(x, posterior, 'b-+', linewidth=1, markersize=8, label='Posterior')
plt.fill_between(x, posterior, alpha=0.3)

plt.xlabel('Parameter value')
plt.ylabel('Posterior Probability')
plt.title(f'Posterior Distribution (Grid Approximation with {n} points)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 3. Continuous model's posteriors with PyMC

$$\begin{align} \text{disease} &\sim \text{Bernoulli}(\text{probDisease})
\\
\text{probDisease} &\sim \text{Uniform}(0,1) \end{align}$$

Below we have the PyMC implementation of the Bayesian model above. Note that this model is observed for 1 healthy (no disease) and 1 sick (disease) patient (see `disease = ... observed=[0,1])` in the code).

**Task**: Observe this model for 2 sick (disease) and 7 healthy (no disease) people and see how the posterior changes. Compare the result with grid approximation.

> Hint: you need to add another `1` and 6 more `0`s to the list in `... observed=[0,1])`

In [None]:
with pm.Model() as model2:
    # Prior: every disease probability between 0 and 1 is equally plausible
    probDisease = pm.Uniform('probDisease', lower=0, upper=1)
    # Likelihood: one entry per observed person (1 = disease, 0 = no disease)
    disease = pm.Bernoulli('disease', p = probDisease, observed = [0,1])
    trace2 = pm.sample()
# Summary table of the posterior
# (explicit display() is required: Jupyter only renders a cell's *last* expression,
#  so a bare summary call here would be silently discarded)
display(pm.stats.summary(trace2, kind="stats", var_names=["probDisease"]).round(2))
# Density plot of the posterior (trailing ';' hides the Axes repr)
az.plot_posterior(trace2, var_names=["probDisease"], hdi_prob=0.95);

## Extra: Estimating both P(Disease) and P(Blood Test | Disease) with PyMC


$$\begin{align}
\text{disease} &\sim \text{Bernoulli}(\text{probDisease}) \\
\text{bloodTest} &\sim \text{Bernoulli}(\text{probTestGivenDis}_{\text{disease}})
\\
\text{probTestGivenDis}_i &\sim \text{Uniform}(0,1) \quad \text{for} \ i=0,1 
\\
\text{probDisease} &\sim \text{Uniform}(0,1)
\end{align}$$

We can use the model above to estimate both probability of disease and conditional probability of test result given disease from data. 

The code below creates a pandas dataframe of 3 patients. The first patient has no disease and a positive blood test, the second patient has the disease and a positive blood test, and the third patient has no disease and a negative blood test.

PyMC model estimates the disease and test result probabilities from this data.

**Task**: Expand this dataset with 6 more hypothetical patients, and see how the results change.

In [None]:
# One row per patient; 1 = disease present / test positive, 0 = absent / negative
df = pd.DataFrame(
    {
        "disease": [0, 1, 0],
        "bloodTest": [1, 1, 0],
    }
)
df

In [None]:
with pm.Model() as model3:
    # Priors
    probDisease = pm.Uniform('probDisease', lower=0, upper=1)
    # probTestGivenDisease[i] = P(positive blood test | disease = i), for i = 0, 1
    probTestGivenDisease = pm.Uniform('probTestGivenDisease', lower=0, upper=1, shape=2)
    # Likelihood
    disease = pm.Bernoulli('disease', p = probDisease, observed = df.disease)
    # switch(cond, if_true, if_false): patients with disease (1) must use index 1 and
    # healthy patients (0) index 0, so the reported entries match their subscript.
    positiveProb = pm.math.switch(disease, probTestGivenDisease[1], probTestGivenDisease[0])
    bloodTest = pm.Bernoulli('bloodTest', p = positiveProb, observed  = df.bloodTest)
    trace4 = pm.sample()
# Posterior summary of P(disease) and both conditional test probabilities
pm.stats.summary(trace4, kind="stats", var_names=["probDisease", "probTestGivenDisease"]).round(2)