In [None]:
import pandas as pd
import numpy as np
import pymc3 as pm
import theano
from scipy import stats
from sklearn import preprocessing

import plotly.graph_objects as go

In [None]:
# Read the semicolon-delimited foxes dataset; the first row holds the column names.
foxes = pd.read_csv(
    '../../data/foxes.csv',
    header=0,
    sep=';',
)

# Preview the first rows as a quick sanity check of the load.
foxes.head()

# 1

> Use a model to infer the total causal influence of `area` on `weight`. Would increasing the area available to each fox make it heavier (healthier)? You might want to standardize the variables. Regardless, use prior predictive simulation to show that your model’s prior predictions stay within the possible outcome range.

As the prompt suggests, we start by standardizing the relevant variables.

In [None]:
# Numeric columns to standardize before modelling.
features = ['avgfood', 'groupsize', 'area', 'weight']

# Compute the scaled values first, then write them back over the same columns.
standardized = preprocessing.scale(foxes[features])
foxes[features] = standardized

Now, we think about setting our priors in a way that makes sense. We *don't* want to include `avgfood` as a predictor in our multiple regression: `avgfood` lies on the causal path from `area` to `weight`, so conditioning on it would block that path (d-separating `weight` from `area`) and hide the very total effect we are trying to estimate.

In [None]:
# Fixed seed so the prior draws, the posterior samples and the posterior
# predictive samples are the same on every Restart & Run All.
RANDOM_SEED = 42

# Model 1: regress (standardized) weight on (standardized) area only.
with pm.Model() as model1:

    # data
    weight = foxes['weight']
    area = foxes['area']
    # NOTE: the columns below are bound here but are not used in this model's
    # linear predictor; they are kept only so the notebook namespace is unchanged.
    group = foxes['group']
    avgfood = foxes['avgfood']
    groupsize = foxes['groupsize']

    # priors (the variables were standardized earlier, so these are on the z-score scale)
    alpha = pm.Normal('alpha', 0, .2)
    b_area = pm.Normal('b_area', 0, .5)
    sigma = pm.Uniform('sigma', 0, 2)

    # model: linear predictor for the mean, Normal likelihood on the observed weights
    mu = alpha + b_area * area
    weight_hat = pm.Normal('weight_hat', mu = mu, sigma = sigma, observed = weight)

    # sampling
    # 30 draws from the prior for each parameter, used for the prior predictive plot
    prior = pm.sample_prior_predictive(samples = 30, random_seed = RANDOM_SEED)
    posterior1 = pm.sample(draws = 1000, tune = 1000, random_seed = RANDOM_SEED)
    posterior_predictive1 = pm.sample_posterior_predictive(posterior1, random_seed = RANDOM_SEED)

In [None]:
# Posterior summary table for model 1 (interval level alpha = 0.11), rounded to two decimals.
pm.summary(posterior1, alpha=0.11).round(decimals=2)

`area` doesn't appear to have any total effect on `weight` according to this model.

In [None]:
# Plot the sampled trace for model 1; the trailing semicolon suppresses the
# cell's text output so only the figure is shown.
pm.traceplot(posterior1);

In [None]:
# Prior predictive check: draw one regression line per prior sample to see
# whether the implied weights stay within a plausible range.
# Both variables were standardized earlier, so the axes are in z-score units.

# Read the predictor straight from the frame instead of relying on a name that
# happened to leak out of the model cell's `with` block.
area = foxes['area']

# Use however many draws `prior` actually holds, so the loop can never index
# past the end if the `samples` argument of the sampling cell is changed.
samples = len(prior['alpha'])

fig = go.Figure()

for draw in range(samples):
    alpha = prior['alpha'][draw]
    beta = prior['b_area'][draw]
    # Already a pandas Series (scalar * Series), so no extra wrapping is needed.
    prior_weight = alpha + beta * area

    fig.add_trace(
        go.Scatter(
            x = area,
            y = prior_weight,
            mode = 'lines'
        )
    )

fig.update_layout(
    showlegend = False,
    title = 'Prior predictive regression lines',
    xaxis = dict(title = 'area (standardized)'),
    yaxis = dict(title = 'weight (standardized)')
)
fig.show()

These priors seem fine! The range of predictions they make is well within the realm of possibility.

# 2

> Now infer the causal impact of adding food to a territory. Would this make foxes heavier? Which covariates do you need to adjust for to estimate the total causal influence of food?

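If we just want to assess the total causal impact of food, we can do a simple regression with the `avgfood` variable as the only predictor. No covariates need to be adjusted for: `area` influences `weight` only through `avgfood`, so it opens no back-door path, and `groupsize` sits on a causal path from `avgfood` to `weight`, so conditioning on it would remove part of the total effect.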

In [None]:
# Fixed seed so the posterior and posterior predictive samples are the same on
# every Restart & Run All.
RANDOM_SEED = 42

# Model 2: regress (standardized) weight on (standardized) avgfood only.
with pm.Model() as model_2:
    # data
    avgfood = foxes['avgfood']
    weight = foxes['weight']

    # priors (the variables were standardized earlier, so these are on the z-score scale)
    alpha = pm.Normal('alpha', mu = 0, sigma = .2)
    beta = pm.Normal('beta', mu = 0, sigma = .5)
    sigma = pm.Uniform('sigma', 0, 2)

    # model: linear predictor for the mean, Normal likelihood on the observed weights
    mu = alpha + beta * avgfood
    weight_hat = pm.Normal('weight_hat', mu = mu, sigma = sigma, observed = weight)

    # sampling
    posterior2 = pm.sample(draws = 1000, tune = 1000, random_seed = RANDOM_SEED)
    posterior_predictive2 = pm.sample_posterior_predictive(posterior2, random_seed = RANDOM_SEED)

In [None]:
# Posterior summary table for model 2 (interval level alpha = 0.11), rounded to two decimals.
pm.summary(posterior2, alpha=0.11).round(decimals=2)

In [None]:
# Plot the sampled trace for model 2; the trailing semicolon suppresses the
# cell's text output so only the figure is shown.
pm.traceplot(posterior2);

Interesting - according to this model, `avgfood` doesn't appear to have a convincing causal effect on `weight`.

# 3

> Now infer the causal impact of group size. Which covariates do you need to adjust for? Looking at the posterior distribution of the resulting model, what do you think explains these data? That is, can you explain the estimates for all three problems? How do they go together?

In [None]:
# Fixed seed so the posterior and posterior predictive samples are the same on
# every Restart & Run All.
RANDOM_SEED = 42

# Model 3: regress (standardized) weight on both avgfood and groupsize.
with pm.Model() as model3:
    # data
    weight = foxes['weight']
    avgfood = foxes['avgfood']
    groupsize = foxes['groupsize']

    # priors (the variables were standardized earlier, so these are on the z-score scale)
    alpha = pm.Normal('alpha', 0, .2)
    # Two slopes in one vector: beta[0] multiplies avgfood, beta[1] multiplies groupsize.
    beta = pm.Normal('beta', 0, .5, shape = 2)
    sigma = pm.Uniform('sigma', 0, 2)

    # model: linear predictor for the mean, Normal likelihood on the observed weights
    mu = alpha + beta[0] * avgfood + beta[1] * groupsize
    weight_hat = pm.Normal('weight_hat', mu = mu, sigma = sigma, observed = weight)

    # sampling
    posterior3 = pm.sample(draws = 1000, tune = 1000, random_seed = RANDOM_SEED)
    posterior_predictive3 = pm.sample_posterior_predictive(posterior3, random_seed = RANDOM_SEED)

In [None]:
# Fixed seed so the posterior and posterior predictive samples are the same on
# every Restart & Run All.
RANDOM_SEED = 42

# Model 4: regress (standardized) weight on groupsize only.
with pm.Model() as model4:
    # data
    weight = foxes['weight']
    # NOTE: avgfood is bound here but is not used in this model's linear
    # predictor; it is kept only so the notebook namespace is unchanged.
    avgfood = foxes['avgfood']
    groupsize = foxes['groupsize']

    # priors (the variables were standardized earlier, so these are on the z-score scale)
    alpha = pm.Normal('alpha', 0, .2)
    # Declared with shape = 1, so this is a length-one vector rather than a
    # scalar; left as-is so the trace shape and summary labels do not change.
    beta = pm.Normal('beta', 0, .5, shape = 1)
    sigma = pm.Uniform('sigma', 0, 2)

    # model: linear predictor for the mean, Normal likelihood on the observed weights
    mu = alpha +  beta * groupsize
    weight_hat = pm.Normal('weight_hat', mu = mu, sigma = sigma, observed = weight)

    # sampling
    posterior4 = pm.sample(draws = 1000, tune = 1000, random_seed = RANDOM_SEED)
    posterior_predictive4 = pm.sample_posterior_predictive(posterior4, random_seed = RANDOM_SEED)

In [None]:
# Posterior summary table for model 3 (interval level alpha = 0.11), rounded to two decimals.
pm.summary(posterior3, alpha=0.11).round(decimals=2)

In [None]:
# Posterior summary table for model 4 (interval level alpha = 0.11), rounded to two decimals.
pm.summary(posterior4, alpha=0.11).round(decimals=2)