In [1]:
import aesara 
import aesara.tensor as at
import arviz as az
import matplotlib.pyplot as plt
import numpy as np 
import pymc as pm 
import xarray as xr 
import pandas as pd 
import pymc.sampling_jax
import numpyro 

# Record the PyMC version that this notebook is executed with.
print(f"Running on PyMC v{pm.__version__}")

Running on PyMC v4.1.3




#### Data: loading in the Scottish lip cancer data

In [2]:
# Read the Scottish lip cancer data set that ships with PyMC.
df_scot_cancer = pd.read_csv(pm.get_data("scotland_lips_cancer.csv"))

# observed cancer counts, and the number of observations
y = df_scot_cancer["CANCER"].values
N = len(y)

# expected cancer counts E for each county (calculated using age-standardized
# rates of the local population), together with their logarithm
E = df_scot_cancer["CEXP"].values
logE = np.log(E)

# proportion of the population engaged in agriculture, forestry, or fishing
x = df_scot_cancer["AFF"].values / 10.0

# spatial adjacency information: each `ADJ` entry is a string such as "[5, 9, 11]".
# Parse it into a list of ints and shift to Python (0-based) indexing in one pass,
# giving `adj` as a list of lists.
adj = [
    [int(token) - 1 for token in entry.strip("][").split(",")]
    for entry in df_scot_cancer["ADJ"]
]

# storing the adjacency matrix as a two-dimensional np.array:
# adj_matrix[a, b] == 1 when b is listed as a neighbour of a
adj_matrix = np.zeros((N, N), dtype="int32")
for row, neighbours in enumerate(adj):
    adj_matrix[row, neighbours] = 1

In [3]:
# Edge list used by the pairwise-difference potential of the spatial prior.
#
# The quadratic form phi^T (D - W) phi equals the sum of (phi_i - phi_j)^2 over
# each *undirected* edge i ~ j exactly once. Reading the edges straight off the
# full adjacency matrix lists every edge twice, as (i, j) and (j, i), which
# doubles the penalty. We therefore symmetrise the matrix (in case a neighbour
# relation was recorded in one direction only) and keep the strict upper
# triangle, so that every edge appears once with node1 < node2.
sym_adj_matrix = np.maximum(adj_matrix, adj_matrix.T)
node1, node2 = np.nonzero(np.triu(sym_adj_matrix, k=1))

# number of undirected edges in the neighbourhood graph
N_edges = len(node1)

# Expanding to a spatio-temporal Leroux model

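Here I extend the Scottish lip cancer data over a short sequence of time points to create a synthetic spatio-temporal data set: the observed counts are scaled by a multiplier that grows linearly with time and by an area-specific random slope, and the covariate and log-offset are perturbed with Gaussian noise at each time point.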

In [4]:
# Seeded generator so that the synthetic data set is identical on every run.
rng = np.random.default_rng(8927)

# linear rate through time: 7 evenly spaced multipliers from 1.0 to 1.3 inclusive.
# `np.linspace` fixes the number of time points explicitly; with a float step the
# length returned by `np.arange` depends on floating-point rounding.
multipler = np.linspace(1.0, 1.3, num=7)
T = len(multipler)

# each area has its own random slope through time
# (column vector of shape (N, 1), broadcast across the T time points)
random_slope = 1 + rng.normal(loc=0, scale=0.15, size=(N, 1))

# new data observations, all of shape (N, T):
# counts are Poisson draws around the observed count scaled by the time
# multiplier and the area's slope (kept as floats)
y_time = rng.poisson(lam=y[:, None] * multipler[None, :] * random_slope).astype(float)
# covariate: the area's value plus Gaussian noise at each time point
x_time = rng.normal(loc=x[:, None], scale=0.1, size=(N, T))
# log-offset: the area's log expected count scaled by the multiplier, plus noise
log_offset_time = rng.normal(loc=logE[:, None] * multipler[None, :], scale=0.1)

# differencing the time dimension: time index centred on its mean, (T - 1) / 2,
# so the centring stays correct if the number of time points changes;
# the same row is repeated for every area
t_diff = np.tile(np.arange(T) - (T - 1) / 2, (N, 1))

In [5]:
# Named coordinates for the spatio-temporal model: one label per time point,
# one per area, and a length-1 "dummy" axis.
st_coords = dict(
    num_times=np.arange(T),
    num_areas=np.arange(N),
    dummy=np.arange(1),
)

####  Leroux model

We now use a Leroux model such that the random effect structure for each area $i$ is $\psi_i=\phi_i$, where the vector $\boldsymbol{\phi}$ is distributed according to 
\begin{align}
\boldsymbol{\phi}\sim\text{Normal}\Big(\mathbf{0}, \big[\tau^2\big(\rho(\mathbf{D}-\mathbf{W}) + (1-\rho)\mathbf{I}\big)\big]^{-1}\Big). 
\end{align} 
Here $\mathbf{W}$ is the binary adjacency matrix, $\mathbf{D}$ is the diagonal matrix holding the number of neighbours of each area, and $\tau^2$ is a precision parameter. When $\rho=1$, the Leroux prior is equal to an ICAR prior, and when $\rho=0$, it reduces to independent random effects. 

In [6]:
# pairwise-difference part of the Leroux log-density
def pairwise_diff_leroux(rho, phi, node1, node2):
    """Return ``-0.5 * rho * sum((phi[node1] - phi[node2]) ** 2)``.

    Parameters
    ----------
    rho : spatial smoothing weight multiplying the pairwise term.
    phi : random effects, indexed along their first axis by the node arrays.
    node1, node2 : index arrays of equal length; entry ``k`` of each names the
        two areas joined by pair ``k``.
    """
    edge_gaps = phi[node1] - phi[node2]
    squared_gaps = edge_gaps ** 2
    return -0.5 * rho * squared_gaps.sum()
# independent (sum-of-squares) part of the Leroux log-density
def square_sum(rho, phi):
    """Return ``-0.5 * (1 - rho) * sum(phi ** 2)``.

    Parameters
    ----------
    rho : spatial smoothing weight; the complement ``1 - rho`` multiplies the
        sum of squares.
    phi : random effects.
    """
    independent_weight = 1 - rho
    squares = phi ** 2
    return -0.5 * independent_weight * squares.sum()

#### Linear Time Spatio-temporal Conditional Autoregressive model

For this model, data from area $i$ at time $t$ is modelled as 
\begin{align}
y_{i, t} &\sim \text{Poisson}(\mu_{i, t})\\
\log \mu_{i, t} &= \beta_0 + \beta_1 x_{i, t} + \log E_{i, t} +\psi_{i, t},
\end{align}
where $\psi_{i, t}$ is the random effect, modelled as 
\begin{align}
\psi_{i, t}=\mu+\phi_{i}+(\alpha+\delta_i)\frac{t-\bar{t}}{T}, 
\end{align}
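where both the $\boldsymbol{\phi}$'s and $\boldsymbol{\delta}$'s have a Leroux prior. Effectively, this is a hierarchical model with random intercepts and random slopes, except that each area's intercept and slope are smoothed towards those of its directly adjacent neighbours. In the implementation below, the overall mean $\mu$ is absorbed into the intercept $\beta_0$, and the centred time index $t-\bar{t}$ is used directly rather than being divided by $T$. 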

In [7]:
# Structure matrix S implied by the (node1, node2) pairs, defined so that
#   sum_k (phi[node1[k]] - phi[node2[k]])**2 == phi^T S phi.
# Building it from the same pairs that the pairwise potential uses keeps the
# log-determinant below consistent with the quadratic form.
pair_structure = np.zeros((N, N))
np.add.at(pair_structure, (node1, node1), 1.0)
np.add.at(pair_structure, (node2, node2), 1.0)
np.add.at(pair_structure, (node1, node2), -1.0)
np.add.at(pair_structure, (node2, node1), -1.0)
# S is symmetric positive semi-definite; clip tiny negative eigenvalues caused
# by round-off so the logarithm below is always defined.
pair_eigvals = np.clip(np.linalg.eigvalsh(pair_structure), 0.0, None)


def _leroux_logdet(rho, eigvals):
    """Return 0.5 * log|rho * S + (1 - rho) * I| from the eigenvalues of S.

    This is the rho-dependent normalising term of the Leroux density. The two
    quadratic potentials alone give an unnormalised density whose normalising
    constant varies with rho, which would distort the prior placed on rho.
    """
    return 0.5 * at.log(rho * eigvals + (1 - rho)).sum()


with pm.Model(coords=st_coords) as st_leroux_model:
    # spatial intercept parameters 
    # precision priors, transform to standard deviation 
    tau_phi = pm.Gamma("tau_phi", alpha=1, beta=1)
    sigma_phi = pm.Deterministic("sigma_phi", 1/at.sqrt(tau_phi))
    # spatial smoothing prior for the intercepts
    rho_intercept = pm.Beta("rho_intercept",  alpha=1, beta=1)
    # spatial intercept random effects (unit scale; multiplied by sigma_phi in
    # the linear predictor). The Leroux log-density is assembled from a flat
    # base plus its quadratic-form and log-determinant potentials.
    phi = pm.Flat("phi", dims=("num_areas", "dummy"))
    pm.Potential("spatial_diff_intercept", pairwise_diff_leroux(rho_intercept, phi, node1, node2))
    pm.Potential("square_sum_intercept", square_sum(rho_intercept, phi))
    pm.Potential("logdet_intercept", _leroux_logdet(rho_intercept, pair_eigvals))

    # spatial slope parameters 
    # precision priors, transform to standard deviation
    tau_delta = pm.Gamma("tau_delta", alpha=1, beta=1)
    sigma_delta = pm.Deterministic("sigma_delta", 1/at.sqrt(tau_delta))
    # spatial smoothing prior for the slopes 
    rho_slope = pm.Beta("rho_slope", alpha=1, beta=1)
    # spatial slope random effects (unit scale; multiplied by sigma_delta below)
    delta = pm.Flat("delta", dims=("num_areas", "dummy"))
    pm.Potential("spatial_diff_slope", pairwise_diff_leroux(rho_slope, delta, node1, node2))
    pm.Potential("square_sum_slope", square_sum(rho_slope, delta))
    pm.Potential("logdet_slope", _leroux_logdet(rho_slope, pair_eigvals))
    
    # soft sum-to-zero constraint on the random effects: the sum of each
    # vector is given a tight Normal(0, sqrt(0.001)) log-density
    zero_constraint = pm.Normal.dist(mu=0.0, sigma=np.sqrt(0.001))
    pm.Potential("zero_sum_phi", pm.logp(zero_constraint, pm.math.sum(phi))) 
    pm.Potential("zero_sum_delta", pm.logp(zero_constraint, pm.math.sum(delta)))

    # regression coefficient priors 
    beta0 = pm.Normal("beta0", mu=0, sigma=5)
    beta1 = pm.Normal("beta1", mu=0, sigma=5) 
    # prior for alpha: overall slope over time 
    alpha = pm.Normal("alpha", mu=0, sigma=500)

    # linear predictor on the log scale; phi and delta have a trailing length-1
    # axis so they broadcast across the time dimension
    eta = pm.Deterministic("eta", log_offset_time + beta0 + beta1*x_time + phi*sigma_phi + (alpha+delta*sigma_delta)*t_diff, dims=("num_areas", "num_times")) 

    # likelihood
    obs = pm.Poisson("obs", at.exp(eta), observed=y_time, dims=("num_areas", "num_times"))

In [8]:
# Draw posterior samples with PyMC's default sampler settings. The seed is
# fixed so that a fresh "Restart & Run All" reproduces the same draws.
with st_leroux_model:
    idata = pm.sample(random_seed=8927)

Auto-assigning NUTS sampler...
INFO:pymc:Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
INFO:pymc:Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau_phi, rho_intercept, phi, tau_delta, rho_slope, delta, beta0, beta1, alpha]
INFO:pymc:NUTS: [tau_phi, rho_intercept, phi, tau_delta, rho_slope, delta, beta0, beta1, alpha]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 234 seconds.
INFO:pymc:Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 234 seconds.
