In [1]:
import pymc as pm
import numpy as np
import arviz as az
import pandas as pd

%load_ext lab_black
%load_ext watermark

# Predicting using censored data

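This example demonstrates how to fit a Weibull time-to-event model when the most recent interval is right-censored, and how to use the posterior to predict when the next event will occur.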

Adapted from [Unit 10: katla.odc](https://raw.githubusercontent.com/areding/6420-pymc/main/original_examples/Codes4Unit10/katla.odc).

Data can be found [here](https://raw.githubusercontent.com/areding/6420-pymc/main/data/r.txt).

Associated lecture video: Unit 10 lesson 6

## Problem statement

In 2010, the Icelandic volcano Eyjafjallajökull erupted. The nearby volcano Katla erupts more frequently.

Prediction for the next Katla eruption (BUGS Book, p. 254).



notes:

Having problems with Weibull and pm.Censored again! What is the deal here? Imputed censoring works fine.

In [2]:
# fmt: off
# Years of recorded eruptions. The trailing 1000000 is not an eruption year:
# it is a sentinel that makes the final interval enormous, and later cells
# treat that final interval as the censored observation.
D = np.array([
    1177, 1262, 1311, 1357, 1416, 1440, 1450, 1500,
    1550, 1580, 1612, 1625, 1660, 1721, 1755, 1823,
    1860, 1918, 1000000,
])
# fmt: on

# Horizons p (in years) used for the p_erupt_{p} quantities in the models.
ps = [1, 5, 10, 50]

# Gaps between consecutive entries of D, i.e. the times between eruptions.
t = D[1:] - D[:-1]

In [None]:
with pm.Model() as m:
    # Right-censoring time: the final interval is only known to exceed this.
    censor_at = 100

    α = pm.Exponential("α", 0.05)  # v in BUGS model

    σ = pm.Gamma("σ", 0.01, 0.01)
    # BUGS-style parameterization: λ = σ^(-α) and β = λ^(-1/α), so β is
    # algebraically equal to σ (the Weibull scale).
    λ = 1 / σ**α
    β = λ ** (-1 / α)

    _t = pm.Weibull.dist(α, β)
    # pm.Censored expects censored observations to be recorded *at* the bound;
    # a value above `upper` has zero likelihood (log-probability of -inf), so
    # sampling cannot start. The raw data encode the censored interval as a
    # huge sentinel gap, so clip the observations to the bound first.
    pm.Censored(
        "likelihood",
        _t,
        lower=None,
        upper=censor_at,
        observed=np.minimum(t, censor_at),
    )

    # Weibull median for scale σ and shape α.
    median = pm.Deterministic("median tte", σ * np.log(2) ** (1 / α))

    # P(T < censor_at + p | T > censor_at) = 1 - S(censor_at + p) / S(censor_at),
    # with the Weibull survival function S(x) = exp(-(x / σ)^α).
    for p in ps:
        pm.Deterministic(
            f"p_erupt_{p}",
            1 - pm.math.exp((censor_at / σ) ** α - ((censor_at + p) / σ) ** α),
        )

    trace = pm.sample(3000)

The `pm.Censored` model above worked fine in PyMC 4, but it has not worked since the update to PyMC 5. The imputed-censoring method (below) works fine for now, but it will need to be updated to the new, non-deprecated method.

In [None]:
# Posterior summary table for the pm.Censored model sampled in the cell above.
az.summary(trace)

In [3]:
# Drop the last interval (the one produced by the 1000000 sentinel in D);
# the remaining intervals are used as fully observed data in the next model.
t_uncens = t[:-1]

In [4]:
with pm.Model() as m:
    # Imputed-censoring version of the model: the fully observed intervals get
    # an ordinary Weibull likelihood, and the censored final interval is
    # represented by a latent variable restricted to values >= 100.

    # α = pm.Uniform("α", 0, 10) # getting divide by 0 errors
    α = pm.TruncatedNormal("α", mu=0, sigma=5, lower=0)  # v in BUGS model

    σ = pm.Gamma("σ", 0.001, 0.001)
    # BUGS-style parameterization: λ = σ^(-α) and β = λ^(-1/α), so β is
    # algebraically equal to σ (the Weibull scale).
    λ = (1 / σ) ** α
    β = λ ** (-1 / α)

    # Latent value of the censored interval, constrained to be at least 100.
    # NOTE(review): as far as I can tell pm.Bound restricts the support without
    # renormalizing the density, which is what lets this latent variable inform
    # α and σ. A normalized truncation (e.g. pm.Truncated) would presumably not
    # be an equivalent replacement; confirm before migrating off pm.Bound.
    impute_censored = pm.Bound("impute_censored", pm.Weibull.dist(α, β), lower=100)

    # Likelihood for the intervals that were observed in full.
    pm.Weibull("uncensored", α, β, observed=t_uncens)

    # Weibull median for scale σ and shape α.
    median = pm.Deterministic("median tte", σ * np.log(2) ** (1 / α))

    # P(T < 100 + p | T > 100) = 1 - S(100 + p) / S(100), with the Weibull
    # survival function S(x) = exp(-(x / σ)^α); one quantity per horizon p.
    for p in ps:
        pm.Deterministic(
            f"p_erupt_{p}", 1 - pm.math.exp((100 / σ) ** α - ((100 + p) / σ) ** α)
        )

    # 3000 posterior draws per chain (tuning length left at the default).
    trace = pm.sample(3000)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [α, σ, impute_censored]


Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 2 seconds.


In [5]:
# Posterior summary (mean, sd, HDI, ESS, r_hat) for the imputed-censoring model.
az.summary(trace)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
α,2.004,0.386,1.3,2.732,0.005,0.004,5272.0,5525.0,1.0
σ,54.822,7.319,41.374,68.458,0.097,0.07,5780.0,6345.0,1.0
impute_censored,114.861,16.912,100.007,143.935,0.21,0.149,5125.0,3793.0,1.0
median tte,45.401,6.49,33.055,57.436,0.087,0.061,5627.0,5917.0,1.0
p_erupt_1,0.07,0.031,0.021,0.127,0.0,0.0,5858.0,5451.0,1.0
p_erupt_5,0.303,0.111,0.116,0.514,0.001,0.001,5844.0,5431.0,1.0
p_erupt_10,0.51,0.152,0.233,0.787,0.002,0.001,5827.0,5461.0,1.0
p_erupt_50,0.957,0.065,0.83,1.0,0.001,0.001,5724.0,5667.0,1.0


In [6]:
# Record the date, Python/IPython versions and imported package versions.
# NOTE(review): the recorded output shows pymc 5.0.1, which to my knowledge is
# built on PyTensor rather than Aesara/aeppl; confirm, and consider reporting
# `-p pytensor` here instead.
%watermark -n -u -v -iv -p aesara,aeppl

Last updated: Fri Feb 03 2023

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.9.0

aesara: 2.8.10
aeppl : 0.1.1

pandas: 1.5.3
pymc  : 5.0.1
arviz : 0.14.0
numpy : 1.24.1

