In [4]:
import pymc as pm
import numpy as np
import arviz as az
import pandas as pd

%load_ext lab_black
%load_ext watermark

# Predicting using censored data

This example demonstrates ...

Adapted from [unit 10: katla.odc](https://raw.githubusercontent.com/areding/6420-pymc/main/original_examples/Codes4Unit10/katla.odc).

Data can be found [here](https://raw.githubusercontent.com/areding/6420-pymc/main/data/r.txt).

## Associated lecture video: Unit 10 Lesson 6

In [1]:
%%html
<iframe width="560" height="315" src="https://www.youtube.com/embed?v=xomK4tcePmc&list=PLv0FeK5oXK4l-RdT6DWJj0_upJOG2WKNO&index=103" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>

## Problem statement

In 2010 Icelandic volcano Eyjafjallajökull erupted. Nearby volcano Katla erupts more frequently.

Prediction for next Katla erruption (BUGS Book p254)



notes:

Having problems with Weibull and pm.Censored again! What is the deal here? Imputed censoring works fine.

In [59]:
# fmt: off
D = np.array(
    (1177, 1262, 1311, 1357, 1416, 1440, 1450, 1500, 1550, 
     1580, 1612, 1625, 1660, 1721, 1755, 1823, 1860, 1918, 2100)
)
# fmt: on

ps = [1, 5, 10, 50]

# time between eruptions
t = np.diff(D)
t

array([ 85,  49,  46,  59,  24,  10,  50,  50,  30,  32,  13,  35,  61,
        34,  68,  37,  58, 182])

In [64]:
with pm.Model() as m:
    α = pm.TruncatedNormal("α", mu=0, sigma=5, lower=0)  # v in BUGS model

    σ = pm.Gamma("σ", 0.001, 0.001)
    λ = (1 / σ)**α
    β = λ ** (-1 / α)
    
    _t = pm.Weibull.dist(α, β)
    pm.Censored("likelihood", _t, lower=None, upper=100, observed=t)
    
    median = pm.Deterministic("median tte", σ*np.log(2)**(1/α))
    
    for p in ps:
        pm.Deterministic(
            f"p_erupt_{p}", 1 - pm.math.exp((100 / σ) ** α - ((100 + p) / σ) ** α)
        )

    trace=pm.sample(3000)

Auto-assigning NUTS sampler...
INFO:pymc:Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
INFO:pymc:Initializing NUTS using jitter+adapt_diag...


SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'α_interval__': array(0.23778551), 'σ_log__': array(-0.00467777)}

Initial evaluation results:
{'α': -1.63, 'σ': -6.92, 'likelihood': -inf}

In [None]:
az.summary(trace)

In [50]:
with pm.Model() as m:
    # α = pm.Uniform("α", 0, 10) # getting divide by 0 errors
    α = pm.TruncatedNormal("α", mu=0, sigma=5, lower=0)  # v in BUGS model

    σ = pm.Gamma("σ", 0.001, 0.001)
    λ = (1 / σ) ** α
    β = λ ** (-1 / α)

    impute_censored = pm.Bound("impute_censored", pm.Weibull.dist(α, β), lower=100)

    pm.Weibull("likelihood", α, β, observed=t_uncens)

    median = pm.Deterministic("median tte", σ * np.log(2) ** (1 / α))

    for p in ps:
        pm.Deterministic(
            f"p_erupt_{p}", 1 - pm.math.exp((100 / σ) ** α - ((100 + p) / σ) ** α)
        )

    trace = pm.sample(3000)

Auto-assigning NUTS sampler...
INFO:pymc:Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
INFO:pymc:Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [α, σ, impute_censored]
INFO:pymc:NUTS: [α, σ, impute_censored]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 12 seconds.
INFO:pymc:Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 12 seconds.
There were 5 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc:There were 5 divergences after tuning. Increase `target_accept` or reparameterize.
There were 6 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc:There were 6 divergences after tuning. Increase `target_accept` or reparameterize.
There were 6 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc:There were 6 divergences after tuning. Increase `target_accept` or reparameterize.
There were 10 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc:There were 10 

In [51]:
az.summary(trace)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
α,2.008,0.39,1.292,2.733,0.005,0.004,4993.0,5855.0,1.0
σ,54.745,7.38,41.45,69.068,0.096,0.069,6058.0,6198.0,1.0
impute_censored,115.224,16.981,100.001,144.204,0.209,0.148,5127.0,3548.0,1.0
median tte,45.353,6.532,32.953,57.591,0.085,0.06,5908.0,6300.0,1.0
p_erupt_1,0.071,0.031,0.019,0.127,0.0,0.0,5328.0,4749.0,1.0
p_erupt_5,0.306,0.113,0.109,0.518,0.001,0.001,5314.0,4776.0,1.0
p_erupt_10,0.513,0.154,0.244,0.808,0.002,0.001,5299.0,4775.0,1.0
p_erupt_50,0.958,0.065,0.83,1.0,0.001,0.001,5203.0,5066.0,1.0
