In [3]:
import arviz as az
import numpy as np
import pandas as pd
import pymc as pm
from pymc.math import exp, ge, switch

In [4]:
data = pd.read_csv("bladderc.csv")

In [13]:
data.head(20)

Unnamed: 0,time,observed,group
0,0,0,0
1,1,0,0
2,4,0,0
3,7,0,0
4,10,0,0
5,6,1,0
6,14,0,0
7,18,0,0
8,5,1,0
9,12,1,0


In [15]:
censored = data[data.observed==0]['observed'].values
y_uncensored = data[data.observed!=0]['time'].values
x_uncensored = data[data.observed!=0]['group'].values
x_censored = data[data.observed==0]['group'].values
censored

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:
censored

array([ 0,  1,  4,  7, 10, 14, 18, 23, 26, 29, 29, 29, 32, 34, 36, 37, 41,
       49, 59,  1,  1,  9, 10, 13, 18, 22, 25, 25, 25, 38, 41, 41, 44, 45,
       46, 49, 50, 54, 59])

In [16]:
y_uncensored

array([ 6,  5, 12, 10,  3,  3,  7,  3,  1,  2, 25, 28,  2,  3, 12, 29,  9,
       16,  3,  6,  3,  9, 18, 35, 17,  3,  2,  5,  2,  5,  3,  1, 17,  2,
       17,  6,  6,  2, 26, 22,  4, 24,  1,  2,  2,  4, 38])

In [17]:
x_uncensored

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1])

In [9]:
x_uncensored

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1])

In [19]:
x_censored

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

So if we oberved the tumor, we want to right-censor the data. If not, we don't.

In [5]:
data["censored"] = 0
data.loc[data["observed"] == 0, "censored"] = data["time"]

y = data["time"].to_numpy(copy=True)
x = data["group"].to_numpy(copy=True)
observed = data["observed"].to_numpy(copy=True).astype(bool)
censored = data["censored"].to_numpy(copy=True)

np.sum(observed), y.shape, x.shape, observed.shape, censored.shape

(47, (86,), (86,), (86,), (86,))

In [6]:
x_uncensored = x[observed]
x_censored = x[~observed]

In [7]:
y_uncensored = y[observed]

In [8]:
censored = censored[~observed]

In [11]:
y_uncensored

array([ 6,  5, 12, 10,  3,  3,  7,  3,  1,  2, 25, 28,  2,  3, 12, 29,  9,
       16,  3,  6,  3,  9, 18, 35, 17,  3,  2,  5,  2,  5,  3,  1, 17,  2,
       17,  6,  6,  2, 26, 22,  4, 24,  1,  2,  2,  4, 38])

In [13]:
x_uncensored

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1])

In [23]:
# right-censored model

with pm.Model() as m:
    beta0 = pm.Normal("beta0", 0, sigma=1000)
    beta1 = pm.Normal("beta1", 0, sigma=1000)
    
    λ_uncensored = exp(beta0 + beta1 * x_uncensored)
    λ_censored = exp(beta0 + beta1 * x_censored)
    
    likelihood_uncensored = pm.Exponential("likelihood_uncensored", λ_uncensored, observed=y_uncensored, shape=y_uncensored.shape[0])
    likelihood_censored = pm.Bound("censored", pm.Exponential.dist(λ_censored), lower=censored, upper=np.inf, shape=censored.shape[0])

    mu_placebo = pm.Deterministic("mu_placebo", exp(-beta0))
    mu_chemo = pm.Deterministic("mu_chemo", exp(-beta0 - beta1))

    mu_diff = pm.Deterministic("mu_diff", mu_chemo - mu_placebo)

    H_prob = pm.Deterministic("H_prob", switch(ge(mu_diff, 0), 1, 0))

    H_prob2 = pm.Deterministic("H_prob2", switch(ge(mu_chemo, mu_placebo), 1, 0))
    trace = pm.sample(
        10000, tune=2000, cores=4, init="auto"
    )

  variables = ufunc(*ufunc_args, **ufunc_kwargs)
Apply node that caused the error: bound_rv{0, (0, 0, 0), floatX, False}(RandomStateSharedVariable(<RandomState(MT19937) at 0x1665FFD40>), TensorConstant{(1,) of 39}, TensorConstant{11}, exponential_rv{0, (0,), floatX, False}.out, TensorConstant{[ 0.  1.  ... 54. 59.]}, TensorConstant{inf})
Toposort index: 13
Inputs types: [RandomStateType, TensorType(int64, (1,)), TensorType(int64, ()), TensorType(float64, (None,)), TensorType(float64, (39,)), TensorType(float64, ())]
Inputs shapes: ['No shapes', (1,), (), (39,), (39,), ()]
Inputs strides: ['No strides', (8,), (), (8,), (8,), ()]
Inputs values: [RandomState(MT19937) at 0x1665FFD40, array([39]), array(11), 'not shown', 'not shown', array(inf)]
Outputs clients: [['output'], []]

Backtrace when the node is created (use Aesara flag traceback__limit=N to make it longer):
  File "/Users/aaron/mambaforge/envs/pymc-dev-py39/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3301

  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 2_000 tune and 10_000 draw iterations (8_000 + 40_000 draws total) took 21 seconds.


In [22]:
az.summary(trace, hdi_prob=0.9)

Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,-3.28,0.186,-3.588,-2.975,0.001,0.001,19211.0,21245.0,1.0
beta1,-0.54,0.303,-1.022,-0.029,0.002,0.002,18118.0,24839.0,1.0
censored[0],27.136,27.624,0.001,62.629,0.146,0.104,23322.0,15029.0,1.0
censored[1],28.127,27.965,1.0,63.868,0.132,0.103,31817.0,17785.0,1.0
censored[2],31.161,28.345,4.0,67.208,0.139,0.103,27466.0,16962.0,1.0
censored[3],34.126,28.021,7.0,70.057,0.145,0.103,24576.0,15897.0,1.0
censored[4],37.096,28.078,10.0,72.813,0.138,0.105,29287.0,17957.0,1.0
censored[5],41.177,28.141,14.002,77.106,0.142,0.104,25344.0,16411.0,1.0
censored[6],45.021,28.273,18.001,80.648,0.136,0.105,30355.0,17390.0,1.0
censored[7],50.218,28.304,23.0,86.367,0.14,0.102,26083.0,16495.0,1.0
