In [1]:
import arviz as az
import numpy as np
import pymc3 as pm

# Data Extraction

In [2]:
data_array = np.loadtxt('bladderc.csv', delimiter=',', skiprows=1)
time_data = data_array[:, 0]        # time at which the observation was made
observed_data = data_array[:, 1]    # whether or not cancer was observed
group_data = data_array[:, 2]       # whehter or not placebo or treatment group


# Separate data into censored and uncensored observations
observed_mask = observed_data == 1
censored = observed_data[~observed_mask]

time_cens = time_data[~observed_mask]
time_uncens = time_data[observed_mask]
group_cens = group_data[~observed_mask]
group_uncens = group_data[observed_mask]    

# Model

In [5]:
with pm.Model() as m:
    # Priors
    beta0 = pm.Normal('beta0', mu=0, tau=0.0001)
    beta1 = pm.Normal('beta1', mu=0, tau=0.0001)

    # Rate parameters
    λ_cens = pm.math.exp(beta0 + beta1 * group_cens)
    λ_uncens = pm.math.exp(beta0 + beta1 * group_uncens)

    # Impute censored data
    BoundedExponential = pm.Bound(pm.Exponential, lower=np.min(censored))
    impute_censored = BoundedExponential('impute_censored', lam=λ_cens, shape=censored.shape[0])

    # Likelihood
    likelihood = pm.Exponential(
        "likelihood",
        lam=λ_uncens,
        observed=time_uncens,
        shape=time_uncens.shape[0],
    )

    # Means for placebo and treatment times
    μ0 = pm.Deterministic('μ0', pm.math.exp(-beta0))
    μ1 = pm.Deterministic('μ1', pm.math.exp(-(beta0 + beta1)))

    # Difference of means
    diff_means = pm.Deterministic('diff_means', μ1 - μ0)

    # Hypothesis testing
    hypothesis = pm.Deterministic("hypothesis", pm.math.switch(pm.math.ge(μ1, μ0), 1, 0))

    trace = pm.sample(5000, target_accept=0.95)

  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [impute_censored, beta1, beta0]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 16 seconds.


# Show Statistics

In [6]:
az.summary(trace)

Got error No model on context stack. trying to find log_likelihood in translation.


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,-2.279,0.185,-2.634,-1.943,0.002,0.001,10557.0,13425.0,1.0
beta1,-0.064,0.3,-0.628,0.499,0.003,0.002,9795.0,12705.0,1.0
impute_censored[0],10.005,10.377,0.0,28.995,0.067,0.051,15380.0,9188.0,1.0
impute_censored[1],9.812,10.238,0.0,28.067,0.068,0.05,13314.0,7989.0,1.0
impute_censored[2],9.913,10.282,0.001,28.418,0.065,0.051,15791.0,8879.0,1.0
impute_censored[3],9.988,10.346,0.002,28.492,0.069,0.054,15059.0,9089.0,1.0
impute_censored[4],10.015,10.288,0.002,28.204,0.064,0.05,17154.0,8793.0,1.0
impute_censored[5],9.817,10.213,0.0,28.293,0.06,0.049,17905.0,8946.0,1.0
impute_censored[6],9.9,10.333,0.001,28.576,0.067,0.051,15606.0,9159.0,1.0
impute_censored[7],9.929,10.249,0.004,28.248,0.065,0.05,16435.0,9798.0,1.0


# Conclusion

### Is the 95% Credible Set for μ1 − μ0 all positive?
 The 95% credible set ranges from approximately -5.347 to 6.873. Because this interval includes negative values, it is not all positive. Therefore, based on this credible set alone, we cannot confidently state that the treatment increases the time to cancer recurrence compared to the placebo.

 ### What is the posterior probability of hypothesis H : μ1 > μ0?
 The mean of the hypothesis variable in the trace (approximately 0.58) represents the proportion of the posterior distribution where μ1 is greater than μ0. Based on this, there is an estimated 58% posterior probability that the treatment group has a longer time to cancer recurrence than the placebo group.

 ### Comment on the benefits of the treatment.
 Based on the results, there is evidence that suggests the treatment may increase the time to cancer recurrence, but the evidence is not strong. The 95% credible set for the difference in means includes zero, which indicates that we cannot be 95% confident that the treatment provides a benefit over placebo in terms of increasing the time to cancer recurrence. However, the posterior probability that the treatment is better than the placebo is 58%, which indicates that there is some evidence in favor of the treatment's efficacy--that said, it is not conclusive. Further analysis or data may be required to make a stronger statement regarding the benefits of the treatment.