# HW6Q2

In [5]:
import pymc as pm
import pandas as pd
import arviz as az
import numpy as np
from pymc.math import switch, gt, exp

In [6]:
# Load the data set from bladderc.csv into a DataFrame.
bladderc = pd.read_csv("bladderc.csv")

# Show a random 5-row sample and the summary statistics. display() renders
# each frame with its own rich repr, whereas a bare tuple of DataFrames as
# the last expression falls back to a single plain-text repr.
display(bladderc.sample(5), bladderc.describe())

(    time  observed  group
 12     3         1      0
 65     6         1      1
 49     1         0      1
 75    44         0      1
 15     3         1      0,
             time   observed      group
 count  86.000000  86.000000  86.000000
 mean   18.081395   0.546512   0.441860
 std    16.442567   0.500752   0.499521
 min     0.000000   0.000000   0.000000
 25%     3.000000   0.000000   0.000000
 50%    12.500000   1.000000   0.000000
 75%    29.000000   1.000000   1.000000
 max    59.000000   1.000000   1.000000)

In [7]:
# Rows whose `observed` flag is 0 form the censored subset.
censored = bladderc.query(expr="observed == 0")
# Peek at five random rows of that subset.
censored.sample(n=5)

Unnamed: 0,time,observed,group
51,9,0,1
61,25,0,1
29,36,0,0
56,18,0,1
85,59,0,1


In [8]:
# Rows whose `observed` flag is 1 form the uncensored subset.
uncensored = bladderc.query(expr="observed == 1")
# Peek at five random rows of that subset.
uncensored.sample(n=5)

Unnamed: 0,time,observed,group
67,26,1,1
45,2,1,0
35,3,1,0
11,10,1,0
37,3,1,0


In [9]:
# Pull the `time` and `group` columns out of each subset as NumPy arrays
# so they can be passed straight into the model.
time_uncensored, group_uncensored = (
    uncensored[column].to_numpy() for column in ("time", "group")
)
time_censored, group_censored = (
    censored[column].to_numpy() for column in ("time", "group")
)

In [12]:
# Fixed seed so that Restart Kernel -> Run All reproduces the same posterior
# draws (and therefore the same summary table).
RANDOM_SEED = 42

with pm.Model() as m:
    # Priors on the regression coefficients of the log-rate.
    # tau is a precision, so tau=0.01 corresponds to a standard deviation of 10.
    beta0 = pm.Normal("beta0", 0, tau=0.01)
    beta1 = pm.Normal("beta1", 0, tau=0.01)

    # Per-observation exponential rate: log(lambda) = beta0 + beta1 * group.
    lam_censored = exp(beta0 + beta1 * group_censored)
    lam_uncensored = exp(beta0 + beta1 * group_uncensored)

    # Censored rows: a latent time is sampled for each one, constrained to be
    # at least the recorded time.
    # NOTE(review): this relies on pm.Bound restricting the support without
    # renormalising the density; confirm against the installed PyMC version,
    # where Bound may be deprecated.
    impute_censored = pm.Bound(
        "impute_censored",
        pm.Exponential.dist(lam_censored),
        lower=time_censored,
        shape=time_censored.shape[0],
    )

    # Uncensored rows: the recorded times are the observed data.
    pm.Exponential(
        "likelihood",
        lam_uncensored,
        observed=time_uncensored,
        shape=time_uncensored.shape[0],
    )

    # Mean of an Exponential(lambda) is 1 / lambda, so the group means are
    # exp(-beta0) for group 0 and exp(-beta0 - beta1) for group 1.
    mu_0 = pm.Deterministic("mu_0", exp(-beta0))
    mu_1 = pm.Deterministic("mu_1", exp(-beta0 - beta1))

    # Difference in means, and an indicator whose posterior mean is
    # P(mu_1 > mu_0 | data).
    pm.Deterministic("mu_diff", mu_1 - mu_0)
    pm.Deterministic("H", switch(gt(mu_1, mu_0), 1, 0))

    trace = pm.sample(
        10000,
        tune=2000,
        init="auto",
        target_accept=0.9,
        random_seed=RANDOM_SEED,
    )

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta0, beta1, impute_censored]


Sampling 4 chains for 2_000 tune and 10_000 draw iterations (8_000 + 40_000 draws total) took 9 seconds.


In [14]:
# Posterior summary with 95% HDIs; the leading "~" excludes the imputed
# censored times from the table.
az.summary(
    data=trace,
    var_names=["~impute_censored"],
    hdi_prob=0.95,
)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,-3.278,0.187,-3.649,-2.921,0.001,0.001,36968.0,30171.0,1.0
beta1,-0.542,0.305,-1.152,0.037,0.002,0.001,32866.0,32286.0,1.0
mu_0,27.005,5.168,17.769,37.281,0.028,0.02,36968.0,30171.0,1.0
mu_1,46.986,11.772,26.574,70.049,0.061,0.044,39098.0,34080.0,1.0
mu_diff,19.981,12.888,-3.651,46.017,0.071,0.051,33385.0,33097.0,1.0
H,0.964,0.185,1.0,1.0,0.001,0.001,33073.0,40000.0,1.0


So the posterior probability of our hypothesis $H: \mu_1 > \mu_0$ is about $0.964$, but the 95% credible set for $\mu_1 - \mu_0$, $[-3.651,\ 46.017]$, contains 0. The posterior for the difference is wide (standard deviation $\approx 12.9$), so a difference of 0 remains plausible and this study is inconclusive.