In [1]:
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az

## Data

In [2]:
# Read data
df = pd.read_csv('data/ir_simulations.csv')
df2 = pd.read_csv('data/sonar_simulations.csv')

data = pd.concat([df, df2]).reset_index(drop=True)
data

Unnamed: 0,trial,sensor,speed,x_init,y_init,theta_init,steps,stalled,censored
0,0,ir,8.0,23.033265,17.00646,5.277854,47,1,0
1,1,ir,5.0,16.075157,19.008258,3.468616,2,1,0
2,2,ir,7.0,13.024308,4.035999,1.675125,29,1,0
3,3,ir,6.0,12.090201,11.070986,2.919489,1,1,0
4,4,ir,6.0,11.066454,21.000817,4.737046,154,1,0
5,5,ir,5.0,24.073132,8.014609,1.815635,6,1,0
6,6,ir,7.0,12.089708,7.060024,5.985986,2,1,0
7,7,ir,8.0,6.031362,10.027173,0.412167,3,1,0
8,8,ir,8.0,13.025803,10.085996,1.144672,26,1,0
9,9,ir,5.0,19.072338,10.014212,3.26481,3,1,0


In [3]:
# Unpack into arrays
sensor = data['sensor'].map({'ir': 0 , 'sonar': 1}).to_numpy()
speed = data['speed'].to_numpy()
x_init = data['x_init'].to_numpy()
y_init = data['y_init'].to_numpy()
theta_init = data['theta_init'].to_numpy()
steps = data['steps'].to_numpy()

censored = data['censored'].to_numpy()
censored = np.where(censored == 1, steps, np.inf)
                    

In [4]:
censored

array([  inf,   inf,   inf,   inf,   inf,   inf,   inf,   inf,   inf,
         inf,   inf,   inf,   inf,   inf,   inf,  629.,   inf,   inf,
         inf,   inf,   inf,   inf,   inf,  317.,   inf, 1000.,   inf,
         inf,  652.,   inf])

In [5]:
X = np.stack([sensor, speed, x_init, y_init, theta_init], axis=1)
y = steps.copy()

print(X.shape, y.shape)

(30, 5) (30,)


In [6]:
with pm.Model() as model:
    # Non-informative priors
    alpha = pm.Exponential('alpha', 1)
    beta0 = pm.Normal('beta0', 0, 10)
    beta1 = pm.Normal('beta1', 0, 10)

    # Beta formulation (with natural logarithm link function)
    lam = pm.math.exp(beta0 + beta1 * sensor)
    beta = lam ** (-1 / alpha)

    # Likelihood (accounting for right-censored data)
    obs_latent = pm.Weibull.dist(alpha=alpha, beta=beta)
    pm.Censored('likelihood', obs_latent, lower=None, upper=censored, observed=steps)

    # Median (less affected by outliers)
    median_0 = pm.Deterministic(
        'ir_median',
        (np.log(2) * (pm.math.exp(beta0 + beta1 * 0)) ** (-1 / alpha)),
    )
    median_1 = pm.Deterministic(
        'sonar_median',
        (np.log(2) * (pm.math.exp(beta0 + beta1 * 1)) ** (-1 / alpha)),
    )

    # Sample from posterior
    censored_trace = pm.sample(3000, target_accept=0.90)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta0, beta1]


Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 3 seconds.


In [7]:
# Statistics and diagnostic summary
az.summary(censored_trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,-2.345,0.5,-3.351,-1.418,0.007,0.005,4780.0,4694.0,1.0
beta1,-1.549,0.426,-2.381,-0.722,0.006,0.004,5847.0,5726.0,1.0
alpha,0.663,0.102,0.481,0.875,0.002,0.001,4309.0,4811.0,1.0
ir_median,25.593,11.522,7.591,47.534,0.136,0.098,7569.0,7823.0,1.0
sonar_median,279.576,161.732,79.413,595.024,1.752,1.258,9345.0,8991.0,1.0


## Multivariate Regression

In [8]:
X_ir = X[X[:, 0] == 0]
X_sonar = X[X[:, 0] == 1]

In [9]:
with pm.Model() as full_model:
    X_aug = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    X_ir_aug = np.concatenate((np.ones((X_ir.shape[0], 1)), X_ir), axis=1)
    X_sonar_aug = np.concatenate((np.ones((X_sonar.shape[0], 1)), X_sonar), axis=1)

    # Non-informative priors
    alpha = pm.Exponential('alpha', 1)
    betas = pm.Normal("beta", 0, 10, shape=X_aug.shape[1])
    
    # Beta formulation (with natural logarithm link function)
    lam = pm.math.exp(pm.math.dot(X_aug, betas))
    beta = lam ** (-1 / alpha)


    # Likelihood (accounting for right-censored data)
    obs_latent = pm.Weibull.dist(alpha=alpha, beta=beta)
    pm.Censored('likelihood', obs_latent, lower=None, upper=censored, observed=y)

    # Median (less affected by outliers)
    ir_median = pm.Deterministic(
        'ir_median',
        (np.log(2) * (pm.math.exp(pm.math.dot(X_ir_aug, betas))) ** (-1 / alpha)),
    )
    sonar_median = pm.Deterministic(
        'sonar_median',
        (np.log(2) * (pm.math.exp(pm.math.dot(X_sonar_aug, betas))) ** (-1 / alpha)),
    )

    # Sample from posterior
    censored_trace2 = pm.sample(3000, target_accept=0.95)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta]


Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 9 seconds.


In [10]:
# Statistics and diagnostic summary
az.summary(censored_trace2, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta[0],-4.091,1.58,-7.169,-1.04,0.022,0.015,5326.0,6600.0,1.0
beta[1],-1.495,0.512,-2.48,-0.479,0.006,0.005,6742.0,7108.0,1.0
beta[2],0.152,0.154,-0.151,0.454,0.002,0.001,6492.0,7269.0,1.0
beta[3],0.006,0.035,-0.062,0.076,0.0,0.0,6959.0,6941.0,1.0
beta[4],0.039,0.036,-0.029,0.111,0.0,0.0,7313.0,7068.0,1.0
beta[5],-0.034,0.128,-0.284,0.216,0.001,0.001,7438.0,8239.0,1.0
alpha,0.712,0.114,0.489,0.939,0.001,0.001,7782.0,7295.0,1.0
ir_median[0],24.039,48.354,0.789,67.338,0.562,0.397,7492.0,6951.0,1.0
ir_median[1],35.087,31.472,2.666,91.261,0.398,0.281,6133.0,7484.0,1.0
ir_median[2],50.015,51.324,4.499,131.737,0.581,0.411,9721.0,8348.0,1.0
