Allison Aprile

ISYE 6420

April 21, 2024

# Analysis
Reliability (time-to-collision) analysis of distance sensors using PyMC.

In [1]:
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az

## Data

### Training Data

In [2]:
# Read data
ir_data = pd.read_csv('data/ir_simulations.csv')
sonar_data = pd.read_csv('data/sonar_simulations.csv')

data = pd.concat([ir_data, sonar_data]).reset_index(drop=True)
data

Unnamed: 0,trial,sensor,speed,x_init,y_init,theta_init,steps,stalled,censored
0,0,ir,8.0,23.033265,17.00646,5.277854,47,1,0
1,1,ir,5.0,16.075157,19.008258,3.468616,2,1,0
2,2,ir,7.0,13.024308,4.035999,1.675125,29,1,0
3,3,ir,6.0,12.090201,11.070986,2.919489,1,1,0
4,4,ir,6.0,11.066454,21.000817,4.737046,154,1,0
5,5,ir,5.0,24.073132,8.014609,1.815635,6,1,0
6,6,ir,7.0,12.089708,7.060024,5.985986,2,1,0
7,7,ir,8.0,6.031362,10.027173,0.412167,3,1,0
8,8,ir,8.0,13.025803,10.085996,1.144672,26,1,0
9,9,ir,5.0,19.072338,10.014212,3.26481,3,1,0


In [3]:
# Unpack into arrays
sensor = data['sensor'].map({'ir': 0 , 'sonar': 1}).to_numpy()
speed = data['speed'].to_numpy()
x_init = data['x_init'].to_numpy()
y_init = data['y_init'].to_numpy()
theta_init = data['theta_init'].to_numpy()
steps = data['steps'].to_numpy()

# Data array
X = np.stack([sensor, speed, x_init, y_init, theta_init], axis=1)
y = steps.copy()

print(f'X dimensions: {X.shape}')
print(f'y dimensions: {y.shape}')

X dimensions: (30, 5)
y dimensions: (30,)


In [4]:
# Prepare right-censored data for modelling
censored = data['censored'].to_numpy()
censored = np.where(censored == 1, steps, np.inf)
censored

array([  inf,   inf,   inf,   inf,   inf,   inf,   inf,   inf,   inf,
         inf,   inf,   inf,   inf,   inf,   inf,  629.,   inf,   inf,
         inf,   inf,   inf,   inf,   inf,  317.,   inf, 1000.,   inf,
         inf,  652.,   inf])

### Testing

In [5]:
# Read data
test_data = pd.read_csv('data/test_data.csv')
test_data

Unnamed: 0,sensor,speed,x_init,y_init,theta_init,steps,stalled,censored
0,ir,6.0,23.03673,5.031078,4.300983,17,1,0
1,sonar,6.0,23.03673,5.031078,4.300983,75,1,0


In [6]:
# Unpack into arrays
sensor_test = test_data['sensor'].map({'ir': 0 , 'sonar': 1}).to_numpy()
speed_test = test_data['speed'].to_numpy()
x_init_test = test_data['x_init'].to_numpy()
y_init_test = test_data['y_init'].to_numpy()
theta_init_test = test_data['theta_init'].to_numpy()
steps_test = test_data['steps'].to_numpy()

# Data array
X_test = np.stack([sensor_test, speed_test, x_init_test, y_init_test, theta_init_test], axis=1)
y_test = steps_test.copy()

print(f'X_test dimensions: {X_test.shape}')
print(f'y_test dimensions: {y_test.shape}')

X_test dimensions: (2, 5)
y_test dimensions: (2,)


## Time-to-Collision Modelling

### Single Variate (`sensor`)

In [7]:
with pm.Model() as model_1:
    # Non-informative priors
    alpha = pm.Exponential('alpha', 1)
    beta0 = pm.Normal('beta0', 0, 10)
    beta1 = pm.Normal('beta1', 0, 10)

    # Beta formulation (with natural logarithm link function)
    lam = pm.math.exp(beta0 + beta1 * sensor)
    beta = lam ** (-1 / alpha)

    # Likelihood (accounting for right-censored data)
    obs_latent = pm.Weibull.dist(alpha=alpha, beta=beta)
    pm.Censored('likelihood', obs_latent, lower=None, upper=censored, observed=steps)

    # Median (less affected by outliers)
    ir_median = pm.Deterministic(
        'ir_median',
        (np.log(2) * (pm.math.exp(beta0 + beta1 * 0)) ** (-1 / alpha)),
    )
    sonar_median = pm.Deterministic(
        'sonar_median',
        (np.log(2) * (pm.math.exp(beta0 + beta1 * 1)) ** (-1 / alpha)),
    )

    # Sample from posterior
    model_1_trace = pm.sample(3000, target_accept=0.90)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta0, beta1]


Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 3 seconds.


In [8]:
# Statistics and diagnostic summary
az.summary(model_1_trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,-2.358,0.506,-3.337,-1.342,0.007,0.005,5179.0,4920.0,1.0
beta1,-1.543,0.441,-2.398,-0.68,0.006,0.004,5497.0,5311.0,1.0
alpha,0.665,0.102,0.47,0.868,0.001,0.001,4908.0,5052.0,1.0
ir_median,25.947,12.063,8.512,49.679,0.151,0.107,7113.0,7051.0,1.0
sonar_median,276.952,160.226,72.22,575.808,1.873,1.372,8140.0,8440.0,1.0


### Multivariate

In [9]:
# Data preprocessing
## Separate IR and ultrasonic datapoints
X_ir = X[X[:, 0] == 0]
X_sonar = X[X[:, 0] == 1]
X_test_ir = X_test[X_test[:, 0] == 0]
X_test_sonar = X_test[X_test[:, 0] == 1]

## Prepend data for dot production computation
X_aug = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
X_ir_aug = np.concatenate((np.ones((X_ir.shape[0], 1)), X_ir), axis=1)
X_sonar_aug = np.concatenate((np.ones((X_sonar.shape[0], 1)), X_sonar), axis=1)
X_test_ir_aug = np.concatenate((np.ones((X_test_ir.shape[0], 1)), X_test_ir), axis=1)
X_test_sonar_aug = np.concatenate((np.ones((X_test_sonar.shape[0], 1)), X_test_sonar), axis=1)

In [10]:
with pm.Model() as model_2:
    # Non-informative priors
    alpha = pm.Exponential('alpha', 1)
    betas = pm.Normal("beta", 0, 10, shape=X_aug.shape[1])
    
    # Beta formulation (with natural logarithm link function)
    lam = pm.math.exp(pm.math.dot(X_aug, betas))
    beta = lam ** (-1 / alpha)

    # Likelihood (accounting for right-censored data)
    obs_latent = pm.Weibull.dist(alpha=alpha, beta=beta)
    pm.Censored('likelihood', obs_latent, lower=None, upper=censored, observed=y)

    # Median (less affected by outliers)
    ir_median = pm.Deterministic(
        'ir_median',
        (np.log(2) * (pm.math.exp(pm.math.dot(X_ir_aug, betas))) ** (-1 / alpha)),
    )
    sonar_median = pm.Deterministic(
        'sonar_median',
        (np.log(2) * (pm.math.exp(pm.math.dot(X_sonar_aug, betas))) ** (-1 / alpha)),
    )

    # Test median
    test_ir_median = pm.Deterministic(
        'test_ir_median',
        (np.log(2) * (pm.math.exp(pm.math.dot(X_test_ir_aug, betas))) ** (-1 / alpha)),
    )
    test_sonar_median = pm.Deterministic(
        'test_sonar_median',
        (np.log(2) * (pm.math.exp(pm.math.dot(X_test_sonar_aug, betas))) ** (-1 / alpha)),
    )

    # Sample from posterior
    model_2_trace = pm.sample(3000, target_accept=0.95)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta]


Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 9 seconds.


In [11]:
# Statistics and diagnostic summary
az.summary(model_2_trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta[0],-4.075,1.57,-7.202,-1.027,0.022,0.016,5174.0,5894.0,1.0
beta[1],-1.501,0.501,-2.487,-0.525,0.006,0.004,7109.0,7441.0,1.0
beta[2],0.151,0.152,-0.146,0.448,0.002,0.001,6664.0,7183.0,1.0
beta[3],0.005,0.035,-0.063,0.071,0.0,0.0,6736.0,7007.0,1.0
beta[4],0.039,0.035,-0.029,0.108,0.0,0.0,7247.0,7412.0,1.0
beta[5],-0.036,0.129,-0.283,0.216,0.002,0.001,6374.0,7861.0,1.0
alpha,0.713,0.112,0.508,0.941,0.001,0.001,7998.0,7717.0,1.0
ir_median[0],23.659,29.064,1.278,67.596,0.368,0.26,7405.0,7427.0,1.0
ir_median[1],34.399,30.489,1.995,86.968,0.385,0.272,6419.0,7018.0,1.0
ir_median[2],49.599,47.801,3.385,127.402,0.503,0.356,10046.0,9493.0,1.0


## Comparison

In [12]:
print('Model 1 (sensor)')
az.summary(model_1_trace, hdi_prob=0.95, kind='stats', var_names=['ir_median', 'sonar_median'])

Model 1 (sensor)


Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%
ir_median,25.947,12.063,8.512,49.679
sonar_median,276.952,160.226,72.22,575.808


In [13]:
print('Model 2 (all variates)')
az.summary(model_2_trace, hdi_prob=0.95, kind='stats', var_names=['ir_median', 'sonar_median'])

Model 2 (all variates)


Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%
ir_median[0],23.659,29.064,1.278,67.596
ir_median[1],34.399,30.489,1.995,86.968
ir_median[2],49.599,47.801,3.385,127.402
ir_median[3],38.318,22.452,8.386,81.24
ir_median[4],24.781,15.725,3.328,53.209
ir_median[5],60.018,66.128,2.614,163.779
ir_median[6],50.055,43.798,5.099,123.077
ir_median[7],33.543,46.47,0.803,99.759
ir_median[8],28.53,30.758,2.084,75.562
ir_median[9],51.256,36.866,6.219,116.523


In [14]:
print('Model 2 (all variates) TEST')
az.summary(model_2_trace, hdi_prob=0.95, kind='stats', var_names=['test_ir_median', 'test_sonar_median'])

Model 2 (all variates) TEST


Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%
test_ir_median[0],58.945,50.67,6.809,146.957
test_sonar_median[0],552.45,699.693,41.185,1503.705
