In [1]:
import pymc3 as pm
import arviz as az
import numpy as np

# Data Extraction

In [2]:
# Read the whole CSV into one 2-D float array (comma-separated values).
data_array = np.loadtxt('iop.csv', delimiter=',')

# Column 0 is used as the response (IOP indicator); column 1 is used as the
# predictor (cornea thickness).
low_iop_indicators = data_array[:, 0]
cornea_thickness = data_array[:, 1]

# Rescale the predictor: subtract its mean, then divide by TWO standard
# deviations (not one).
thickness_center = cornea_thickness.mean()
thickness_scale = 2 * cornea_thickness.std()
cornea_thickness_std = (cornea_thickness - thickness_center) / thickness_scale

# Logit Model

In [4]:
# Logistic regression of the binary outcome `low_iop_indicators` on the
# standardized cornea thickness:
#     P(y = 1 | x) = invlogit(intercept + slope * x)
with pm.Model() as m_logit:
    # Data containers registered on the model
    x_data = pm.Data("x_data", cornea_thickness_std)  # use standardized data
    y_data = pm.Data("y_data", low_iop_indicators)

    # Priors on the regression coefficients
    intercept = pm.Normal("intercept", mu=0, sigma=10)
    slope = pm.Normal("slope", mu=0, sigma=5)

    # Logistic (inverse-logit) link maps the linear predictor to a probability
    p = pm.math.invlogit(intercept + slope * x_data)

    # Bernoulli likelihood for the observed indicators
    pm.Bernoulli("y", p=p, observed=y_data)

    # Sample 5000 draws per chain.
    # - `random_seed` is fixed so a fresh "Restart & Run All" reproduces the
    #   same posterior draws (and therefore the same downstream numbers).
    # - `idata_kwargs` was dropped: PyMC3 only forwards it when it builds an
    #   InferenceData, which `return_inferencedata=False` turns off, so the
    #   argument had no effect here. The raw MultiTrace return type is kept
    #   so later cells that rely on it keep working.
    trace_logit = pm.sample(5000, random_seed=42, return_inferencedata=False)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [slope, intercept]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 6 seconds.


# Probit Model

In [6]:
# Probit regression of the binary outcome `low_iop_indicators` on the
# standardized cornea thickness:
#     P(y = 1 | x) = invprobit(intercept + slope * x)
# Same priors and data as the logit model; only the link function differs.
with pm.Model() as m_probit:
    # Data containers registered on the model
    x_data = pm.Data("x_data_probit", cornea_thickness_std)  # use standardized data
    y_data = pm.Data("y_data_probit", low_iop_indicators)

    # Priors on the regression coefficients
    intercept = pm.Normal("intercept", mu=0, sigma=10)
    slope = pm.Normal("slope", mu=0, sigma=5)

    # Probit (inverse-probit) link maps the linear predictor to a probability
    # NOTE(review): the recorded output for this cell shows a Theano
    # "Optimization failure due to: local_grad_log_erfc_neg" message, yet
    # sampling still ran to completion. It looks like a skipped graph
    # optimization rather than a modelling error -- confirm.
    p = pm.math.invprobit(intercept + slope * x_data)

    # Bernoulli likelihood for the observed indicators
    pm.Bernoulli("y", p=p, observed=y_data)

    # Sample 5000 draws per chain.
    # - `random_seed` is fixed so a fresh "Restart & Run All" reproduces the
    #   same posterior draws (and therefore the same downstream numbers).
    # - `idata_kwargs` was dropped: PyMC3 only forwards it when it builds an
    #   InferenceData, which `return_inferencedata=False` turns off, so the
    #   argument had no effect here. The raw MultiTrace return type is kept
    #   so later cells that rely on it keep working.
    trace_probit = pm.sample(5000, random_seed=42, return_inferencedata=False)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
ERROR (theano.graph.opt): Optimization failure due to: local_grad_log_erfc_neg
ERROR (theano.graph.opt): node: Elemwise{true_div,no_inplace}(Elemwise{mul,no_inplace}.0, Elemwise{erfc,no_inplace}.0)
ERROR (theano.graph.opt): TRACEBACK:
ERROR (theano.graph.opt): Traceback (most recent call last):
  File "/Users/adamcuculich/georgia-tech/bayesian-stats/env/lib/python3.9/site-packages/theano/graph/opt.py", line 2017, in process_node
    replacements = lopt.transform(fgraph, node)
  File "/Users/adamcuculich/georgia-tech/bayesian-stats/env/lib/python3.9/site-packages/theano/graph/opt.py", line 1209, in transform
    return self.fn(*args, **kwargs)
  File "/Users/adamcuculich/georgia-tech/bayesian-stats/env/lib/python3.9/site-packages/theano/tensor/opt.py", line 7291, in local_grad_log_erfc_neg
    if not exp_in.owner.inputs[0].owner:
AttributeError: 'NoneType' object has no attribute 'owner'

Multiprocess sampling (

  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 6 seconds.


# Deviance

In [10]:
# Logit model: WAIC reported on the deviance scale.
# The model context is kept open around the ArviZ call (the trace was sampled
# with return_inferencedata=False, so presumably ArviZ needs the model to
# convert it -- confirm). Printing does not need the context.
with m_logit:
    deviance_logit = az.waic(trace_logit, scale="deviance")
print(f"Deviance for logit model: {deviance_logit.waic}")

# Probit model: same computation, same scale, so the two numbers are
# directly comparable (smaller is better on the deviance scale).
with m_probit:
    deviance_probit = az.waic(trace_probit, scale="deviance")
print(f"Deviance for probit model: {deviance_probit.waic}")




Deviance for logit model: 136.5728992407411
Deviance for probit model: 136.00296371082013


# Conclusion

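As shown above, the deviance (computed here as the WAIC on the deviance scale) is slightly smaller for the probit model (136.00) than for the logit model (136.57), suggesting that the probit model has a slightly better fit. The difference of roughly 0.57 is small, however, so the two link functions describe these data almost equally well.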