In [1]:
import arviz as az
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt

In [2]:
# Load the inhaler ratings used by the first model and preview the first rows.
inhaler_csv = "tests/data/inhaler.csv"
data = pd.read_csv(inhaler_csv)
data.head()

Unnamed: 0,subject,rating,treat,period,carry
0,1,1,0.5,0.5,0
1,2,1,0.5,0.5,0
2,3,1,0.5,0.5,0
3,4,1,0.5,0.5,0
4,5,1,0.5,0.5,0


In [3]:
# Hand-written cumulative-logit model for the inhaler ratings:
# cumulative probabilities are sigmoid(threshold_k - eta) and the category
# probabilities are the differences between consecutive cumulative ones.
with pm.Model() as model:
    # Three cutpoints, kept in increasing order by the ordered transform.
    threshold = pm.Normal(
        "threshold",
        mu=(-1, 0, 1),
        transform=pm.distributions.transforms.univariate_ordered,
    )

    # One slope per covariate, each with the default pm.Normal prior.
    treat = pm.Normal("treat")
    period = pm.Normal("period")
    carry = pm.Normal("carry")

    # Linear predictor: one value per row of `data`.
    eta = (
        treat * data["treat"].to_numpy()
        + period * data["period"].to_numpy()
        + carry * data["carry"].to_numpy()
    )

    # like brms
    # Broadcast (n_obs, 1) against (n_thresholds,) -> (n_obs, n_thresholds).
    cum_probs = pm.math.sigmoid(threshold - eta[:, None])

    # First category, interior differences, and the remainder for the last one.
    first_category = cum_probs[..., :1]
    middle_categories = cum_probs[..., 1:] - cum_probs[..., :-1]
    last_category = 1 - cum_probs[..., -1:]
    category_probs = pt.concatenate(
        [first_category, middle_categories, last_category],
        axis=-1,
    )

    # Ratings start at 1 in the data; Categorical expects 0-based codes.
    observed_codes = data["rating"].to_numpy() - 1
    pm.Categorical("response", p=category_probs, observed=observed_codes)
    idata = pm.sample()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [threshold, treat, period, carry]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 9 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics


In [4]:
# Posterior summary restricted to the cutpoints and the three slopes.
reported_vars = ["threshold", "treat", "period", "carry"]
az.summary(idata, var_names=reported_vars)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
threshold[0],0.506,0.091,0.332,0.676,0.002,0.001,2701.0,1678.0,1.01
threshold[1],3.003,0.184,2.671,3.366,0.004,0.003,2169.0,1383.0,1.0
threshold[2],4.144,0.299,3.599,4.718,0.006,0.004,2528.0,1651.0,1.0
treat,-0.739,0.244,-1.182,-0.276,0.005,0.004,2038.0,1694.0,1.0
period,0.174,0.172,-0.145,0.488,0.003,0.003,2696.0,1726.0,1.0
carry,-0.23,0.178,-0.548,0.108,0.004,0.003,2045.0,1729.0,1.0


In [21]:
# Load the stem-cell survey data; this rebinds `data`, replacing the inhaler frame.
data = pd.read_csv("stemcell.csv")

In [22]:
# Preview the first rows (columns: belief, rating, gender).
data.head()

Unnamed: 0,belief,rating,gender
0,fundamentalist,1,female
1,fundamentalist,1,female
2,fundamentalist,1,female
3,fundamentalist,1,female
4,fundamentalist,1,female


In [28]:
# Rebuild `data` from the raw file instead of recoding it in place: the rating
# reversal below is its own inverse, so an in-place version silently flips the
# scale back every second time the cell is run. Deriving from the CSV makes
# the cell idempotent and independent of execution order.
data = pd.read_csv("stemcell.csv").assign(
    # Reverse the 1-4 rating scale (1 <-> 4, 2 <-> 3).
    rating=lambda df: np.abs(df["rating"] - 5),
    # Fix the category order so that "moderate" gets code 0, which the models
    # below use as the reference level when indexing with `.cat.codes`.
    belief=lambda df: pd.Categorical(
        df["belief"],
        categories=["moderate", "fundamentalist", "liberal"],
        ordered=True,
    ),
)

In [25]:
# Distinct rating levels; the models below subtract 1 to obtain 0-based categories.
np.unique(data["rating"])

array([1, 2, 3, 4])

In [8]:
# Preview again to check the recoded `rating` column.
data.head()

Unnamed: 0,belief,rating,gender
0,fundamentalist,4,female
1,fundamentalist,4,female
2,fundamentalist,4,female
3,fundamentalist,4,female
4,fundamentalist,4,female


In [32]:
# Cumulative-logit model with a single belief effect shared by all thresholds.
# Category codes follow the categorical's order, so code 0 ("moderate") is the
# reference level and only the remaining levels get a parameter.
beliefs = ["moderate", "fundamentalist", "liberal"]
belief_idx = data["belief"].cat.codes.to_numpy()
coords = {"beliefs": beliefs[1:]}

with pm.Model(coords=coords) as model:
    # Three cutpoints, kept in increasing order by the ordered transform.
    threshold = pm.Normal(
        "threshold",
        mu=(-1, 0, 1),
        transform=pm.distributions.transforms.univariate_ordered,
    )

    # Effects for the non-reference levels; a fixed 0 is prepended for the
    # reference level so the result can be indexed directly by category code.
    belief = pm.Normal("belief", dims="beliefs")
    belief_with_reference = pt.concatenate([np.zeros(1), belief])

    # Linear predictor: one value per observation.
    eta = belief_with_reference[belief_idx]

    # like brms
    # Broadcast (n_obs, 1) against (n_thresholds,) -> (n_obs, n_thresholds).
    cum_probs = pm.math.sigmoid(threshold - eta[:, None])

    # First category, interior differences, and the remainder for the last one.
    first_category = cum_probs[..., :1]
    middle_categories = cum_probs[..., 1:] - cum_probs[..., :-1]
    last_category = 1 - cum_probs[..., -1:]
    category_probs = pt.concatenate(
        [first_category, middle_categories, last_category],
        axis=-1,
    )

    # Ratings start at 1 in the data; Categorical expects 0-based codes.
    observed_codes = data["rating"].to_numpy() - 1
    pm.Categorical("response", p=category_probs, observed=observed_codes)
    idata = pm.sample()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [threshold, belief]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 14 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics


In [33]:
# Posterior summary of every variable in the trace (belief effects and thresholds).
az.summary(idata)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
belief[fundamentalist],-0.387,0.154,-0.673,-0.088,0.004,0.003,1228.0,1160.0,1.0
belief[liberal],0.586,0.156,0.297,0.867,0.004,0.003,1498.0,1644.0,1.0
threshold[0],-2.124,0.143,-2.374,-1.833,0.004,0.003,1122.0,1278.0,1.0
threshold[1],-1.013,0.121,-1.246,-0.788,0.003,0.002,1225.0,1284.0,1.0
threshold[2],0.967,0.118,0.747,1.186,0.003,0.002,1355.0,1489.0,1.0


It matches what you get with brms if you do

```r
brm(
  formula = rating ~ 1 + belief,
  data = stemcell,
  family = cumulative("logit")
)
```

In [54]:
# Cumulative-logit model with category-specific belief effects: every
# non-reference belief level gets its own effect at each threshold.
beliefs = ["moderate", "fundamentalist", "liberal"]
belief_idx = data["belief"].cat.codes.to_numpy()
coords = {"beliefs": beliefs[1:], "threshold_dim": [0, 1, 2]}

with pm.Model(coords=coords) as model:

    # Three cutpoints, kept in increasing order by the ordered transform.
    threshold = pm.Normal(
        "threshold",
        mu=(-1, 0, 1),
        transform=pm.distributions.transforms.univariate_ordered,
        dims="threshold_dim",
    )

    # (belief level, threshold) effects; a row of zeros is prepended for the
    # reference level so the matrix can be indexed directly by category code.
    belief = pm.Normal("belief", dims=("beliefs", "threshold_dim"))
    reference_row = pt.zeros_like(belief[:1])
    belief_with_reference = pt.concatenate([reference_row, belief])

    # One row of per-threshold effects for each observation.
    eta = belief_with_reference[belief_idx]

    # # like brms
    # `eta` already has a threshold axis, so no extra padding is needed here.
    cum_probs = pm.math.sigmoid(threshold - eta)

    # First category, interior differences, and the remainder for the last one.
    first_category = cum_probs[..., :1]
    middle_categories = cum_probs[..., 1:] - cum_probs[..., :-1]
    last_category = 1 - cum_probs[..., -1:]
    category_probs = pt.concatenate(
        [first_category, middle_categories, last_category],
        axis=-1,
    )

    # Ratings start at 1 in the data; Categorical expects 0-based codes.
    observed_codes = data["rating"].to_numpy() - 1
    pm.Categorical("response", p=category_probs, observed=observed_codes)
    idata = pm.sample()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [threshold, belief]


In [55]:
# Posterior summary of every variable in the trace (per-threshold belief effects and thresholds).
az.summary(idata)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"belief[fundamentalist, 0]",-0.512,0.245,-0.97,-0.068,0.007,0.005,1173.0,1179.0,1.0
"belief[fundamentalist, 1]",-0.471,0.181,-0.787,-0.107,0.005,0.004,1246.0,1170.0,1.01
"belief[fundamentalist, 2]",-0.281,0.195,-0.636,0.088,0.005,0.004,1397.0,1292.0,1.0
"belief[liberal, 0]",0.173,0.286,-0.397,0.679,0.009,0.006,1132.0,1096.0,1.0
"belief[liberal, 1]",0.358,0.199,-0.013,0.719,0.005,0.004,1374.0,1298.0,1.0
"belief[liberal, 2]",0.735,0.176,0.419,1.065,0.005,0.004,1227.0,1424.0,1.0
threshold[0],-2.294,0.194,-2.663,-1.936,0.006,0.004,1021.0,881.0,1.0
threshold[1],-1.107,0.132,-1.382,-0.885,0.004,0.003,1383.0,1454.0,1.0
threshold[2],1.067,0.132,0.831,1.327,0.004,0.003,1268.0,1529.0,1.0


It matches what you get with brms if you do

```r
brm(
  formula = rating ~ 1 + cs(belief),
  data = stemcell,
  family = cumulative("logit")
)
```

# Ideas

* Have families where "threshold" is a parameter
* This would work as a distributional model
* We can enforce **not having** an intercept in the main formula, BUT we add it later only to reduce the predictors

```python
formula = bmb.Formula(
    "rating ~ 0 + belief",
    "threshold ~ 1" # this is implicit
)
```

You are not able to add the covariate in both places at the same time (because of identifiability reasons)

```python
formula = bmb.Formula(
    "rating ~ 0", # the linear predictor will be equal to 0
    "threshold ~ 1 + belief" # this is implicit
)
```

**Question** Does it make sense to have predictors for the thresholds? Do we want to interpret the effect they have on the different thresholds, or the effect they have on the linear predictor?

In [None]:
from formulae import design_matrices

# Check how formulae handles a formula whose right-hand side has no terms at all.
no_predictor_formula = "rating ~ 0"
design_matrices(no_predictor_formula, data)  # works, returns no common group

* cumulative
* sratio
* acat

In [None]:
def compute_cumulative_p(p, threhold):
    """Category probabilities for the cumulative ordinal model (stub, not implemented).

    Intended formula: P(Y = k) = F(tau_k - eta) - F(tau_{k - 1} - eta).

    NOTE(review): neither `p` nor `threhold` is used yet. `threhold` looks like
    a typo for "threshold" and presumably holds the cutpoints tau; `p`
    presumably carries the linear predictor eta. Confirm both and fix the
    spelling before any caller depends on the keyword name.
    """
    # cumulative model
    # P(Y = k) = F(tau_k - eta) - F(tau_{k - 1} - eta)
    ...

def compute_sratio_p():
    """Category probabilities for the sequential (stopping-ratio) model (stub, not implemented).

    Intended formula:
    P(Y = k) = F(tau_k - eta) * prod_{j=1}^{k-1}{(1 - F(tau_j - eta))}.

    NOTE(review): takes no arguments yet; it will presumably need the linear
    predictor and the thresholds, like `compute_cumulative_p` — confirm.
    """
    # sequential model
    # P(Y = k) = F(tau_k - eta) * prod_{j=1}^{k-1}{(1 - F(tau_j - eta))}
    ...

def compute_acat_p():
    """Category probabilities for the adjacent category model (stub, not implemented).

    NOTE(review): takes no arguments yet and the target formula has not been
    written down here — fill in both before implementing.
    """
    # adjacent category model
    ...

Why don't we get started without category-specific behaviors? We could get that implemented "easily".