In [36]:
import pandas as pd
import numpy as np
import pymc3 as pm
from pytensor.tensor import subtensor as st

In [29]:
q2_data = pd.read_csv("clouds.csv")
q2_data.loc[:, ["Season", "Seeded", "DIFF"]].head()

Unnamed: 0,Season,Seeded,DIFF
0,Autumn,S,0.45
1,Autumn,U,-0.397
2,Winter,S,-0.768
3,Winter,U,-1.035
4,Winter,S,-0.562


In [30]:
season = pd.Categorical(q2_data["Season"]).codes
seeded = pd.Categorical(q2_data["Seeded"]).codes
rain_diff = q2_data["DIFF"].to_numpy()

season

array([0, 0, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0,
       0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1,
       1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3,
       3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0,
       0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1],
      dtype=int8)

In [31]:
seasons, _ = pd.factorize(q2_data["Season"])
seeded, _ = pd.factorize(q2_data["Seeded"])

np.unique(seeded)

array([0, 1])

In [32]:
coords = {
    "season_cats": np.unique(seasons),
    "seeded_cats": np.unique(seeded),
    "idx": q2_data["Seeded"].index,
}

coords

{'season_cats': array([0, 1, 2, 3]),
 'seeded_cats': array([0, 1]),
 'idx': RangeIndex(start=0, stop=108, step=1)}

In [34]:
def differences(var, index):
    """Calculate differences between levels with names like "alpha[low] - alpha[high]".

    var: pytensor.tensor.var.TensorVariable
    index: pandas.Index
    """
    name = var.name
    count = 0
    for i, j in combinations(range(index.size), 2):
        a, b = index[i], index[j]
        pm.Deterministic(f"{name}[{a}] - {name}[{b}]", var[i] - var[j])
        count += 1

# Model

In [37]:
with pm.Model(coords=coords) as m:
    season_data = pm.Data("season", seasons, dims="idx")
    seeded_data = pm.Data("seeded", seeded, dims="idx")
    diff_data = pm.Data("diff", rain_diff, dims="idx")

    mu0 = pm.Normal("mu0", 0, tau=0.01)
    _alpha = pm.Normal("_alpha", 0, tau=0.01, dims="season_cats")
    _beta = pm.Normal("_beta", 0, tau=0.01, dims="seeded_cats")

    sigma = pm.Exponential("sigma", 0.05)

    # stz constraints
    alpha = pm.Deterministic(
        "alpha", st.set_subtensor(_alpha[0], -pm.math.sum(_alpha[1:]))
    )
    beta = pm.Deterministic("beta", st.set_subtensor(_beta[0], -pm.math.sum(_beta[1:])))

    mu = mu0 + alpha[season_data] + beta[seeded_data]

    pm.Normal("likelihood", mu, sigma, observed=rain_diff)

    differences(alpha, coords["season_cats"])
    differences(beta, coords["seeded_cats"])

    trace = pm.sample(2000)

NotImplementedError: Cannot convert Subtensor{int64}.0 to a tensor variable.

In [None]:
az.summary(trace, var_names="~_", filter_vars="like", kind="stats", hdi_prob=0.95)