### Data

In [93]:
import pandas as pd
import numpy as np

data = pd.read_csv("./bml-component-data.csv")

# Scale time column
data["timestamp"] /= 10000
data.head()

Unnamed: 0,ID,index,M1,M2,M3,M4,M5,timestamp,efficiency
0,CX-00,0,0.585,1.788,1.44,1.342,1.029,0.0,97.644
1,CX-00,0,0.585,1.788,1.44,1.342,1.029,0.01,90.796
2,CX-00,0,0.585,1.788,1.44,1.342,1.029,0.03,80.454
3,CX-00,0,0.585,1.788,1.44,1.342,1.029,0.04,83.102
4,CX-00,0,0.585,1.788,1.44,1.342,1.029,0.06,81.03


In [94]:
print(data["ID"].unique())

['CX-00' 'CX-01' 'CX-02' 'CX-03' 'CX-04' 'CX-05' 'CX-06' 'CX-07' 'CX-08'
 'CX-09' 'CX-10' 'CX-11' 'CX-12' 'CX-13' 'CX-14' 'CX-15' 'CX-16' 'CX-17'
 'CX-18' 'CX-19' 'CX-20' 'CX-21' 'CX-22' 'CX-23' 'CX-24' 'CX-25' 'CX-26'
 'CX-27' 'CX-28' 'CX-29' 'CX-30' 'CX-31' 'CX-32' 'CX-33' 'CX-34' 'CX-35'
 'CX-36' 'CX-37' 'CX-38' 'CX-39' 'CX-40' 'CX-41' 'CX-42' 'CX-43' 'CX-44'
 'CX-45' 'CX-46' 'CX-47' 'CX-48' 'CX-49' 'CX-50' 'CX-51' 'CX-52' 'CX-53'
 'CX-54' 'CX-55' 'CX-56' 'CX-57' 'CX-58' 'CX-59' 'CX-60' 'CX-61' 'CX-62'
 'CX-63' 'CX-64' 'CX-65' 'CX-66' 'CX-67' 'CX-68' 'CX-69' 'CX-70' 'CX-71'
 'CX-72' 'CX-73' 'CX-74']


In [95]:
# Collect all values from each CX into one row and put timestamps and efficiencies into np arrays

data_grouped = data[["ID", "timestamp", "efficiency"]].groupby(["ID"]).agg(list)

data_grouped[["timestamp", "efficiency"]] = data_grouped[["timestamp", "efficiency"]].map(np.array)

# Each row is exactly one CX
x = list(data_grouped["timestamp"])
y = list(data_grouped["efficiency"])

### Baseline Model

Model the efficiency $y_i$ as a noisy exponential decay function $f_i$, with parameters $u_i$ and $v_i$:

$$y_i(t) = f_i(t) + \epsilon$$

$$f_i(t) = u_i \cdot \exp \left \{-v_i t \right \}$$

We omit the 10,000 divisor since we already divided the timestamp earlier.

In [None]:
import numpyro as npr
import numpyro.distributions as dist
import jax.numpy as jnp

# x: [75, a]
# y: [75, a]

def model(x, y):
    u_alpha = npr.sample("u_alpha", dist.Normal(90, 1))
    u_beta = npr.sample("u_beta", dist.Normal(10, 1))
    v_alpha = npr.sample("v_alpha", dist.Normal(5, 1))
    v_beta = npr.sample("v_beta", dist.Normal(5, 1))

    sigma = npr.sample("sigma", dist.Normal(0, 1))

    # Generate u_i, v_i, and noise variance for all CX components
    with npr.plate("cx-component", len(x)):
        u = npr.sample("u", dist.Normal(u_alpha, u_beta))
        v = npr.sample("v", dist.Normal(v_alpha, v_beta))

        # Get the maximum length of any CX component data
        max_obs = max([len(row) for row in x])

        with npr.plate("observations", max_obs):
            with npr.handlers.mask(??):
                f = npr.sample("f", u * dist.Exponential(-v * x))

                npr.sample("obs", dist.Normal(f, sigma), y)

In [97]:
from jax import random
from numpyro.infer import MCMC, NUTS


seed = 0
nuts_kernel = NUTS(model)
mcmc = MCMC(nuts_kernel, num_warmup=500, num_samples=1000)
rng_key = random.PRNGKey(seed)
mcmc.run(rng_key, x=x, y=y)
mcmc.print_summary()

TypeError: unsupported operand type(s) for *: 'ArrayImpl' and 'list'