In [5]:
import pymc as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pytensor.tensor as pt
import arviz as az

In [6]:
# Retention time spans (the x-axis of the plots below)
t = np.array([1, 2, 4, 7, 12, 21, 35, 59, 99, 200])
nt = len(t)
ns = 4
# Used as the Binomial number of trials in the models below
n = 18

# tmat has the same (ns, nt) layout as k: every row is a copy of t
tmat = np.repeat(t[np.newaxis, :], ns, axis=0)

# Remembered-item counts, one row per subject and one column per time span.
# -999 marks a missing observation and is masked out.
k = np.ma.masked_values(
    [
        [18, 18, 16, 13, 9, 6, 4, 4, 4, -999],
        [17, 13, 9, 6, 4, 4, 4, 4, 4, -999],
        [14, 10, 6, 4, 4, 4, 4, 4, 4, -999],
        [-999, -999, -999, -999, -999, -999, -999, -999, -999, -999],
    ],
    value=-999,
)

In [7]:
"""
This function takes the result of `pm.sample_posterior_predictive` as input
and plots the comparison between the posterior predictive and the human behavior
"""
def plot_predict(ppc, var_name="kij"):
    """Compare posterior predictive counts with the observed counts.

    One panel is drawn per subject. For every retention time span the
    posterior predictive distribution over the counts 0..n is shown as black
    squares whose area is proportional to the predictive probability of that
    count; the observed counts ``k`` are overlaid as a red line.

    Relies on the notebook-level globals ``ns``, ``nt``, ``n``, ``k`` and ``t``.

    Parameters
    ----------
    ppc : mapping
        Object indexable as ``ppc["posterior_predictive"][var_name]`` whose
        ``.values`` array can be reshaped to ``(-1, ns, nt)``.
    var_name : str, default "kij"
        Name of the predictive variable to plot.
    """
    predict_trace = ppc["posterior_predictive"][var_name]
    # Collapse all leading sample dimensions into one axis of draws
    draws = np.asarray(predict_trace.values).reshape(-1, ns, nt)

    # Two panels per row; for ns == 4 this is a 2x2 grid of size (6, 6)
    ncols = 2
    nrows = -(-ns // ncols)
    _, axes = plt.subplots(nrows, ncols, figsize=(6, 3 * nrows), squeeze=False)
    axes = axes.flatten()

    # One unit-width bin per possible count 0..n, so bin i holds exactly the
    # draws equal to i and its density is the predictive probability of i.
    # Edges 0..n would merge counts n-1 and n into a single bin.
    edges = np.arange(n + 2)
    counts = edges[:-1]

    for s in range(ns):
        ax = axes[s]
        for ts in range(nt):
            mass, _ = np.histogram(draws[:, s, ts], bins=edges, density=True)
            # Plot each bin at its own count (left edge), not one count higher
            ax.scatter([ts] * len(counts), counts, s=mass * 100, marker="s", c="k")
        ax.plot(k[s, :], c="r")

        ax.set(
            xlabel="Retention Time Span",
            ylabel="Remembered Items",
            xticks=range(len(t)),
            xticklabels=t
        )

    # Hide the spare panel left over when ns is odd
    for ax in axes[ns:]:
        ax.set_visible(False)

    plt.tight_layout()

### 3.1 Simple Model

In [8]:
with pm.Model() as simple_model:

    # Priors: Beta(1, 1) for both the decay rate alpha and the baseline beta
    alpha = pm.Beta('alpha', alpha=1, beta=1)
    beta = pm.Beta('beta', alpha=1, beta=1)

    # Retention probability theta = exp(-alpha * t) + beta, clipped into [0, 1];
    # the scalar parameters broadcast over every entry of tmat
    theta_t = pm.Deterministic('theta_t', pm.math.clip(pm.math.exp(-alpha * tmat) + beta, 0, 1))

    # Likelihood. k is a masked array; its masked entries are treated as
    # unobserved and sampled (they appear as 'k_observed_unobserved').
    k_observed = pm.Binomial('k_observed', n=n, p=theta_t, observed=k)

    # NOTE(review): the saved output of this cell is a SamplingError reporting
    # logp(alpha) = -inf at this starting point. The cause is not visible in
    # this cell; run simple_model.debug(), as the error message suggests,
    # before relying on trace1.
    # A fixed seed makes Restart & Run All reproduce the same draws.
    trace1 = pm.sample(2000, tune=2000, target_accept=0.99,
                       initvals={'alpha': 0.5, 'beta': 0.25},
                       random_seed=42)


  variables = ufunc(*ufunc_args, **ufunc_kwargs)
  variables = ufunc(*ufunc_args, **ufunc_kwargs)
  variables = ufunc(*ufunc_args, **ufunc_kwargs)
  variables = ufunc(*ufunc_args, **ufunc_kwargs)
  variables = ufunc(*ufunc_args, **ufunc_kwargs)


SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'alpha_logodds__': array(0.), 'beta_logodds__': array(-1.09861229), 'k_observed_unobserved': array([ 4,  4,  4, 15, 11,  7,  5,  5,  5,  5,  5,  4,  4], dtype=int64)}

Logp initial evaluation results:
{'alpha': -inf, 'beta': -1.58, 'k_observed_unobserved': -20.53, 'k_observed_observed': -72.81}
You can call `model.debug()` for more details.

In [None]:
# Trace plots for alpha and beta of the simple model
az.plot_trace(
    trace1,
    var_names=["alpha", "beta"],
)
plt.show()

In [None]:
# Summarize the posterior of alpha and beta for the simple model.
# Ending the cell with the frame shows it as a rich table instead of plain text.
summary = az.summary(trace1, var_names=['alpha', 'beta'])
summary

In [None]:
# Trace plot for theta_t of the simple model, drawn in compact mode
az.plot_trace(
    trace1,
    var_names=["theta_t"],
    compact=True,
)
plt.show()

In [None]:
# Posterior predictive sampling for the simple model.
# 'k_observed' is requested explicitly so the full (ns, nt) array is returned.
with simple_model:
    ppc1 = pm.sample_posterior_predictive(trace1, var_names=["k_observed"])

# plot_predict looks the samples up as ["posterior_predictive"]["kij"],
# so expose them under that key
plot_predict({"posterior_predictive": {"kij": ppc1.posterior_predictive["k_observed"]}})
plt.show()

# Explanation and Analysis:

    Priors:
        We use uniform Beta(1, 1) priors on $(0, 1)$ for both $\alpha$ (decay rate) and $\beta$ (baseline retention); 0.5 and 0.25 are only the sampler's starting values for $\alpha$ and $\beta$, respectively.

    Parameter Transformation:
        $\theta_t = \exp(-\alpha t) + \beta$ is computed for each retention time $t$, with values clipped to lie between 0 and 1.

    Likelihood:
        The likelihood of observing the data $k$ is modeled using a Binomial distribution, where the probability of success is given by $\theta_t$.

    Sampling:
        We run the MCMC sampler to obtain the posterior distributions for the parameters.

    Posterior Predictive Checks:
        Posterior predictive samples are generated and compared to the observed data to assess the model's performance.

    Plotting and Interpretation:
        The plot_predict function visualizes the comparison between the posterior predictive samples and the actual observed data, helping us understand how well the model predicts and generalizes.

# Results Interpretation:

    The posterior distributions of $\alpha$ and $\beta$ provide insights into the decay rate and baseline memory retention.
    The theta_t distributions help visualize the expected probability of remembering items at different time points.
    The posterior predictive checks allow us to see how well the model's predictions align with the observed data, including both prediction (missing values at time point 200) and generalization (missing values for subject 4).

Overall, this implementation provides a comprehensive analysis of the memory retention model using PyMC.

### 3.2 Individual Differences

In [None]:
with pm.Model() as individual_model:
    # Priors: one decay rate and one baseline per subject
    alpha = pm.Normal('alpha', mu=0.5, sigma=0.5, shape=ns)
    beta = pm.Normal('beta', mu=0.25, sigma=0.25, shape=ns)

    # theta[i, j] = exp(-alpha[i] * t[j]) + beta[i]. The [:, None] turns the
    # per-subject vectors into columns so they broadcast along each row of tmat.
    # The Normal priors are unbounded, so the clip is what keeps theta in [0, 1].
    theta_t = pm.Deterministic('theta_t', pm.math.clip(pm.math.exp(-alpha[:, None] * tmat) + beta[:, None], 0, 1))

    # Likelihood; k is passed as the observed data
    k_observed = pm.Binomial('k_observed', n=n, p=theta_t, observed=k)

    # Initial values are sized from ns, matching shape=ns above, rather than a
    # literal 4. A fixed seed makes Restart & Run All reproduce the same draws.
    trace2 = pm.sample(1000, target_accept=0.99,
                       initvals={'alpha': np.array([0.5] * ns), 'beta': np.array([0.25] * ns)},
                       random_seed=42)


In [None]:
# Trace plots for the per-subject alpha and beta of the individual model
az.plot_trace(
    trace2,
    var_names=["alpha", "beta"],
)
plt.show()

In [None]:
# Summarize the posterior of alpha and beta for the individual model.
# Ending the cell with the frame shows it as a rich table instead of plain text.
summary = az.summary(trace2, var_names=['alpha', 'beta'])
summary

In [None]:
# Trace plot for theta_t of the individual model, drawn in compact mode
az.plot_trace(
    trace2,
    var_names=["theta_t"],
    compact=True,
)
plt.show()

In [None]:
# Compare posterior predictive checks with human behavior
with individual_model:
    ppc2 = pm.sample_posterior_predictive(trace2, var_names=["k_observed"])

# sample_posterior_predictive returns an InferenceData, so the samples live in
# its posterior_predictive group; indexing the InferenceData by variable name fails.
# plot_predict looks them up as ["posterior_predictive"]["kij"].
plot_predict({"posterior_predictive": {"kij": ppc2.posterior_predictive["k_observed"]}})
plt.show()

# Advantages and Disadvantages of the Individual Differences Model
## Advantages:

    Captures Individual Variability: This model allows for individual differences in memory retention, making it more realistic and capable of capturing subject-specific nuances.
    Better Fit: By accounting for individual differences, the model is likely to fit the data better and provide more accurate predictions for each subject.

## Disadvantages:

    Increased Complexity: The model is more complex, requiring more parameters and possibly more computational resources for fitting.
    Overfitting Risk: With more parameters, there's a higher risk of overfitting, especially if the sample size is small relative to the number of parameters.

## Changes in Inferences and Posterior Predictive Checks

    More Accurate Predictions: The posterior predictive checks are expected to be more accurate as the model now captures individual differences.
    Different Parameter Estimates: The individual $\alpha$ and $\beta$ estimates for each subject may provide insights into how memory retention varies between subjects.
    Improved Generalization: By modeling individual differences, the model may generalize better to new subjects or new data points.

### 3.3 Hierarchical Model

In [None]:
with pm.Model() as hierarchical_model:
    # Hyperpriors: group-level mean and spread of the decay rate ...
    mu_alpha = pm.Normal('mu_alpha', mu=0.5, sigma=0.5)
    sigma_alpha = pm.HalfNormal('sigma_alpha', sigma=0.5)

    # ... and of the baseline
    mu_beta = pm.Normal('mu_beta', mu=0.25, sigma=0.25)
    sigma_beta = pm.HalfNormal('sigma_beta', sigma=0.25)

    # Per-subject parameters drawn from the group-level distributions
    alpha = pm.Normal('alpha', mu=mu_alpha, sigma=sigma_alpha, shape=ns)
    beta = pm.Normal('beta', mu=mu_beta, sigma=sigma_beta, shape=ns)

    # theta[i, j] = exp(-alpha[i] * t[j]) + beta[i]. The [:, None] turns the
    # per-subject vectors into columns so they broadcast along each row of tmat.
    # The Normal priors are unbounded, so the clip is what keeps theta in [0, 1].
    theta_t = pm.Deterministic('theta_t', pm.math.clip(pm.math.exp(-alpha[:, None] * tmat) + beta[:, None], 0, 1))

    # Likelihood; k is passed as the observed data
    k_observed = pm.Binomial('k_observed', n=n, p=theta_t, observed=k)

    # Initial values are sized from ns, matching shape=ns above, rather than a
    # literal 4. A fixed seed makes Restart & Run All reproduce the same draws.
    trace3 = pm.sample(1000, target_accept=0.99,
                       initvals={'alpha': np.array([0.5] * ns), 'beta': np.array([0.25] * ns)},
                       random_seed=42)


In [None]:
# Trace plots for the per-subject alpha and beta of the hierarchical model
az.plot_trace(
    trace3,
    var_names=["alpha", "beta"],
)
plt.show()

In [None]:
# Summarize the posterior of alpha and beta for the hierarchical model.
# Ending the cell with the frame shows it as a rich table instead of plain text.
summary = az.summary(trace3, var_names=['alpha', 'beta'])
summary

In [None]:
# Trace plot for theta_t of the hierarchical model, drawn in compact mode
az.plot_trace(
    trace3,
    var_names=["theta_t"],
    compact=True,
)
plt.show()

In [None]:
# Compare posterior predictive checks with human behavior
with hierarchical_model:
    ppc3 = pm.sample_posterior_predictive(trace3, var_names=["k_observed"])

# sample_posterior_predictive returns an InferenceData, so the samples live in
# its posterior_predictive group; indexing the InferenceData by variable name fails.
# plot_predict looks them up as ["posterior_predictive"]["kij"].
plot_predict({"posterior_predictive": {"kij": ppc3.posterior_predictive["k_observed"]}})
plt.show()

# Advantages and Disadvantages of the Hierarchical Model
## Advantages:

    Better Handling of Individual Differences: The hierarchical model allows for individual differences while also borrowing strength across subjects through the hyper-priors.
    Regularization: By estimating hyper-priors, the model introduces regularization, potentially preventing overfitting by constraining individual parameter estimates.
    Improved Generalization: This model can generalize better to new subjects as it captures population-level trends through the hyper-priors.

## Disadvantages:

    Increased Complexity: The hierarchical model is more complex, requiring more computational resources and possibly longer convergence times.
    Difficult to Specify Hyper-priors: Choosing appropriate hyper-priors can be challenging and may require domain knowledge or additional data.

## Changes in Inferences and Posterior Predictive Checks

    More Accurate Posterior Estimates: The posterior estimates for $\alpha$ and $\beta$ should be more accurate as the model now accounts for both individual differences and population-level trends.
    Improved Predictive Performance: The posterior predictive checks are expected to improve as the model leverages both individual and group-level information.
    Insights into Population Trends: The hyper-priors provide insights into the overall population trends, which can be useful for understanding general memory retention behavior.

This hierarchical approach offers a balance between capturing individual differences and leveraging population-level information, leading to potentially more robust and generalizable inferences.