# TO DO

Samplen auf Modell 1 erweitern

Modellindikatoren einbauen

# Offene Fragen

Richtig, dass wir hier nur 1 Cluster mit 20 Beobachtungen haben?

Inverse-Gamma-Verteilung: über `1/np.random.gamma` ziehen oder über `scipy.stats.invgamma`?

Jede Variable einzeln spezifizieren vs. `*args`/`**kwargs`: Wann sollten in diesem Code `*args`/`**kwargs` eingesetzt werden?

Docstring Konventionen - hier nach BayesFlow Package richten oder Numpy Docstring Standard?

In [160]:
import numpy as np
import math

Hierarchical Normal Example Model

$$
\begin{align}
\mu &\sim p(\mu | \mu_0, \tau_0^2)\\
\tau^2 &\sim p(\tau^2 | \alpha, \beta)\\
\theta_l &\sim p(\theta | \mu, \tau^2) \text{ for } l=1,...,L\\
x_n &\sim p(x | \theta_l, \sigma^2) \text{ for } n=1,...,N \text { (assume } \sigma^2 \text { to be known (=1))}\\
\end{align}
$$

$$
\begin{align}
&\text{Null Model }H_0 \text{: } \mu=0\\
&\text{Alternative Model }H_1 \text{: } \mu \text{ allowed to differ from 0} \\
\end{align}
$$

In [278]:
class Simulator:
    """
    Simulates datasets from a hierarchical normal model under two competing models:
    model 0 (null model, mu fixed at 0) and model 1 (mu drawn from its normal prior).
    All random draws use numpy's global random state (np.random).
    """

    def __init__(self):
        pass

    def draw_from_prior(self, model_index, n_clusters, mu0, tau20, alpha, beta):
        """
        Draws parameter values from the specified prior distributions of the
        hyperprior and the conditional prior.
        ----------

        Arguments:
        model_index : int -- 0 for the null model (mu = 0), 1 for the alternative model (mu ~ N(mu0, tau20))
        n_clusters  : int -- number of higher order clusters that the observations are nested in
        mu0         : float -- higher order mean prior - mean
        tau20       : float -- higher order mean prior - variance
        alpha       : float -- higher order variance prior - shape parameter
        beta        : float -- higher order variance prior - rate parameter
        --------

        Returns:
        tuple (theta, sigma2) -- theta is a numpy array of shape (n_clusters,) holding the cluster means,
                                 sigma2 is the known observation variance (fixed to 1)
        --------

        Raises:
        ValueError -- if model_index is neither 0 nor 1
        """

        if model_index == 0:
            mu = 0
        elif model_index == 1:
            mu = np.random.normal(loc=mu0, scale=math.sqrt(tau20))
        else:
            # Fail loudly instead of hitting an unbound `mu` further down.
            raise ValueError("model_index must be 0 or 1, got {!r}".format(model_index))

        # np.random.gamma is parametrized by shape and SCALE, whereas beta is a RATE:
        # the reciprocal of a Gamma(shape=alpha, scale=1/beta) draw is an
        # inverse-gamma draw with shape alpha and scale beta.
        tau2 = 1 / np.random.gamma(shape=alpha, scale=1.0 / beta)
        theta = np.random.normal(loc=mu, scale=math.sqrt(tau2), size=n_clusters)
        sigma2 = 1
        return theta, sigma2

    def gen_from_likelihood(self, params, n_observations):
        """
        Generates a single hierarchical dataset from the sampled parameter values.
        ----------

        Arguments:
        params         : tuple/list -- (theta, sigma2) as returned by draw_from_prior
        n_observations : int -- number of observations per cluster
        --------

        Returns:
        numpy array of shape (n_clusters, n_observations) - one row of observations per cluster
        """

        theta, sigma2 = params
        # Accept a scalar theta as a single cluster.
        theta = np.atleast_1d(theta)
        # Draw as (n_observations, n_clusters) so theta broadcasts along the last axis,
        # then transpose to (n_clusters, n_observations).
        X = np.random.normal(
            loc=theta,
            scale=math.sqrt(sigma2),
            size=(n_observations, theta.shape[0]),
        ).T
        return X

    def simulate(self, batch_size, n_models, n_clusters, n_observations, mu0, tau20, alpha, beta):
        """
        Simulates multiple hierarchical datasets.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated per model
        n_models       : int -- number of models to be simulated from (model indices 0, ..., n_models-1;
                                only indices 0 and 1 are implemented)
        n_clusters     : int -- number of higher order clusters that the observations are nested in
        n_observations : int -- number of observations per cluster
        mu0            : float -- higher order mean prior - mean
        tau20          : float -- higher order mean prior - variance
        alpha          : float -- higher order variance prior - shape parameter
        beta           : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (batch_size * n_models, n_clusters, n_observations, 1) - contains the
        simulated hierarchical datasets; within each batch the datasets are ordered by model index
        """

        # Same draw order as a nested loop: outer over batches, inner over model indices.
        datasets = [
            self.gen_from_likelihood(
                self.draw_from_prior(m_index, n_clusters, mu0, tau20, alpha, beta),
                n_observations,
            )
            for _ in range(batch_size)
            for m_index in range(n_models)
        ]
        if not datasets:
            # Keep the 4-D layout even when nothing was simulated.
            return np.empty((0, n_clusters, n_observations, 1))
        return np.array(datasets)[..., np.newaxis]

In [283]:
%%time

# prior specification

mu0 = 0
tau20 = 1
alpha = 1
beta = 1

# run simulator

sim = Simulator()
x_test = sim.simulate(batch_size=1, n_models = 2, n_clusters=1, n_observations=20, mu0=mu0, tau20=tau20, alpha=alpha, beta=beta)
print(x_test.shape)
print(x_test[0][0][0:5])
print(x_test[1][0][0:5])

(2, 1, 20, 1)
[[ 0.79278728]
 [ 1.17021609]
 [ 0.39257989]
 [ 0.64095352]
 [-0.60520439]]
[[-2.00956942]
 [-4.96192579]
 [-2.94106544]
 [-2.85043796]
 [-3.3011672 ]]
Wall time: 998 µs


In [284]:
# Sample a stand-in 'empirical' dataset from the null model (mu fixed at 0)

#np.random.seed(12345)

N = 20  # number of observations
mu, tau2, sigma2 = 0, 0.5, 1

# One theta is drawn per observation; each observation is then centred on its own theta
theta = np.random.normal(mu, math.sqrt(tau2), N)
X = np.random.normal(theta, math.sqrt(sigma2), N)

print(X.shape)
print(X[:5])

(20,)
[-1.73588948  2.42714974  0.35228581  0.83584784  0.60450026]
