# Offene Fragen



Modelpriors: random.randint durch random.choice ersetzt, um Modelpriors spezifizieren zu können -> Passt?

# Setup

In [2]:
import numpy as np
import tensorflow as tf

Hierarchical Normal Example Model

$$
\begin{align}
\mu &\sim p(\mu | \mu_0, \tau_0)\\
\tau^2 &\sim p(\tau^2 | \alpha, \beta)\\
\theta_l &\sim p(\theta | \mu, \tau^2) \text{ for } l=1,...,L\\
x_n &\sim p(x | \theta_l, \sigma^2) \text{ for } n=1,...,N \text { (assume } \sigma^2 \text { to be known (=1))}\\
\end{align}
$$

$$
\begin{align}
&\text{Null Model }H_0 \text{: } \mu=0\\
&\text{Alternative Model }H_1 \text{: } \mu \text{ allowed to differ from 0} \\
\end{align}
$$

# Simulator class

In [138]:
class Simulator1:
    """
    Simulator for the hierarchical normal model in which the higher order mean mu is
    drawn from a normal prior (i.e. mu is not fixed). The observation variance is 1.
    """

    def __init__(self):
        pass

    def draw_from_prior(self, n_clusters, mu0, tau20, alpha, beta):
        """
        Draws parameter values from the specified prior distributions of the
        hyperprior and the conditional prior.
        ----------

        Arguments:
        n_clusters : int -- number of higher order clusters that the observations are nested in
        mu0        : float -- higher order mean prior - mean
        tau20      : float -- higher order mean prior - variance
        alpha      : float -- higher order variance prior - shape parameter
        beta       : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (n_clusters,) - contains the sampled cluster means theta
        """

        mu = np.random.normal(loc=mu0, scale=np.sqrt(tau20))
        # np.random.gamma is parameterised by (shape, scale). beta is documented as a
        # rate, so the matching scale is 1/beta; the inverse of the draw is tau2.
        precision = np.random.gamma(shape=alpha, scale=1.0 / beta)
        tau2 = 1.0 / precision
        theta = np.random.normal(loc=mu, scale=np.sqrt(tau2), size=n_clusters)
        return theta

    def gen_from_likelihood(self, theta, n_obs):
        """
        Generates a single hierarchical dataset from the sampled parameter values.
        ----------

        Arguments:
        theta : numpy array of shape (n_clusters,) -- cluster means sampled from the prior
        n_obs : int -- number of observations per cluster
        --------

        Returns:
        numpy array of shape (n_clusters, n_obs) - observations with unit standard deviation
        """

        # Draw as (n_obs, n_clusters) so that theta broadcasts along the last axis,
        # then transpose to get one row per cluster.
        X = np.random.normal(loc=theta, scale=1, size=(n_obs, theta.shape[0])).T
        return X

    def generate_single(self, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Generates one hierarchical dataset: draws cluster means from the prior and
        then observations from the likelihood.
        ----------

        Arguments:
        n_clusters : int -- number of higher order clusters that the observations are nested in
        n_obs      : int -- number of observations per cluster
        mu0        : float -- higher order mean prior - mean
        tau20      : float -- higher order mean prior - variance
        alpha      : float -- higher order variance prior - shape parameter
        beta       : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (n_clusters, n_obs, 1) - the simulated dataset
        """

        prior_sample = self.draw_from_prior(n_clusters, mu0, tau20, alpha, beta)
        x_generated = self.gen_from_likelihood(prior_sample, n_obs)
        return x_generated[..., np.newaxis]

    def simulate(self, batch_size, n_models, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Simulates multiple hierarchical datasets.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated
        n_models       : int -- number of models to be simulated from (not used by this method)
        n_clusters     : int -- number of higher order clusters that the observations are nested in
        n_obs          : int -- number of observations per cluster
        mu0            : float -- higher order mean prior - mean
        tau20          : float -- higher order mean prior - variance
        alpha          : float -- higher order variance prior - shape parameter
        beta           : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (batch_size, n_clusters, n_obs, 1) - contains the simulated hierarchical datasets
        """

        X = []
        for _ in range(batch_size):
            prior_sample = self.draw_from_prior(n_clusters, mu0, tau20, alpha, beta)
            x_generated = self.gen_from_likelihood(prior_sample, n_obs)
            X.append(x_generated)
        # Trailing axis of size 1 is added after stacking the datasets.
        return np.array(X)[..., np.newaxis]
    
    
class Simulator2:
    """
    Simulator for the hierarchical normal model in which the higher order mean is
    fixed at 0 (only the higher order variance is sampled). The observation variance is 1.
    """

    def __init__(self):
        pass

    def draw_from_prior(self, n_clusters, mu0, tau20, alpha, beta):
        """
        Draws parameter values from the specified prior distributions of the
        hyperprior and the conditional prior.
        ----------

        Arguments:
        n_clusters : int -- number of higher order clusters that the observations are nested in
        mu0        : float -- higher order mean prior - mean (not used: the mean is fixed at 0)
        tau20      : float -- higher order mean prior - variance (not used: the mean is fixed at 0)
        alpha      : float -- higher order variance prior - shape parameter
        beta       : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (n_clusters,) - contains the sampled cluster means theta
        """

        # np.random.gamma is parameterised by (shape, scale). beta is documented as a
        # rate, so the matching scale is 1/beta; the inverse of the draw is tau2.
        precision = np.random.gamma(shape=alpha, scale=1.0 / beta)
        tau2 = 1.0 / precision
        theta = np.random.normal(loc=0, scale=np.sqrt(tau2), size=n_clusters)
        return theta

    def gen_from_likelihood(self, theta, n_obs):
        """
        Generates a single hierarchical dataset from the sampled parameter values.
        ----------

        Arguments:
        theta : numpy array of shape (n_clusters,) -- cluster means sampled from the prior
        n_obs : int -- number of observations per cluster
        --------

        Returns:
        numpy array of shape (n_clusters, n_obs) - observations with unit standard deviation
        """

        # Draw as (n_obs, n_clusters) so that theta broadcasts along the last axis,
        # then transpose to get one row per cluster.
        X = np.random.normal(loc=theta, scale=1, size=(n_obs, theta.shape[0])).T
        return X

    def generate_single(self, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Generates one hierarchical dataset: draws cluster means from the prior and
        then observations from the likelihood.
        ----------

        Arguments:
        n_clusters : int -- number of higher order clusters that the observations are nested in
        n_obs      : int -- number of observations per cluster
        mu0        : float -- passed on to draw_from_prior
        tau20      : float -- passed on to draw_from_prior
        alpha      : float -- higher order variance prior - shape parameter
        beta       : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (n_clusters, n_obs, 1) - the simulated dataset
        """

        prior_sample = self.draw_from_prior(n_clusters, mu0, tau20, alpha, beta)
        x_generated = self.gen_from_likelihood(prior_sample, n_obs)
        return x_generated[..., np.newaxis]

    def simulate(self, batch_size, n_models, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Simulates multiple hierarchical datasets.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated
        n_models       : int -- number of models to be simulated from (not used by this method)
        n_clusters     : int -- number of higher order clusters that the observations are nested in
        n_obs          : int -- number of observations per cluster
        mu0            : float -- passed on to draw_from_prior
        tau20          : float -- passed on to draw_from_prior
        alpha          : float -- higher order variance prior - shape parameter
        beta           : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (batch_size, n_clusters, n_obs, 1) - contains the simulated hierarchical datasets
        """

        X = []
        for _ in range(batch_size):
            # draw_from_prior takes no model index; its arguments start with n_clusters.
            prior_sample = self.draw_from_prior(n_clusters, mu0, tau20, alpha, beta)
            x_generated = self.gen_from_likelihood(prior_sample, n_obs)
            X.append(x_generated)
        # Trailing axis of size 1 is added after stacking the datasets.
        return np.array(X)[..., np.newaxis]

In [139]:
class MainSimulator:
    """
    Draws model indices from a model prior and generates one dataset per index
    with the corresponding simulator from model_list.
    """

    def __init__(self, model_list):
        """
        Arguments:
        model_list : list -- simulator objects providing generate_single(n_clusters, n_obs);
                             the position in the list is the model index
        """

        self.model_list = model_list

    def draw_from_model_prior(self, batch_size, n_models, model_prior):
        """
        Creates the sequence of models to be simulated from in the batch.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated per batch
        n_models       : int -- number of models to be simulated from
        model_prior    : list or array or None -- prior model probabilities; None means uniform
        --------

        Returns:
        array of shape (batch_size) - array of indices corresponding to the sampled model from p(M).
        """

        # create base list of model indices
        model_base_indices = list(range(n_models))

        # uniform prior over model probabilities if no model prior given
        # (identity check: `== None` is evaluated element-wise for numpy arrays and
        # cannot be used as a condition)
        if model_prior is None:
            model_prior = [1 / n_models] * n_models

        # generate sampling list of model indices
        model_indices = np.random.choice(model_base_indices, size=batch_size, p=model_prior)
        return model_indices

    def simulate(self, batch_size, n_models, model_prior, n_clust_min=2, n_clust_max=100, n_obs_min=2, n_obs_max=200):
        """
        Simulates a batch of hierarchical datasets. The number of clusters and the
        number of observations are drawn once and shared by all datasets in the batch.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated per batch
        n_models       : int -- number of models to be simulated from
        model_prior    : list or array or None -- prior model probabilities; None means uniform
        n_clust_min    : int -- minimum number of clusters
        n_clust_max    : int -- maximum number of clusters (inclusive)
        n_obs_min      : int -- minimum number of observations
        n_obs_max      : int -- maximum number of observations (inclusive)
        --------

        Returns:
        dict of {'X' : array of shape (batch_size, n_clusters, n_obs, 1),
                 'm' : array of shape (batch_size)}
        """

        # Draw the number of clusters and observations (randint excludes the upper bound, hence +1)
        n_clusters = np.random.randint(n_clust_min, n_clust_max + 1)
        n_obs = np.random.randint(n_obs_min, n_obs_max + 1)

        # Draw sampling list of model indices
        model_indices = self.draw_from_model_prior(batch_size, n_models, model_prior)

        # Prepare an array to hold simulations
        X_gen = np.zeros((batch_size, n_clusters, n_obs, 1))

        # Each dataset is generated by the simulator selected by its model index
        for b, model_index in enumerate(model_indices):
            X_gen[b] = self.model_list[model_index].generate_single(n_clusters, n_obs)

        return {'X': X_gen, 'm': model_indices}

    def __call__(self, batch_size, n_models=2, model_prior=None, n_clust_min=2, n_clust_max=100, n_obs_min=2, n_obs_max=200):
        """Shortcut for simulate() with default settings for all arguments but batch_size."""
        return self.simulate(batch_size, n_models, model_prior, n_clust_min, n_clust_max, n_obs_min, n_obs_max)

In [142]:
# Instantiate both candidate simulators and wrap them in the main simulator.
# The position of a simulator in model_list is the model index reported in the 'm' output.
s1 = Simulator1()
s2 = Simulator2()
model_list = [s1, s2]
main_sim = MainSimulator(model_list)

In [143]:
%%time
print(main_sim(64)["X"][:, 0, :, :].shape) # shape of 1 cluster 
print(np.unique(main_sim(64)["m"], return_counts=True)[1]) # model indices distribution in batch
print(main_sim(64))

(64, 9, 1)
[29 35]
{'X': array([[[[-1.40012849e-01],
         [ 5.55734731e-02],
         [ 2.71037966e-01],
         ...,
         [-1.52561632e-01],
         [-3.88382918e-01],
         [-9.82880755e-01]],

        [[-2.68428795e+00],
         [-3.93169843e+00],
         [-4.19355932e+00],
         ...,
         [-2.51891566e+00],
         [-3.29165676e+00],
         [-2.50058343e+00]],

        [[-1.23273598e+00],
         [-1.43126824e+00],
         [-1.16468356e+00],
         ...,
         [-1.02286146e+00],
         [-1.30779805e+00],
         [-7.61730161e-01]],

        ...,

        [[ 4.41819569e-01],
         [ 3.53047355e+00],
         [ 2.64251707e+00],
         ...,
         [ 3.66338587e+00],
         [ 2.54294515e+00],
         [ 1.32427403e+00]],

        [[-3.49434417e-01],
         [-7.93838086e-01],
         [-8.85432897e-01],
         ...,
         [-1.77359313e+00],
         [ 3.52210182e-01],
         [ 1.79016564e-01]],

        [[-3.57920872e-01],
         [-2.

In [None]:
class HierarchicalEvidenceNet(tf.keras.Model):
    """
    Placeholder for the evidence network operating on hierarchical datasets.
    The architecture is not implemented yet.
    """

    def __init__(self):
        # Subclassed Keras models must run the base-class initialiser before
        # attributes are assigned or the model is called.
        super().__init__()

    def call(self, X):
        """
        Forward pass of the network (not implemented yet, currently returns None).
        ----------

        Arguments:
        X : input batch -- presumably the simulated datasets ('X' of the main simulator); TODO confirm
        """
        pass

In [None]:
# Look at tutorials/examples on custom models in TensorFlow

# Tutorial on Eager Execution (TensorFlow)

# Some thoughts on architectural design / first attempt?
# -> only difference: one additional dimension!
# -> one additional summary on top
