# Offene Fragen



Modelpriors: random.randint durch random.choice ersetzt, um Modelpriors spezifizieren zu können -> Passt?

# Setup

In [1]:
import numpy as np
import tensorflow as tf

In [3]:
# Display the installed TensorFlow version (the recorded output of this cell is '1.14.0').
tf.version.VERSION

'1.14.0'

Hierarchical Normal Example Model

$$
\begin{align}
x_n &\sim p(x | \theta_l, \sigma^2) \text{ for } n=1,...,N \text { (assume } \sigma^2 \text { to be known (=1))}\\
\theta_l &\sim p(\theta | \mu, \tau^2) \text{ for } l=1,...,L\\
\mu &\sim p(\mu | \mu_0, \tau_0^2)\\
\tau^2 &\sim p(\tau^2 | \alpha, \beta)\\
\end{align}
$$

$$
\begin{align}
&\text{Null Model }H_0 \text{: } \mu=0\\
&\text{Alternative Model }H_1 \text{: } \mu \text{ allowed to differ from 0} \\
\end{align}
$$

# Simulator class

In [2]:
class HierarchicalNormalSimulator:
    """
    Simulator for a two-level hierarchical normal model with unit observation variance.

    Two competing models are supported:
    model 0 (null)        : the higher order mean mu is fixed to 0
    model 1 (alternative) : mu is drawn from a normal prior with mean mu0 and variance tau20
    """
    
    def __init__(self):    
        pass
    
    def draw_from_prior(self, model_index, n_clusters, mu0, tau20, alpha, beta):
        """
        Draws the cluster means theta from the hyperprior and the conditional prior.
        ----------
        
        Arguments:        
        model_index : int -- index of the model to draw from (0 = null model, 1 = alternative model)
        n_clusters  : int -- number of higher order clusters that the observations are nested in
        mu0         : float -- higher order mean prior - mean (only used by model 1)
        tau20       : float -- higher order mean prior - variance (only used by model 1)
        alpha       : float -- higher order variance prior - shape argument of the gamma draw
        beta        : float -- higher order variance prior - second argument of the gamma draw
        --------
        
        Returns:
        numpy array of shape (n_clusters) - the sampled cluster means
        --------
        
        Raises:
        ValueError if model_index is neither 0 nor 1
        """
        
        if model_index == 0: 
            mu = 0
        elif model_index == 1:
            mu = np.random.normal(loc=mu0, scale=np.sqrt(tau20))
        else:
            # Previously an unknown index left `mu` unbound and failed further down.
            raise ValueError(
                "model_index must be 0 (null model) or 1 (alternative model), got {!r}".format(model_index)
            )
            
        # tau2 is the reciprocal of a gamma draw.
        # NOTE(review): np.random.gamma takes (shape, scale); beta is passed as the *scale*
        # although it was originally documented as a rate. Identical for beta=1 -- confirm
        # the intended parameterisation before using other values.
        tau2 = 1/np.random.gamma(alpha, beta)
        theta = np.random.normal(loc=mu, scale=np.sqrt(tau2), size=n_clusters)
        return theta
    
    def gen_from_likelihood(self, theta, n_obs):
        """
        Generates a single hierarchical dataset from the sampled cluster means.
        ----------
        
        Arguments: 
        theta : numpy array of shape (n_clusters) -- cluster means sampled from the prior 
        n_obs : int -- number of observations per cluster
        --------
        
        Returns:
        numpy array of shape (n_clusters, n_obs) - observations with standard deviation 1 around each cluster mean
        """
        
        # Sample as (n_obs, n_clusters) so that theta broadcasts over the last axis, then transpose.
        X = np.random.normal(loc=theta, scale=1, size=(n_obs, theta.shape[0])).T 
        return X
    
    def generate_single(self, model_index, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Generates a single hierarchical dataset.
        ----------
        
        Arguments:
        model_index    : int -- index of the model to be simulated from
        n_clusters     : int -- number of higher order clusters that the observations are nested in
        n_obs          : int -- number of observations per cluster
        mu0            : float -- higher order mean prior - mean
        tau20          : float -- higher order mean prior - variance
        alpha          : float -- higher order variance prior - shape argument of the gamma draw
        beta           : float -- higher order variance prior - second argument of the gamma draw
        --------
        
        Returns:
        numpy array of shape (n_clusters, n_obs, 1) - contains the simulated hierarchical dataset
        """
        prior_sample = self.draw_from_prior(model_index, n_clusters, mu0, tau20, alpha, beta)
        x_generated = self.gen_from_likelihood(prior_sample, n_obs)
        # Append a trailing axis of length 1 (single variable per observation).
        return x_generated[...,np.newaxis]
        
    
    def simulate(self, batch_size, n_models, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Simulates batch_size hierarchical datasets from each of the first n_models models.
        ----------
        
        Arguments:
        batch_size     : int -- number of datasets to be generated per model
        n_models       : int -- number of models to be simulated from (model indices 0, ..., n_models - 1)
        n_clusters     : int -- number of higher order clusters that the observations are nested in
        n_obs          : int -- number of observations per cluster
        mu0            : float -- higher order mean prior - mean
        tau20          : float -- higher order mean prior - variance
        alpha          : float -- higher order variance prior - shape argument of the gamma draw
        beta           : float -- higher order variance prior - second argument of the gamma draw
        --------
        
        Returns:
        numpy array of shape (batch_size * n_models, n_clusters, n_obs, 1) - contains the simulated
        hierarchical datasets, ordered by model: rows [m * batch_size, (m + 1) * batch_size) stem from model m
        """
        
        # Preallocate so that the result keeps its 4D shape even for an empty batch.
        X = np.zeros((n_models * batch_size, n_clusters, n_obs, 1))
        row = 0
        for model_index in range(n_models):
            for _ in range(batch_size):
                # The former implementation omitted model_index here and always raised a TypeError.
                X[row] = self.generate_single(model_index, n_clusters, n_obs, mu0, tau20, alpha, beta)
                row += 1
        return X

In [12]:
class MainSimulator:
    """
    Batch-level simulator: samples model indices from the model prior and delegates the
    generation of each dataset to the wrapped simulator's generate_single method.
    """
    
    def __init__(self, simulator):
        """
        Arguments:
        simulator : object providing generate_single(model_index, n_clusters, n_obs)
        """
        
        self.simulator = simulator
    
    def draw_from_model_prior(self, batch_size, n_models, model_prior):
        """
        Creates the sequence of models to be simulated from in the batch.
        ----------
        
        Arguments:
        batch_size     : int -- number of datasets in the batch
        n_models       : int -- number of models to be simulated from
        model_prior    : list or numpy array or None -- prior model probabilities; None means uniform
        --------
        
        Returns:
        array of shape (batch_size) - array of indices corresponding to the sampled model from p(M).
        """
        
        # create base list of model indices
        model_base_indices = list(range(n_models))
        
        # uniform prior over model probabilities if no model prior given
        # (identity check: `== None` is evaluated elementwise for numpy arrays and then fails in `if`)
        if model_prior is None:
            model_prior = [1/n_models] * n_models
        
        # generate sampling list of model indices
        model_indices = np.random.choice(model_base_indices, size=batch_size, p=model_prior)
        return model_indices
    
    def simulate(self, batch_size, n_models, model_prior, n_clust_min=2, n_clust_max=100, n_obs_min=2, n_obs_max=200):
        """
        Simulates a batch of hierarchical datasets. The number of clusters and the number of
        observations per cluster are drawn once per batch and shared by all datasets in it.
        ----------
        
        Arguments:
        batch_size     : int -- number of datasets to be generated per batch
        n_models       : int -- number of models to be simulated from
        model_prior    : list or numpy array or None -- prior model probabilities; None means uniform
        n_clust_min    : int -- minimum number of clusters (inclusive)
        n_clust_max    : int -- maximum number of clusters (inclusive)
        n_obs_min      : int -- minimum number of observations (inclusive)
        n_obs_max      : int -- maximum number of observations (inclusive)
        --------
        
        Returns:
        dict of {'X' : array of shape (batch_size, n_clusters, n_obs, 1),  
                 'm' : array of shape (batch_size)}
        """
        # Draw K and N (randint excludes the upper bound, hence the +1)
        n_clusters = np.random.randint(n_clust_min, n_clust_max+1)
        n_obs = np.random.randint(n_obs_min, n_obs_max+1)
        
        # Draw sampling list of model indices
        model_indices = self.draw_from_model_prior(batch_size, n_models, model_prior)
        
        # Prepare an array to hold simulations
        X_gen = np.zeros((batch_size, n_clusters, n_obs, 1))
        
        for b in range(batch_size):
            X_gen[b] = self.simulator.generate_single(model_indices[b], n_clusters, n_obs)
               
        return {'X': X_gen, 'm': model_indices}
    
    def __call__(self, batch_size, n_models=2, model_prior=None, n_clust_min=2, n_clust_max=100, n_obs_min=2, n_obs_max=200):
        """Shortcut for simulate() with two models and a uniform model prior as defaults."""
        return self.simulate(batch_size, n_models, model_prior, n_clust_min, n_clust_max, n_obs_min, n_obs_max)

In [4]:
# Wrap the hierarchical normal simulator in the batch-level MainSimulator.
main_sim = MainSimulator(HierarchicalNormalSimulator())

In [10]:
%%time
print(main_sim(64)["X"][:, 0, :, :].shape) # shape of 1 cluster 
print(np.unique(main_sim(64)["m"], return_counts=True)[1]) # model indices distribution in batch

(64, 140, 1)
[34 30]
Wall time: 87.9 ms


In [11]:
%%time
# Time one call that simulates a batch of 64 datasets; the returned dict is shown as the cell output.
main_sim(64)

Wall time: 48 ms


{'X': array([[[[-8.84577782e-01],
          [-1.14130959e-01],
          [ 8.23165541e-01],
          ...,
          [-4.54229609e-01],
          [-4.49460439e-01],
          [ 3.01507510e-02]],
 
         [[-1.37442333e+00],
          [-5.07659822e-01],
          [-4.82695140e-01],
          ...,
          [-1.11660468e-01],
          [-1.90301845e+00],
          [ 5.69016620e-01]],
 
         [[-6.56138302e-01],
          [-8.25876881e-01],
          [-3.62250525e-01],
          ...,
          [-5.80045403e-01],
          [-6.17373694e-01],
          [ 2.84223382e-01]],
 
         ...,
 
         [[ 3.50316202e+00],
          [ 9.74727976e-01],
          [ 3.25020462e+00],
          ...,
          [ 3.44938426e+00],
          [ 1.15295734e+00],
          [ 1.93682731e+00]],
 
         [[ 2.99446471e-01],
          [ 6.97549320e-01],
          [ 2.02336177e+00],
          ...,
          [ 1.28854241e+00],
          [ 7.89602569e-01],
          [ 2.49299152e-01]],
 
         [[-5.22855

# Network Class

In [7]:
# Switch TensorFlow to eager execution.
# NOTE(review): the recorded output of this cell is "NameError: name 'tf' is not defined",
# i.e. it was last run in a kernel where the import cell had not been executed --
# re-run the notebook from the top.
tf.enable_eager_execution()

NameError: name 'tf' is not defined

In [None]:
class HierarchicalEvidenceNet(tf.keras.Model):
    """
    Placeholder for the evidence network operating on hierarchical datasets.
    The architecture is not implemented yet.
    """
    
    def __init__(self):
        # A subclassed Keras model has to initialise its base class before it can be used.
        super().__init__()
    
    def call(self, X):
        """
        Forward pass -- not implemented yet, currently returns None.
        ----------
        
        Arguments:
        X : batch of simulated hierarchical datasets
            (presumably the 'X' array produced by MainSimulator -- TODO confirm)
        """
        pass

In [None]:
# Look at tutorials/examples on custom models in tf

# Tutorial on Eager Execution (TensorFlow)

# Some thoughts on architectural design / first attempt?
# -> only difference: one additional dimension!
# -> one more additional summary on top
