# Offene Fragen



Modelpriors: random.randint durch random.choice ersetzt, um Modelpriors spezifizieren zu können -> Passt?

# Setup

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
# Display the TensorFlow version this notebook was executed with
tf.version.VERSION

'2.3.0'

Hierarchical Normal Example Model

$$
\begin{align}
x_n &\sim p(x | \theta_l, \sigma^2) \text{ for } n=1,...,N \text { (assume } \sigma^2 \text { to be known (=1))}\\
\theta_l &\sim p(\theta | \mu, \tau^2) \text{ for } l=1,...,L\\
\mu &\sim p(\mu | \mu_0, \tau_0^2)\\
\tau^2 &\sim p(\tau^2 | \alpha, \beta)\\
\end{align}
$$

$$
\begin{align}
&\text{Null Model }H_0 \text{: } \mu=0\\
&\text{Alternative Model }H_1 \text{: } \mu \text{ allowed to differ from 0} \\
\end{align}
$$

# Simulator class

In [4]:
class HierarchicalNormalSimulator:
    """
    Simulator for a two-level hierarchical normal model with known
    observation variance (sigma^2 = 1).

    Model 0 (null model) fixes the higher order mean at mu = 0, model 1
    (alternative model) draws mu from a normal prior.
    """

    def __init__(self):
        pass

    def draw_from_prior(self, model_index, n_clusters, mu0, tau20, alpha, beta):
        """
        Draws parameter values from the specified prior distributions of the
        hyperprior and the conditional prior.
        ----------

        Arguments:
        model_index : int -- index of the model to be simulated from (0 = null model, 1 = alternative model)
        n_clusters  : int -- number of higher order clusters that the observations are nested in
        mu0         : float -- higher order mean prior - mean
        tau20       : float -- higher order mean prior - variance
        alpha       : float -- higher order variance prior - shape parameter
        beta        : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (n_clusters) - contains the sampled cluster means theta

        Raises:
        ValueError -- if model_index is neither 0 nor 1
        """

        if model_index == 0:
            mu = 0
        elif model_index == 1:
            mu = np.random.normal(loc=mu0, scale=np.sqrt(tau20))
        else:
            # Previously an unknown index left mu unbound and failed later with an UnboundLocalError
            raise ValueError("model_index must be 0 or 1, got {}".format(model_index))

        # np.random.gamma is parameterised by (shape, scale); beta is a rate, hence scale = 1 / beta
        tau2 = 1 / np.random.gamma(alpha, 1 / beta)
        theta = np.random.normal(loc=mu, scale=np.sqrt(tau2), size=n_clusters)
        return theta

    def gen_from_likelihood(self, theta, n_obs):
        """
        Generates a single hierarchical dataset from the sampled parameter values.
        ----------

        Arguments:
        theta : numpy array of shape (n_clusters) -- cluster means sampled from the prior
        n_obs : int -- number of observations per cluster
        --------

        Returns:
        numpy array of shape (n_clusters, n_obs) - contains the simulated observations
        """

        # loc=theta broadcasts along the last axis, so sample as (n_obs, n_clusters) and transpose
        X = np.random.normal(loc=theta, scale=1, size=(n_obs, theta.shape[0])).T
        return X

    def generate_single(self, model_index, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Generates a single hierarchical dataset.
        ----------

        Arguments:
        model_index    : int -- index of the model to be simulated from
        n_clusters     : int -- number of higher order clusters that the observations are nested in
        n_obs          : int -- number of observations per cluster
        mu0            : float -- higher order mean prior - mean
        tau20          : float -- higher order mean prior - variance
        alpha          : float -- higher order variance prior - shape parameter
        beta           : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (n_clusters, n_obs, 1) - contains the simulated hierarchical dataset
        """
        prior_sample = self.draw_from_prior(model_index, n_clusters, mu0, tau20, alpha, beta)
        x_generated = self.gen_from_likelihood(prior_sample, n_obs)
        # Append a trailing axis of size 1 for the single observed variable
        return x_generated[..., np.newaxis]

    def simulate(self, batch_size, n_models, n_clusters, n_obs, mu0=0, tau20=1, alpha=1, beta=1):
        """
        Simulates multiple hierarchical datasets. Useful for single usage and debugging (both without the MainSimulator).
        The model of each dataset is drawn uniformly at random from the n_models available models.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated
        n_models       : int -- number of models to be simulated from (only models 0 and 1 are defined)
        n_clusters     : int -- number of higher order clusters that the observations are nested in
        n_obs          : int -- number of observations per cluster
        mu0            : float -- higher order mean prior - mean
        tau20          : float -- higher order mean prior - variance
        alpha          : float -- higher order variance prior - shape parameter
        beta           : float -- higher order variance prior - rate parameter
        --------

        Returns:
        numpy array of shape (batch_size, n_clusters, n_obs, 1) - contains the simulated hierarchical datasets
        """

        # The former implementation called draw_from_prior without a model index and always raised a TypeError
        model_indices = np.random.randint(n_models, size=batch_size)

        X = np.empty((batch_size, n_clusters, n_obs, 1))
        for b in range(batch_size):
            X[b] = self.generate_single(model_indices[b], n_clusters, n_obs, mu0, tau20, alpha, beta)
        return X

In [43]:
class MainSimulator:
    """
    Wraps a model simulator and produces batches of datasets together with
    the indices of the models they were generated from.
    """

    def __init__(self, simulator):
        """
        Arguments:
        simulator : object providing generate_single(model_index, n_clusters, n_obs)
        """

        self.simulator = simulator

    def draw_from_model_prior(self, batch_size, n_models, model_prior):
        """
        Creates the sequence of models to be simulated from in the batch.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated per batch
        n_models       : int -- number of models to be simulated from
        model_prior    : list or numpy array or None -- prior model probabilities (uniform if None)
        --------

        Returns:
        array of shape (batch_size) - array of indices corresponding to the sampled model from p(M).
        """

        # Uniform prior over models if no model prior is given.
        # `is None` (instead of `== None`) keeps array-valued priors from raising an ambiguous-truth-value error.
        if model_prior is None:
            model_prior = [1 / n_models] * n_models

        # Sample one model index in {0, ..., n_models - 1} per dataset
        model_indices = np.random.choice(n_models, size=batch_size, p=model_prior)
        return model_indices

    def simulate(self, batch_size, n_models, model_prior, n_clust_min=2, n_clust_max=100, n_obs_min=2, n_obs_max=200):
        """
        Simulates a batch of hierarchical datasets.
        ----------

        Arguments:
        batch_size     : int -- number of datasets to be generated per batch
        n_models       : int -- number of models to be simulated from
        model_prior    : list or numpy array or None -- prior model probabilities (uniform if None)
        n_clust_min    : int -- minimum number of clusters
        n_clust_max    : int -- maximum number of clusters
        n_obs_min      : int -- minimum number of observations
        n_obs_max      : int -- maximum number of observations
        --------

        Returns:
        dict of {'X' : array of shape (batch_size, n_clusters, n_obs, 1),
                 'm' : array of shape (batch_size)}
        """
        # Draw the number of clusters and observations (equal for all datasets in the batch);
        # the upper bounds are inclusive, hence the +1
        n_clusters = np.random.randint(n_clust_min, n_clust_max + 1)
        n_obs = np.random.randint(n_obs_min, n_obs_max + 1)

        # Draw sampling list of model indices
        model_indices = self.draw_from_model_prior(batch_size, n_models, model_prior)

        # Prepare an array to hold simulations
        X_gen = np.zeros((batch_size, n_clusters, n_obs, 1), dtype=np.float32)

        for b in range(batch_size):
            X_gen[b] = self.simulator.generate_single(model_indices[b], n_clusters, n_obs)

        return {'X': X_gen, 'm': model_indices}

    def __call__(self, batch_size, n_models=2, model_prior=None, n_clust_min=2, n_clust_max=100, n_obs_min=2, n_obs_max=200):
        """Shortcut for simulate() with two models and a uniform model prior as defaults."""
        return self.simulate(batch_size, n_models, model_prior, n_clust_min, n_clust_max, n_obs_min, n_obs_max)

In [44]:
# Wrap the hierarchical normal simulator in the batch-level main simulator

main_sim = MainSimulator(simulator=HierarchicalNormalSimulator())

In [45]:
%%time
# Testing zone: how do shapes and index distribution change in different runs?

print(main_sim(64)["X"][:, 0, :, :].shape) # shape of 1 cluster 
print(np.unique(main_sim(64)["m"], return_counts=True)[1]) # model index distribution in batch

(64, 63, 1)
[30 34]
Wall time: 97.7 ms


In [47]:
%%time
# Simulate a batch for subsequent NN development

simulated_batch = main_sim(2)
print(simulated_batch["X"].shape)
print(simulated_batch["X"][0,:2,:3,:]) # display the first 3 observations of the first 2 clusters of the first batch

(2, 69, 49, 1)
[[[-2.131488  ]
  [-2.1201575 ]
  [-3.5995593 ]]

 [[ 0.07386541]
  [-1.8807907 ]
  [-0.84533495]]]
Wall time: 1.99 ms


# Network Class

Structure:
- Invariant Layer Class
- Equivariant Layer Class
- Hierarchical Invariant Network Class (Equivar. + Invar.)
- Deep Hierarchical Evidential Model Class (multiple Hier.Inv.Netw.)

To do:
- Invariant module:
    - Learnable Pooling in init() and call()
    - call() Description of arguments and return
- Equivariant module:
    - call() Description of arguments and return

In [9]:
# Settings: layer counts used when constructing the network modules below

n_dense_inv = 2 # number of dense layers before pooling in the invariant network
n_dense_post = 2 # number of dense layers after pooling in the invariant network
n_dense_equiv = 2 # number of dense layers in the equivariant network
n_equiv = 2 # number of equivariant modules to be stacked

## Invariant Module

In [178]:
class InvariantModule(tf.keras.Model):
    """Implements an invariant nn module as proposed by Bloem-Reddy and Teh (2019)."""

    def __init__(self, n_dense_inv, n_dense_post=None):
        """
        Creates an invariant function with mean pooling.
        ----------

        Arguments:
        n_dense_inv  : int -- no of layers pre pooling in the invariant network
        n_dense_post : int or None -- no of layers post pooling in the invariant network
                       (defaults to n_dense_inv if None)
        """

        super(InvariantModule, self).__init__()

        if n_dense_post is None:
            n_dense_post = n_dense_inv

        # Pre pooling network (applied to each set element separately)
        self.module = tf.keras.Sequential([tf.keras.layers.Dense(units=64,
                                                                 activation='elu',
                                                                 kernel_initializer='glorot_normal')
                                           for _ in range(n_dense_inv)])

        # TODO: learnable pooling; None means plain mean pooling is used
        self.weights_layer = None

        # Post pooling network
        self.post_pooling_dense = tf.keras.Sequential([tf.keras.layers.Dense(units=64,
                                                                             activation='elu',
                                                                             kernel_initializer='glorot_normal')
                                                       for _ in range(n_dense_post)])

    def call(self, x):
        """
        Transforms the input into an invariant representation.
        ----------

        Arguments:
        x : tf.Tensor of shape (..., n_elements, n_features) -- the second to last axis
            indexes the exchangeable elements that are pooled over
        --------

        Returns:
        out: tf.Tensor of shape (..., 64) -- the n_elements axis is removed by the pooling
        """

        # Embed each element separately (Dense layers act on the last axis only)
        x_emb = self.module(x)

        # Mean pooling over the element axis (second to last axis).
        # Pooling over the last axis would average the embedding features instead, leaving the
        # output dependent on the order and the number of elements.
        w_x = tf.reduce_mean(x_emb, axis=-2)

        # Increase representational power
        out = self.post_pooling_dense(w_x)
        return out

In [216]:
# Test if dimensionality reduction of invariant module works

print('{} - Shape of the simulated batch of data'.format(simulated_batch["X"].shape))

inv = InvariantModule(n_dense_inv)
reduce_to_3D = inv(x=simulated_batch["X"])
# Report the shape of the tensor just computed (the former name `d_prime` is not defined in this notebook)
print('{} - Shape of the first invariant module output'.format(reduce_to_3D.shape))

# A fresh module is needed here: reusing the first instance fails with an incompatible input size,
# since its layers were already built for the 4D input above (resetting the graph does not help)
inv = InvariantModule(n_dense_inv)
reduce_to_2D = inv(reduce_to_3D)
print('{} - Shape of the second invariant module output'.format(reduce_to_2D.shape))

(2, 69, 49, 1) - Shape of simulated batch of data
(2, 69, 64) - Shape of first invariant module output
(2, 64) - Shape of second invariant module output


## Equivariant Module

In [350]:
class EquivariantModule(tf.keras.Model):
    """Implements an equivariant nn module as proposed by Bloem-Reddy and Teh (2019)."""

    def __init__(self, n_dense_equiv, n_dense_inv):
        """
        Creates an equivariant neural network consisting of a FC network with
        equal number of hidden units in each layer and an invariant module
        with the same FC structure.
        ----------

        Arguments:
        n_dense_equiv : int -- no of layers in the equivariant network
        n_dense_inv   : int -- no of layers passed on to the nested invariant module
        """

        super(EquivariantModule, self).__init__()

        # Equivariant Network
        self.module = tf.keras.Sequential([tf.keras.layers.Dense(units=32,
                                                                 activation='elu',
                                                                 kernel_initializer='glorot_normal')
                                           for _ in range(n_dense_equiv)])

        self.invariant_module = InvariantModule(n_dense_inv)

    def call(self, x):
        """
        Transforms the input into an equivariant representation.
        ----------

        Arguments:
        x : tf.Tensor of shape (..., n_elements, n_features) -- works for 3D as well as 4D input
        --------

        Returns:
        out: tf.Tensor of shape (..., n_elements, 32)
        """

        # Run input through invariant network
        x_inv = self.invariant_module(x)

        # Number of elements along the axis to be permuted (position -2 in 3D as well as 4D).
        # Fall back to the dynamic shape if the static one is unknown (e.g. inside a traced
        # function with a variable number of elements), where int(None) would fail.
        n_elements = x.shape[-2]
        if n_elements is None:
            n_elements = tf.shape(x)[-2]

        # Repeat x_inv once per element so that it lines up with x
        x_inv = tf.repeat(tf.expand_dims(x_inv, axis=-2), repeats=n_elements, axis=-2)

        # Concat original input with invariant transformation
        x = tf.concat((x_inv, x), axis=-1)

        # Run through equivariant network
        out = self.module(x)

        return out

In [351]:
# Check the output shapes of the equivariant module for 4D and 3D input

print('{} - Shape of the simulated batch of data'.format(simulated_batch["X"].shape))

# 4D input: the raw simulated batch
equiv = EquivariantModule(n_dense_equiv, n_dense_inv)
x_equiv4D = equiv(simulated_batch["X"])
print('{} - Shape of the equivariant module output in 4D'.format(x_equiv4D.shape))

# 3D input: output of the first invariant module (a new module instance is created for it)
equiv = EquivariantModule(n_dense_equiv, n_dense_inv)
x_equiv3D = equiv(reduce_to_3D)
print('{} - Shape of the equivariant module output in 3D'.format(x_equiv3D.shape))

(2, 69, 49, 1) - Shape of the simulated batch of data
(2, 69, 49, 32) - Shape of the equivariant module output in 4D
(2, 69, 32) - Shape of the equivariant module output in 3D


In [352]:
# Feed the 4D output of the equivariant module into a new invariant module

inv = InvariantModule(n_dense_inv)
inv_with_equiv_input = inv(x_equiv4D)
print(
    '{} - Shape of the invariant module output when given the 4D-output of the equivariant module'.format(
        inv_with_equiv_input.shape
    )
)

(2, 69, 64) - Shape of the invariant module output when given the 4D-output of the equivariant module


In [353]:
# Test if multiple equivariant modules are stackable

equiv = EquivariantModule(n_dense_equiv, n_dense_inv)
equiv1 = equiv(x=x_equiv4D)
# Print the shape of the stacked output (equiv1), not of its input (x_equiv4D)
print('{} - Shape of the equivariant module output when given the 4D-output of the equivariant module'.format(equiv1.shape))

(2, 69, 49, 32) - Shape of the equivariant module output when given the 4D-output of the equivariant module
