# Simulated Distributions

In this notebook, I will walk through some of the distributions we used in order to generate some fake data.

In [1]:
import sys, os
cwd = os.getcwd()
sys.path.insert(0, f'{cwd}/../../src')
sys.path.insert(0, f'{cwd}/../../src/itetoolbox')

import numpy as np
import ite
from sklearn.utils import check_random_state
from data.toy import entropy_marginal

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Code

### Distribution Class Generator

In [52]:
class DistData:
    def __init__(
        self,
        n_samples: int=1000,
        d_dimensions: int=3,
        distribution: str="gauss",
        mu: float=0.0,
        sigma: float=1.0,
        weight: float=2.0,
        bias: float=0.5,
        nu: float=1.0,
        gauss_state: int=123,
        dim_state: int=111,
        trans_state: int=123,
    )-> None:
        self.n_samples = n_samples
        self.d_dimensions = d_dimensions
        self.distribution = distribution
        self.mu = mu
        self.sigma = sigma
        self.weight = weight
        self.bias = bias
        self.nu = nu
        self.gauss = check_random_state(gauss_state)
        self.dim_state = check_random_state(dim_state)
        self.trans_state = check_random_state(trans_state)

    def data(self):

        if self.distribution == "gauss":

            # generate data Gaussian data
            self.samples = self.mu + self.sigma * self.gauss.randn(self.n_samples, self.d_dimensions)
            
            # random rotation (uniformly distributed)
            self.A = self.trans_state.rand(self.d_dimensions, self.d_dimensions)
            
            # output data
            self.X = self.samples @ self.A
        
        elif self.distribution == 'linear':
            
            # generate data from normal dist
            self.samples= self.mu + self.sigma * self.gauss.randn(self.n_samples, self.d_dimensions)
            
            # random rotation (uniformly distributed)
            d_rot = self.dim_state.randn(1, self.d_dimensions)
            
            # linear transformation on all dimensions
            for idim in range(self.d_dimensions):
                exponent = self.weight * d_rot[:, idim] + self.bias
                self.samples[:, idim] = np.sign(self.samples[:, idim]) * np.abs(self.samples[:, idim])**exponent
            
            # random rotation (uniformly distributed)
            self.A = self.trans_state.rand(self.d_dimensions, self.d_dimensions)
            self.X = self.samples @ self.A
            
        else:
            raise ValueError("Unrecognized distribution...")

        return self.X
    
    def entropy(self):
        
        if self.distribution == "gauss":
            
            # calculate entropy
            return entropy_marginal(self.X).sum() + np.linalg.slogdet(self.A)[1]
        
        if self.distribution == "linear":
            
            # calculate entropy
            return entropy_marginal(self.X).sum() + np.linalg.slogdet(self.A)[1]
        else:
            raise ValueError("Unrecognized distribution...")


### Distribution I - Rotated Gaussian Dataset

In [53]:
random_state = 123
mu = 0.0
sigma = 1.0
n_samples = 10000
d_dimensions = 100
distribution = 'gauss'

# initialize class
clf_datadist = DistData(
    n_samples=n_samples,
    mu = mu,
    sigma = sigma,
    d_dimensions=d_dimensions
    
)

# generate samples
X = clf_datadist.data()

# calculate entropy
H_x = clf_datadist.entropy()

print(f"Entropy: {H_x:.4f}")

Entropy: 518.0578


### Distribution II - Rotated Linear Dataset

In [54]:
random_state = 123
mu = 0.0
sigma = 1.0
n_samples = 10000
d_dimensions = 100
distribution = 'linear'

# initialize class
clf_datadist = DistData(
    n_samples=n_samples,
    mu = mu,
    sigma = sigma,
    d_dimensions=d_dimensions,
    distribution=distribution
    
)

# generate samples
X = clf_datadist.data()

# calculate entropy
H_x = clf_datadist.entropy()

print(f"Entropy: {H_x:.4f}")

Entropy: 4602.8497
