In this notebook, we calculate the "true" mutual information (MI) for a set of synthetic datasets. Unless an analytic expression for the MI is available, we estimate the MI integral with Monte Carlo integration using a large number of samples. The results are then saved to disk.

### Import packages

In [1]:
import os
import numpy as np
from gmm_mi.gmm import GMM
from gmm_mi.gmm_mi import GMM_MI
import gmm_mi.data.synthetic_data as synthetic_data
from gmm_mi.utils import analytic_MI

### Iterate over the datasets and store the results

In [2]:
# --- Configuration ---------------------------------------------------------
# Integration method used for datasets without an analytic MI:
# 'MC' (Monte Carlo) or 'quad' (quadrature).
MI_method = 'MC'
# Number of Monte Carlo samples passed to `estimate_MI_MC`. This is a count,
# so it is stored as an exact integer rather than the float literal 1e8.
MC_samples = int(1e8)
# Parallel result lists: dataset_names[i] is the dataset whose MI is MI_values[i].
dataset_names = []
MI_values = []

In [3]:
# Start from empty result lists so that re-running this cell does not append
# duplicate entries to the results of a previous run.
dataset_names = []
MI_values = []

# Every module-level attribute of `synthetic_data` that is a GMM instance is
# treated as one synthetic dataset; all other attributes are skipped.
for d_name, obj in vars(synthetic_data).items():
    if not isinstance(obj, GMM):
        continue

    if len(obj.weights_init) == 1:
        # `weights_init` has a single entry (presumably a one-component
        # mixture), so the analytic formula is used instead of numerical
        # integration; it takes the first covariance matrix as input.
        MI_value = analytic_MI.calculate_MI_D1_analytical(obj.covariances_init[0])
    elif MI_method == 'MC':
        MI_value = obj.estimate_MI_MC(MC_samples=MC_samples)
    elif MI_method == 'quad':
        MI_value = obj.estimate_MI_quad()
    else:
        # Fail loudly instead of hitting a NameError or silently re-using
        # the MI value computed for the previous dataset.
        raise ValueError(
            f"Unknown MI_method {MI_method!r}; expected 'MC' or 'quad'."
        )

    # Record name and value together, only after the computation succeeded,
    # so the two lists always stay aligned.
    dataset_names.append(d_name)
    MI_values.append(MI_value)

### Save the MI values

In [4]:
# Output directory for the computed MI values (relative to the working directory).
root_folder = './MI_synthetic_datasets/'
# `exist_ok=True` creates the folder only if needed, without a separate
# existence check, so the cell can be re-run safely.
os.makedirs(root_folder, exist_ok=True)

# One file per dataset, named "MI_<dataset name>"; `np.save` appends the
# ".npy" extension when the file name does not already end with it.
for name, MI_value in zip(dataset_names, MI_values):
    np.save(f"{root_folder}MI_{name}", MI_value)