# Test mutual information estimators

## Preamble

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as sps

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Font settings applied to every matplotlib figure in this notebook.
font = dict(family='DejaVu Sans', size=18)

matplotlib.rc('font', **font)

In [None]:
import os
import json
import csv

from datetime import datetime

In [None]:
from pathlib import Path
# Absolute, normalized path of the data directory located two levels above
# the current working directory.
path = os.path.abspath(os.path.join(os.getcwd(), "../../data/"))

In [None]:
# Directory of the synthetic mutual information experiments, built by plain string
# concatenation (so `path` must be a str). It is not referenced again in the visible
# part of this notebook; presumably it is consumed by helpers loaded from
# Settings.ipynb -- TODO confirm.
experiments_path = path + "/mutual_information/synthetic/"

#### Importing the module

In [None]:
import mutinfo.estimators.mutual_information as mi_estimators
from mutinfo.utils.dependent_norm import multivariate_normal_from_MI

In [None]:
### SETTINGS ###
# Execute the shared settings notebook. Names used below but never defined in this
# notebook (`entropy_estimator_params`, `save_estimated_MI`) are presumably provided
# by it -- TODO confirm.
%run ./Settings.ipynb

#### Standard tests with arbitrary mapping

In [None]:
def perform_normal_compressed_test(mi, n_samples, X_dimension, Y_dimension, X_map=None, Y_map=None,
                                   X_compressor=None, Y_compressor=None, verbose=0):
    """
    Estimate mutual information for a single true value, with and without compression.

    Samples are drawn from the distribution returned by
    `multivariate_normal_from_MI(X_dimension, Y_dimension, mi)`, split column-wise
    into X and Y, optionally mapped, and then passed to two estimators.

    Parameters
    ----------
    mi : true mutual information passed to `multivariate_normal_from_MI`.
    n_samples : number of joint samples to draw.
    X_dimension, Y_dimension : number of columns taken for X and for Y.
    X_map, Y_map : optional callables applied to X and Y before estimation.
    X_compressor, Y_compressor : passed as-is to `LossyMutualInfoEstimator`.
    verbose : forwarded to the `fit` and `estimate` calls.

    Returns
    -------
    A pair `(estimate, compressed_estimate)`: the outputs of
    `MutualInfoEstimator.estimate` and `LossyMutualInfoEstimator.estimate`.
    Each is presumably a (mean, std) pair, since the plotting helper in this
    notebook indexes them with [0] and [1] -- TODO confirm.

    Notes
    -----
    Reads the global `entropy_estimator_params`, which is not defined in this
    notebook (presumably loaded from Settings.ipynb).
    """
    # Draw the joint sample and split its columns into X and Y.
    joint_samples = multivariate_normal_from_MI(X_dimension, Y_dimension, mi).rvs(n_samples)
    X = joint_samples[:, :X_dimension]
    Y = joint_samples[:, X_dimension:X_dimension + Y_dimension]

    # Apply the optional mappings.
    if X_map is not None:
        X = X_map(X)
    if Y_map is not None:
        Y = Y_map(Y)

    # Estimate on the mapped samples directly.
    plain_estimator = mi_estimators.MutualInfoEstimator(entropy_estimator_params=entropy_estimator_params)
    plain_estimator.fit(X, Y, verbose=verbose)
    mi_plain = plain_estimator.estimate(X, Y, verbose=verbose)

    # Estimate through the compressed representation.
    lossy_estimator = mi_estimators.LossyMutualInfoEstimator(
        X_compressor, Y_compressor, entropy_estimator_params=entropy_estimator_params)
    lossy_estimator.fit(X, Y, verbose=verbose)
    mi_compressed = lossy_estimator.estimate(X, Y, verbose=verbose)

    return mi_plain, mi_compressed

In [None]:
def perform_normal_compressed_tests_MI(MI, n_samples, X_dimension, Y_dimension, X_map=None, Y_map=None,
                                       X_compressor=None, Y_compressor=None, verbose=0):
    """
    Run `perform_normal_compressed_test` once per true mutual information value.

    A progress header is printed before each experiment. All arguments other
    than `MI` are forwarded unchanged to `perform_normal_compressed_test`.

    Returns
    -------
    Two lists in the order of `MI`: the plain estimates and the estimates
    obtained through the compressed representation.
    """
    n_exps = len(MI)

    plain_estimates = []
    compressed_estimates = []

    for exp_number, true_mi in enumerate(MI, start=1):
        print("\nn_exp = %d/%d\n------------\n" % (exp_number, n_exps))
        plain, compressed = perform_normal_compressed_test(
            true_mi, n_samples, X_dimension, Y_dimension,
            X_map, Y_map, X_compressor, Y_compressor, verbose)
        plain_estimates.append(plain)
        compressed_estimates.append(compressed)

    return plain_estimates, compressed_estimates

In [None]:
def plot_estimated_compressed_MI(MI, estimated_MI, estimated_MI_compressed, title):
    """
    Plot the plain and compressed estimates against the true mutual information.

    Parameters
    ----------
    MI : true mutual information values (x axis; also drawn as the red reference line).
    estimated_MI, estimated_MI_compressed : sequences whose items are indexed
        with [0] (drawn as the curve) and [1] (drawn as a +/- band around it).
    title : title of the axes.
    """
    def _split(estimates):
        # Separate the [0] and [1] components of every estimate into two arrays.
        centers = np.array([estimate[0] for estimate in estimates])
        spreads = np.array([estimate[1] for estimate in estimates])
        return centers, spreads

    plain_mean, plain_std = _split(estimated_MI)
    compressed_mean, compressed_std = _split(estimated_MI_compressed)

    fig, ax = plt.subplots()
    fig.set_figheight(11)
    fig.set_figwidth(16)

    # Major and minor grid lines.
    ax.grid(color='#000000', alpha=0.15, linestyle='-', linewidth=1, which='major')
    ax.grid(color='#000000', alpha=0.1, linestyle='-', linewidth=0.5, which='minor')

    ax.set_title(title)
    ax.set_xlabel("$I(X,Y)$")
    ax.set_ylabel("$\\hat I(X,Y)$")

    ax.minorticks_on()

    # Reference line: the true value plotted against itself.
    ax.plot(MI, MI, label="$I(X,Y)$", color='red')

    # Plain estimate with its band.
    ax.plot(MI, plain_mean, label="$\\hat I(X,Y)$")
    ax.fill_between(MI, plain_mean + plain_std, plain_mean - plain_std, alpha=0.2)

    # Compressed estimate with its band.
    ax.plot(MI, compressed_mean, label="$\\hat I_{compr}(X,Y)$")
    ax.fill_between(MI, compressed_mean + compressed_std,
                    compressed_mean - compressed_std, alpha=0.2)

    ax.legend(loc='upper left')

    # Both axes start at zero; the upper limits stay automatic.
    ax.set_xlim((0.0, None))
    ax.set_ylim((0.0, None))

    plt.show()

### Global parameters

In [None]:
# Grid of true mutual information values under study:
# 41 evenly spaced points on [0, 4].
MI = np.linspace(start=0.0, stop=4.0, num=41)
n_exps = MI.shape[0]

# Number of samples drawn per experiment.
n_samples = 5_000

### Anti-PCA: nonlinear embedding of a low-dimensional latent

In [None]:
from mutinfo.utils.synthetic import normal_to_uniform

In [None]:
# Dimensions of the normal vectors X and Y.
X_dimension = Y_dimension = 1
# Dimension of the compressed representation (PCA components / encoder outputs).
latent_dimension = 2

# Dimension of the embedding that the compressors receive.
embedding_dimension = 32

# Sub-directory name under which this experiment's results are saved.
experiments_dir = 'anti_PCA_%d' % embedding_dimension

#### Generate the training and test data

In [None]:
from scipy.stats import multivariate_normal

In [None]:
# Sizes of the train / test splits of the generated embeddings.
n_train_samples = 6_000
n_test_samples = 1_000

In [None]:
def latent_transform(xi):
    """
    Build the latent representation: a transformed copy of `xi` plus a fresh noise column.

    `xi` is passed through `normal_to_uniform` and rescaled as `2 * u - 1`.
    A second column is produced the same way from newly drawn standard normal
    samples, one per row of `xi`, so it does not depend on `xi`.

    Presumably `normal_to_uniform` maps normal samples to [0, 1], which would
    put both columns in [-1, 1] -- TODO confirm.

    Returns the two parts concatenated along axis 1 (so `xi` must be 2-D).
    """
    n_rows = xi.shape[0]

    # Independent column. The `[:, None]` restores a column axis, which assumes
    # the transformed draw is 1-D -- TODO confirm.
    fresh_normal = multivariate_normal().rvs((n_rows, 1))
    Z = 2.0 * normal_to_uniform(fresh_normal)[:, None] - 1.0

    X = 2.0 * normal_to_uniform(xi) - 1.0

    return np.concatenate((X, Z), axis=1)


# Row vector [0, 1, ..., embedding_dimension - 1] of per-coordinate coefficients.
koeffs = np.arange(embedding_dimension)[np.newaxis, :]

def embedding_transform(latent_X):
    """
    Embed 2-column latent points into `embedding_dimension` coordinates.

    With x0, x1 the first two latent columns and k the coefficient of a
    coordinate, the coordinate is max(a, 0.1 * k * a) where a = x0 + k * x1.
    Coordinates 0 and 1 are then overwritten with x0 and x1 themselves, so
    the latent point can be read directly off the embedding.
    """
    first = latent_X[:, 0, None]
    second = latent_X[:, 1, None]

    pre_activation = first + second * koeffs
    embedded = np.maximum(pre_activation, 0.1 * pre_activation * koeffs)

    # Keep the raw latent coordinates in the first two columns.
    embedded[:, 0] = latent_X[:, 0]
    embedded[:, 1] = latent_X[:, 1]

    return embedded

In [None]:
random_variable = multivariate_normal()

# Draw the normal samples, map them to the latent space and embed them.
# The `[:, None]` adds a column axis, which assumes the draw is 1-D -- TODO confirm.
latent_X = latent_transform(
    random_variable.rvs((n_train_samples + n_test_samples, X_dimension))[:, None]
)
X = embedding_transform(latent_X)

# The first `n_train_samples` rows are for training, the next `n_test_samples` for testing.
X_train, X_test = X[:n_train_samples], X[n_train_samples:n_train_samples + n_test_samples]

## PCA

In [None]:
from sklearn.decomposition import PCA

# Fit a PCA with `latent_dimension` components on the training embeddings
# (the reshape keeps the first axis and flattens any remaining ones).
pca = PCA(n_components=latent_dimension).fit(X_train.reshape(X_train.shape[0], -1))
# Mean squared PCA reconstruction error on the test set (shown as the cell output).
np.mean((pca.inverse_transform(pca.transform(X_test)) - X_test)**2)

In [None]:
# Project the test embeddings with the fitted PCA and scatter the two components.
PCA_latent = pca.transform(X_test)
plt.scatter(PCA_latent[:,0], PCA_latent[:,1])

In [None]:
# Run the experiments with PCA as the compressor: X and Y are mapped to the latent
# space, and each compressor embeds its input and projects it with the fitted PCA.
estimated_MI, estimated_MI_compressed = perform_normal_compressed_tests_MI(
    MI, n_samples, X_dimension, Y_dimension,
    X_map=latent_transform,
    Y_map=latent_transform,
    X_compressor=lambda x : pca.transform(embedding_transform(x)),
    Y_compressor=lambda x : pca.transform(embedding_transform(x)),
    verbose=10,
)

In [None]:
# Plot the true MI against the plain and the PCA-compressed estimates.
plot_estimated_compressed_MI(MI, estimated_MI, estimated_MI_compressed, "Anti-PCA")

In [None]:
# Persist the plain and the PCA-compressed estimates.
# `save_estimated_MI` is not defined in this notebook; presumably it comes from
# Settings.ipynb -- TODO confirm its signature and where it writes.
save_estimated_MI(MI, estimated_MI, experiments_dir + '/coordinates')
save_estimated_MI(MI, estimated_MI_compressed, experiments_dir + '/compressed/PCA')

## Autoencoder

In [None]:
import torch

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
#device = "cpu"
print("Device: " + device)
print(f"Devices count: {torch.cuda.device_count()}")

In [None]:
from mutinfo.torch.layers import AdditiveGaussianNoise

In [None]:
# Encoder: a single linear projection from the embedding to the latent space.
# The commented-out layers are an optional hidden layer left switched off.
encoder = torch.nn.Sequential(
    #torch.nn.Linear(embedding_dimension, embedding_dimension),
    #torch.nn.LeakyReLU(),
    torch.nn.Linear(in_features=embedding_dimension, out_features=latent_dimension),
)

In [None]:
# Decoder: a noise layer on the latent code followed by a two-layer perceptron
# back to the embedding space. Judging by `enabled_on_inference=False`, the noise
# is presumably applied during training only -- TODO confirm in mutinfo.torch.layers.
decoder = torch.nn.Sequential(
    AdditiveGaussianNoise(0.01, relative_scale=True, enabled_on_inference=False),
    torch.nn.Linear(in_features=latent_dimension, out_features=embedding_dimension),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(in_features=embedding_dimension, out_features=embedding_dimension),
)

In [None]:
# Chain the encoder and the decoder into one model and move it to the selected device.
autoencoder = torch.nn.Sequential(encoder, decoder).to(device)

In [None]:
# Adam over all autoencoder parameters; the training objective is the MSE.
optim = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)
loss = torch.nn.MSELoss()

In [None]:
# Training embeddings as float32 tensors created directly on `device`,
# served in shuffled batches of 1000.
dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32, device=device))
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1000, shuffle=True)

In [None]:
from tqdm import trange

# Number of full passes over the training set.
n_epochs = 2000

# Train the autoencoder to reconstruct its own input.
for epoch in trange(n_epochs):
    # The trailing comma unpacks the one-element batch produced by the
    # single-tensor TensorDataset.
    for batch, in dataloader:
        optim.zero_grad()

        # Reconstruct the batch and backpropagate the reconstruction error.
        y = autoencoder(batch)
        loss_value = loss(batch, y)
        loss_value.backward()

        optim.step()
    
    # Every 100th epoch, print the loss of that epoch's last batch
    # (not an average over the epoch).
    if epoch % 100 == 0:
        print(loss_value.item())

In [None]:
# Switch the models to evaluation mode and cast their parameters to float64,
# matching the float64 tensors they are fed below.
# NOTE(review): `autoencoder` wraps these same `encoder` and `decoder` modules, and
# torch's Module.eval()/.double() act in place, so the third line presumably repeats
# the first two -- confirm before removing anything.
encoder = encoder.eval().double()
decoder = decoder.eval().double()
autoencoder = autoencoder.eval().double()

In [None]:
# float64 tensor copies of the train / test embeddings on `device`.
x_train = torch.tensor(X_train, dtype=torch.float64, device=device)
x_test = torch.tensor(X_test, dtype=torch.float64, device=device)

In [None]:
# Mean squared reconstruction error of the autoencoder on the test set (cell output).
torch.mean((autoencoder(x_test) - x_test)**2)

In [None]:
# Reconstruction error restricted to embedding coordinate 0, which
# `embedding_transform` sets equal to the first latent coordinate.
torch.mean((autoencoder(x_test)[:,0] - x_test[:,0])**2)

In [None]:
# Encode the test embeddings, bring the codes back to a NumPy array on the CPU,
# and scatter the two latent coordinates.
AE_latent = encoder(x_test).detach().cpu().numpy()
plt.scatter(AE_latent[:,0], AE_latent[:,1])

In [None]:
# Compare the first embedding coordinate with the first coordinate of the learned code.
plt.scatter(X_test[:,0], AE_latent[:,0])

In [None]:
# Run the experiments with the trained encoder as the compressor: each compressor embeds
# its input, encodes it on `device` in float64 and returns the code as a NumPy array.
# The commented-out alternatives use the first two coordinates of the full
# autoencoder output instead.
estimated_MI, estimated_MI_compressed = perform_normal_compressed_tests_MI(
    MI, n_samples, X_dimension, Y_dimension,
    X_map=latent_transform,
    Y_map=latent_transform,
    X_compressor=lambda x : encoder(torch.tensor(embedding_transform(x), dtype=torch.float64, device=device)).detach().cpu().numpy(),
    Y_compressor=lambda x : encoder(torch.tensor(embedding_transform(x), dtype=torch.float64, device=device)).detach().cpu().numpy(),
    #lambda x : autoencoder(torch.tensor(embedding_transform(x), dtype=torch.float64, device=device)).detach().cpu().numpy()[:,:2],
    #lambda x : autoencoder(torch.tensor(embedding_transform(x), dtype=torch.float64, device=device)).detach().cpu().numpy()[:,:2],
    verbose=10,
)

In [None]:
# Plot the true MI against the plain and the encoder-compressed estimates.
plot_estimated_compressed_MI(MI, estimated_MI, estimated_MI_compressed, "Anti-PCA")

In [None]:
# Persist the plain and the encoder-compressed estimates.
# `save_estimated_MI` is not defined in this notebook (presumably from Settings.ipynb).
# NOTE(review): '/coordinates' is the same target used after the PCA run, so this call
# presumably overwrites or duplicates that result; and '/compressed' has no
# method-specific suffix while the PCA run saves to '/compressed/PCA' -- confirm
# that both are intended.
save_estimated_MI(MI, estimated_MI, experiments_dir + '/coordinates')
save_estimated_MI(MI, estimated_MI_compressed, experiments_dir + '/compressed')