In [1]:
%%capture

from lmi import lmi
import time
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
from tqdm.notebook import tqdm
import torch
from scipy.stats import kendalltau

# XLA/JAX client memory settings. They are set *before* the `bmi` import below,
# presumably because bmi's estimators run on JAX and these variables are read
# when the backend starts up (TODO confirm against the bmi/JAX docs).
# Per the JAX documentation: PREALLOCATE=false turns off up-front GPU memory
# preallocation, MEM_FRACTION caps the preallocated fraction, and
# ALLOCATOR=platform allocates/frees memory on demand.
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"]="false"
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]=".10"
os.environ["XLA_PYTHON_CLIENT_ALLOCATOR"]="platform"
from bmi.estimators import MINEEstimator as MINE
from bmi.estimators import InfoNCEEstimator as InfoNCE

# Seed the global torch and NumPy random states so reruns from a fresh kernel
# are repeatable (the data generator in this notebook draws from `np.random`).
torch.manual_seed(2121)
np.random.seed(2121)

In [2]:
def generate_gaussian_dataset(ambient, intrinsic, nuisance, antidiag, samples=10**3, rng=None):
    """Sample paired Gaussian data whose dependence lives in a few dimensions.

    `intrinsic` independent bivariate Gaussian pairs are drawn with zero mean
    and covariance ``[[6, antidiag], [antidiag, 3.5]]``; the first coordinate
    of each pair becomes a column of X and the second a column of Y.  Each
    output matrix is laid out column-wise as::

        [ intrinsic columns | redundant columns | nuisance columns ]

    * redundant columns are exact copies of randomly chosen intrinsic columns
      (chosen with replacement, and independently for X and for Y);
    * nuisance columns are independent standard-normal noise.

    Parameters
    ----------
    ambient : int
        Total number of columns of each returned matrix.
    intrinsic : int
        Number of correlated (X, Y) coordinate pairs.
    nuisance : int
        Number of pure-noise columns appended to each matrix.
    antidiag : float
        Off-diagonal covariance of every intrinsic pair.  For the covariance
        matrix to be valid it must satisfy ``antidiag**2 <= 6 * 3.5``.
    samples : int, optional
        Number of rows to draw.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness.  When omitted, NumPy's global random state is
        used (so ``np.random.seed`` controls the output, as before).

    Returns
    -------
    (Xs, Ys) : tuple of numpy.ndarray
        Two arrays of shape ``(samples, ambient)``.

    Raises
    ------
    AssertionError
        If ``2 * intrinsic + nuisance > ambient``, i.e. there is not room for
        at least `intrinsic` redundant columns.
    """
    assert (intrinsic*2)+nuisance <= ambient, "Dimensionality not adding up"

    # The `np.random` module exposes `normal`, `multivariate_normal` and
    # `choice` under the same names as Generator/RandomState objects, so it
    # can stand in as the default sampler without changing legacy behaviour.
    sampler = np.random if rng is None else rng

    # NOTE: the order of the draws below is kept fixed so that seeded runs
    # against the global state reproduce earlier results.
    X_nuisance = sampler.normal(size=(samples, nuisance))
    Y_nuisance = sampler.normal(size=(samples, nuisance))

    cov = np.array([[6, antidiag], [antidiag, 3.5]])

    # Columns alternate x0, y0, x1, y1, ... after stacking the pairs.
    pts = np.hstack([sampler.multivariate_normal([0, 0], cov, size=samples)
                     for _ in range(intrinsic)])

    # One copy of each intrinsic dimension: even columns -> X, odd -> Y.
    X_intrinsic = pts[:, 0::2]
    Y_intrinsic = pts[:, 1::2]

    # Fill the remaining width with copies of randomly picked intrinsic columns.
    n_redundant = ambient - (intrinsic + nuisance)
    X_redundant = pts[:, sampler.choice(range(0, 2*intrinsic, 2), size=n_redundant)]
    Y_redundant = pts[:, sampler.choice(range(1, 2*intrinsic, 2), size=n_redundant)]

    Xs = np.hstack((X_intrinsic, X_redundant, X_nuisance))
    Ys = np.hstack((Y_intrinsic, Y_redundant, Y_nuisance))

    return Xs, Ys

def mi_from_rho(rho, intrinsic):
    """Mutual information, in bits, of `intrinsic` identical Gaussian pairs.

    Despite its name, `rho` is treated as a covariance: it is divided by
    sqrt(6 * 3.5) to obtain a correlation coefficient (6 and 3.5 are the
    variances used by `generate_gaussian_dataset`).  Each pair contributes
    -0.5 * log2(1 - correlation**2) bits, and the total is that amount
    multiplied by `intrinsic`.
    """
    correlation = rho / np.sqrt(6*3.5)
    bits_per_pair = -0.5 * np.log2(1 - correlation**2)
    return bits_per_pair * intrinsic

def rho_from_mi(mi, intrinsic):
    """Covariance that makes `intrinsic` Gaussian pairs share `mi` bits in total.

    The total `mi` is split evenly across the pairs; the correlation that
    gives mi / intrinsic bits per pair is sqrt(1 - 2**(-2 * mi / intrinsic)).
    That correlation is scaled by sqrt(6 * 3.5) (the variances used by
    `generate_gaussian_dataset`), so the returned value is the non-negative
    off-diagonal covariance.
    """
    exponent = -2*mi/intrinsic
    correlation = np.sqrt(1 - 2**exponent)
    return np.sqrt(6*3.5) * correlation

In [3]:
# --- Experiment configuration -------------------------------------------
ambient = 1000                          # columns in each of Xs and Ys
intrinsic = 4                           # correlated (X, Y) coordinate pairs
nuisance = (ambient - intrinsic)//2     # pure-noise columns per variable
true_mi = 1                             # ground-truth MI targeted by the data
antidiag = rho_from_mi(true_mi, intrinsic)  # covariance yielding `true_mi`
N_samples = 5000

# Parallel result columns; one entry is appended to each list per estimate.
d = {"Estimator": [], "Estimate": [], "True MI": []}

for trials in range(1):

    Xs, Ys = generate_gaussian_dataset(
        ambient, intrinsic, nuisance, antidiag, samples=N_samples
    )

    # LMI with N_dims = 2, 4, 6, 8.
    for k in range(2, 9, 2):
        d['Estimator'].append('LMI-%d'%k)
        # Only the first element of the lmi.lmi output is used; it is averaged
        # while ignoring NaNs (presumably per-sample estimates - TODO confirm).
        lmi_first_output = lmi.lmi(Xs, Ys, N_dims=k)[0]
        d['Estimate'].append(np.nanmean(lmi_first_output))
        d['True MI'].append(true_mi)

    # Neural baselines. Dividing by ln(2) presumably converts nats to bits
    # (assumes the bmi estimators report nats - TODO confirm).
    infonce = InfoNCE(verbose=False)
    infonce_mi = infonce.estimate(Xs, Ys)/np.log(2)

    mine = MINE(verbose=False)
    mine_mi = mine.estimate(Xs, Ys)/np.log(2)

    # Record MINE first, then InfoNCE.
    d['Estimator'].extend(["MINE", "InfoNCE"])
    d['Estimate'].extend([mine_mi, infonce_mi])
    d['True MI'].extend([true_mi, true_mi])
    

In [4]:
# Build the results table from the parallel lists in `d`
# (columns: Estimator, Estimate, True MI; one row per recorded estimate).
df = pd.DataFrame(d)

In [5]:
# Display the results table as this cell's output.
df

Unnamed: 0,Estimator,Estimate,True MI
0,LMI-2,0.295732,1
1,LMI-4,0.719762,1
2,LMI-6,0.670632,1
3,LMI-8,0.686974,1
4,MINE,-1e-06,1
5,InfoNCE,-0.001417,1
