In [1]:
%%capture

import time
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
from tqdm.notebook import tqdm
import torch

# NOTE(review): these XLA_PYTHON_CLIENT_* variables look like JAX/XLA GPU-memory
# allocator settings; they are set *before* the bmi import below, presumably so
# the backend picks them up at import time -- confirm before reordering.
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"]="false"
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]=".10"
os.environ["XLA_PYTHON_CLIENT_ALLOCATOR"]="platform"
# Neural mutual-information estimators benchmarked in this notebook.
from bmi.estimators import MINEEstimator as MINE
from bmi.estimators import InfoNCEEstimator as InfoNCE

# Seed both torch and NumPy's global RNG (the data generator below draws from
# np.random) so repeated runs start from the same state.
torch.manual_seed(2121)
np.random.seed(2121)

In [2]:
def generate_gaussian_dataset(ambient, intrinsic, nuisance, antidiag, samples=10**3):
    """Draw paired Gaussian samples with informative, redundant and noise columns.

    Each of the ``intrinsic`` dimensions is an independent draw from a zero-mean
    bivariate normal with covariance ``[[6, antidiag], [antidiag, 3.5]]``; the
    first coordinate goes to X and the second to Y. The remaining columns are
    filled with randomly chosen duplicates of those informative columns
    ("redundant") followed by independent standard-normal columns ("nuisance").

    Parameters
    ----------
    ambient : int
        Total number of columns in each returned array.
    intrinsic : int
        Number of independent correlated (X, Y) coordinate pairs.
    nuisance : int
        Number of independent standard-normal noise columns per array.
    antidiag : float
        Off-diagonal entry of the 2x2 covariance matrix.
    samples : int, optional
        Number of rows to draw.

    Returns
    -------
    (Xs, Ys) : tuple of np.ndarray
        Two arrays of shape ``(samples, ambient)`` laid out as
        ``[intrinsic | redundant | nuisance]`` columns.

    Raises
    ------
    AssertionError
        If ``intrinsic + nuisance`` exceeds ``ambient``.

    Notes
    -----
    Uses NumPy's global random state; seed it with ``np.random.seed`` for
    reproducible output.
    """
    assert intrinsic+nuisance <= ambient, "Dimensionality not adding up"

    # Columns left over after the informative and noise columns are accounted for.
    n_redundant = ambient - (intrinsic + nuisance)

    # Noise is drawn first; the order of RNG calls determines the output for a
    # given seed, so it is kept fixed: noise X, noise Y, pairs, then choices.
    noise_x = np.random.normal(size=(samples, nuisance))
    noise_y = np.random.normal(size=(samples, nuisance))

    covariance = np.array([[6, antidiag], [antidiag, 3.5]])

    pair_draws = []
    for _ in range(intrinsic):
        pair_draws.append(
            np.random.multivariate_normal([0, 0], covariance, size=samples)
        )
    # Column layout after stacking: x0, y0, x1, y1, ...
    paired = np.hstack(pair_draws)

    # Even columns hold the X coordinates, odd columns the Y coordinates.
    core_x = paired[:, 0::2]
    core_y = paired[:, 1::2]

    # Pad with randomly selected copies of the informative columns.
    x_picks = np.random.choice(range(0, 2*intrinsic, 2), size=n_redundant)
    y_picks = np.random.choice(range(1, 2*intrinsic, 2), size=n_redundant)
    extra_x = paired[:, x_picks]
    extra_y = paired[:, y_picks]

    Xs = np.hstack((core_x, extra_x, noise_x))
    Ys = np.hstack((core_y, extra_y, noise_y))

    return Xs, Ys

def mi_from_rho(rho, intrinsic, var_x=6.0, var_y=3.5):
    """Mutual information, in bits, implied by a given off-diagonal covariance.

    Despite its name, ``rho`` is the off-diagonal *covariance* of each bivariate
    Gaussian pair, not a correlation coefficient: it is normalised here by
    ``sqrt(var_x * var_y)`` before use. The per-pair value
    ``-0.5 * log2(1 - corr**2)`` is multiplied by ``intrinsic``.

    Parameters
    ----------
    rho : float or array-like
        Off-diagonal covariance between the X and Y coordinate of one pair.
    intrinsic : int
        Number of pairs the per-pair value is scaled by.
    var_x, var_y : float, optional
        Marginal variances of the X and Y coordinates. The defaults match the
        covariance diagonal used by ``generate_gaussian_dataset``.

    Returns
    -------
    float or np.ndarray
        Mutual information in bits.
    """
    correlation = rho / np.sqrt(var_x * var_y)
    return -0.5 * np.log2(1 - correlation**2) * intrinsic

def rho_from_mi(mi, intrinsic, var_x=6.0, var_y=3.5):
    """Off-diagonal covariance that yields a target mutual information.

    Algebraic inverse of ``mi_from_rho``: the total ``mi`` (in bits) is split
    evenly over ``intrinsic`` pairs and the matching correlation is rescaled by
    ``sqrt(var_x * var_y)``. The returned value is therefore a *covariance*
    (suitable as the ``antidiag`` argument of ``generate_gaussian_dataset``),
    not a correlation coefficient.

    Parameters
    ----------
    mi : float or array-like
        Target total mutual information in bits.
    intrinsic : int
        Number of pairs sharing that information; must be non-zero because
        ``mi`` is divided by it.
    var_x, var_y : float, optional
        Marginal variances of the X and Y coordinates. The defaults match the
        covariance diagonal used by ``generate_gaussian_dataset``.

    Returns
    -------
    float or np.ndarray
        Non-negative off-diagonal covariance.
    """
    scale = np.sqrt(var_x * var_y)
    # 1 - 2**(-2 * mi_per_pair) is the squared correlation of one pair.
    return scale * np.sqrt(1 - 2**(-2*mi/intrinsic))

In [3]:
# --- Experiment configuration -------------------------------------------------
N_samples = 5000   # rows drawn per synthetic dataset
intrinsic = 1      # number of informative (X, Y) coordinate pairs
true_mi = 1        # ground-truth mutual information, in bits
# Off-diagonal covariance that produces `true_mi` for the generator's Gaussian.
anti_diag = rho_from_mi(true_mi, intrinsic)

# Estimators to benchmark, listed in the order their rows are recorded.
estimators = (("MINE", MINE), ("InfoNCE", InfoNCE))

# Column-oriented results table; one entry per (layer size, ambient dim, estimator).
smi_d = {
    "Estimate" : [],
    "True MI" : [],
    "Dimensions" : [],
    "Max layer size" : [],
    "Measure" : [],
    "Time" : [],
    }

for L in tqdm(range(4, 11, 2)):

    # Critic widths: first hidden layer 2**L units, second layer half of that.
    L1 = 2**L
    L2 = L1//2

    for ambient in tqdm([10, 100, 1000], leave=False, desc='ambient'):

        # Half of the non-informative columns are pure noise; the generator
        # fills whatever remains with redundant copies.
        nuisance = (ambient - intrinsic)//2

        Xs, Ys = generate_gaussian_dataset(ambient, intrinsic,
                                           nuisance,
                                           anti_diag,
                                           samples=N_samples)

        # Both estimators see the same dataset and the same critic shape.
        for measure, estimator_cls in estimators:
            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments.
            t0 = time.perf_counter()
            estimator = estimator_cls(verbose=False, hidden_layers=(L1, L2))
            estimate_nats = estimator.estimate(Xs, Ys) # in nats
            t1 = time.perf_counter()

            # Dividing by ln(2) converts nats to bits.
            smi_d["Estimate"].append(estimate_nats/np.log(2))
            smi_d['Dimensions'].append(ambient)
            # Record the configured ground truth rather than a literal, so the
            # column stays correct if `true_mi` is changed above.
            smi_d["True MI"].append(true_mi)
            smi_d["Max layer size"].append(L1)
            smi_d["Measure"].append(measure)
            smi_d["Time"].append(t1-t0)

  0%|          | 0/4 [00:00<?, ?it/s]

ambient:   0%|          | 0/3 [00:00<?, ?it/s]

ambient:   0%|          | 0/3 [00:00<?, ?it/s]

ambient:   0%|          | 0/3 [00:00<?, ?it/s]

ambient:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Make sure the output directory exists so the (expensive) results are not lost
# to an OSError on a fresh checkout, then write the table as CSV.
os.makedirs("../results", exist_ok=True)
pd.DataFrame(smi_d).to_csv("../results/A_critic.csv")