# Distribution Experiment

In [1]:
import sys, os
import warnings
import tqdm
import random
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Put the project's source directory at the front of sys.path.
# The location is built relative to the current working directory
# ("../../src"), so this assumes the kernel runs two levels below the
# project root.
cwd = os.getcwd()
path = f"{cwd}/../../src"
sys.path[:0] = [path]

# toy datasets
from data.toy import RBIGData

# Experiments
from experiments.distributions import DistributionExp

# Kernel Dependency measure
from models.dependence import HSIC, train_rbf_hsic
from models.kernel import estimate_sigma, sigma_to_gamma, gamma_to_sigma, get_param_grid

# RBIG IT measures
from models.ite_algorithms import run_rbig_models

import scipy.io as scio

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Install a blanket "ignore" filter so warnings do not clutter cell outputs.
warnings.filterwarnings(action='ignore')

%load_ext autoreload
%autoreload 2

In [2]:
# Display the source directory that was inserted into sys.path above.
path

'/home/emmanuel/projects/2019_hsic_align/notebooks/4_distributions/../../src'

## Datasets

* Samples - [500, 1K, 5K, 10K, 30K, 50K]
* Dimensions - [2, 3, 10, 50, 100]
* Trials - [1, 5]
* IT measures - [TC, H, MI, KLD]
* Distributions - [Linear, Gaussian, T-Student]

### Example - Gaussian Distribution

In [3]:
# Root folder of the reference MI datasets and the sub-folder holding the
# Gaussian samples.
data_path = "/media/disk/erc/papers/2018_RBIG_IT_measures/2018_RBIG_IT_measures/reproducible_results/DATA/"
gauss_data = data_path + "MI_gaus/"
# Single example file; the name presumably encodes 3 dimensions, 500 samples
# and trial 1 ("tryal" is how the files are spelled) -- TODO confirm.
sample_data = "DATA_MI_gaus_nd_3_Ns_500_tryal_1.mat"

In [4]:
# Read the example .mat file into a dict keyed by MATLAB variable name.
dat = scio.loadmat(gauss_data + sample_data)

In [5]:
# Pull the two data matrices and the stored reference MI entry out of the
# loaded file.
X = dat['X']
Y = dat['Y']
mi_val = dat['MI_ori_nats']

#### Using the helper class

In [6]:
from typing import Optional

class MIData:
    """Loader for pre-generated mutual-information (MI) benchmark samples.

    Every sample is stored in its own MATLAB ``.mat`` file. Each file is
    expected to hold two arrays under the keys ``X`` and ``Y`` and a 2-D
    entry ``MI_ori_nats`` whose first element is the reference MI value
    (in nats, going by the key name).

    Dataset
    -------
    trials = 1:5
    samples = 50, 100, 500, 1_000, 5_000
    dimensions = 2, 3, 10, 50, 100
    std = 1:11
    nu = 1:9

    Parameters
    ----------
    distribution : str
        Which family of files to read: ``'gauss'`` or ``'tstudent'``.
    data_path : str, optional
        Root directory containing the ``MI_gaus/`` and ``MI_tstu/``
        sub-folders. A trailing separator is optional. When omitted,
        ``MIData.DEFAULT_DATA_PATH`` is used.

    Raises
    ------
    ValueError
        If ``distribution`` is not one of the supported names.
    """

    # Location used when the caller does not supply ``data_path``.
    DEFAULT_DATA_PATH = "/media/disk/erc/papers/2019_HSIC_ALIGN/data/mi_distributions/"

    # Supported distribution name -> sub-folder below the data root.
    _SUBDIRS = {'gauss': 'MI_gaus', 'tstudent': 'MI_tstu'}

    def __init__(self, distribution: str, data_path: Optional[str] = None) -> None:
        # Guard with isinstance so unhashable inputs still end up in the
        # ValueError below instead of failing inside the dict lookup.
        subdir = self._SUBDIRS.get(distribution) if isinstance(distribution, str) else None
        if subdir is None:
            raise ValueError(f"Unrecognized Dataset: {distribution}")

        self.distribution = distribution
        self.data_path = self.DEFAULT_DATA_PATH if data_path is None else data_path
        # The empty last component makes the result end with a separator, so
        # file names can be appended to ``dist_path`` directly.
        self.dist_path = os.path.join(self.data_path, subdir, "")

    def get_data(self, samples=50, dimensions=2, std=1, trial=1, nu=1):
        """Load one sample file and return ``(X, Y, mi)``.

        Parameters
        ----------
        samples, dimensions, trial
            Values substituted into the file name (``Ns``, ``nd``, ``tryal``).
        std
            Substituted into the file name for ``'gauss'`` only; ignored
            otherwise.
        nu
            Substituted into the file name for ``'tstudent'`` only; ignored
            otherwise.

        Returns
        -------
        tuple
            ``dat['X']``, ``dat['Y']`` and ``dat['MI_ori_nats'][0][0]``
            converted to a Python ``float``.

        Raises
        ------
        ValueError
            If ``self.distribution`` was changed to an unsupported name after
            construction.
        """
        # "tryal" is intentional: it matches the spelling used in the file
        # names on disk.
        if self.distribution == 'gauss':
            fname = (
                f"DATA_MI_gaus_nd_{dimensions}_"
                f"Ns_{samples}_std_{std}_tryal_{trial}.mat"
            )
        elif self.distribution == 'tstudent':
            fname = (
                f"DATA_MI_tstu_nd_{dimensions}_"
                f"Ns_{samples}_tryal_{trial}_nu_{nu}.mat"
            )
        else:
            raise ValueError(f"Unrecognized distribution '{self.distribution}'")

        dat = scio.loadmat(f"{self.dist_path}{fname}")
        return dat['X'], dat['Y'], float(dat['MI_ori_nats'][0][0])
            

In [7]:
# Small throwaway mapping of string keys to letters.
itera = dict([('1', 'a'), ('2', 'b')])

In [8]:
# Print every key next to its value, one pair per line.
for key, value in itera.items():
    print(key, value)

1 a
2 b


In [9]:
# Load one sample through the helper class, relying on get_data()'s default
# arguments.
dataset = 'tstudent'
mi_loader = MIData(dataset)

x, y, mi = mi_loader.get_data()

In [6]:
SAVE_PATH = "/home/emmanuel/projects/2019_hsic_align/results/hsic/"

# Keyword arguments passed unchanged to DistributionExp.
exp_config = dict(
    seed=123,
    factor=1,
    sigma_est='median',
    n_gamma=10,
    save_path=SAVE_PATH,
    save_name='dist_v2_belkin',
)
clf_exp = DistributionExp(**exp_config)

# run full experiment
# NOTE(review): the saved output below this cell ends in KeyboardInterrupt,
# so the recorded run did not finish.
clf_exp.run_experiment()

Function: gauss


KeyboardInterrupt: 