In [6]:
import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Input
from tensorflow.keras.models import Model
from tqdm import tqdm

from MINE.cmi.generator import knn
from MINE.gtm import GTM
from MINE.mi.MineClassif import MineClassif
# Font-size tiers shared by every figure in this notebook.
SMALL_SIZE = 10
MEDIUM_SIZE = 20
BIGGER_SIZE = 30

# Apply all matplotlib defaults in one place.
plt.rcParams.update({
    'figure.figsize': (20, 15),        # default figure size
    'font.size': SMALL_SIZE,           # default text size
    'axes.titlesize': SMALL_SIZE,      # axes title
    'axes.labelsize': MEDIUM_SIZE,     # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,     # x tick labels
    'ytick.labelsize': SMALL_SIZE,     # y tick labels
    'legend.fontsize': SMALL_SIZE,     # legend entries
    'figure.titlesize': BIGGER_SIZE,   # figure-level title
})

In [7]:
def construct_model2(input_x_shape, input_y_shape, output, hidden=64):
    """Build a two-input Keras model with one hidden layer and one output unit.

    The two inputs are concatenated, passed through a ``tanh`` Dense layer of
    width ``hidden``, and then through a single-unit Dense layer.

    Args:
        input_x_shape: Number of features per sample for the first input.
        input_y_shape: Number of features per sample for the second input.
        output: Activation passed to the final one-unit Dense layer.
        hidden: Number of units in the hidden Dense layer.

    Returns:
        An uncompiled ``Model`` whose inputs are ``[x, y]`` and whose output
        is the final Dense layer.
    """
    x_in = Input(shape=(input_x_shape,))
    y_in = Input(shape=(input_y_shape,))

    joined = layers.Concatenate()([x_in, y_in])
    hidden_out = layers.Dense(hidden, activation='tanh')(joined)
    head = layers.Dense(1, activation=output)(hidden_out)

    return Model(inputs=[x_in, y_in], outputs=head)

def calculate_mi(X, Y):
    """Train a fresh ``MineClassif`` on ``(X, Y)`` and return its MI estimate.

    A new sigmoid-output network is built for every call, wrapped in
    ``MineClassif``, compiled with Adam and binary cross-entropy, and fitted
    for at most 1000 epochs with early stopping on the training loss
    (patience 10, best weights restored).

    Args:
        X: 2-D array-like; ``X.shape[1]`` sets the width of the first input.
        Y: 2-D array-like; ``Y.shape[1]`` sets the width of the second input.

    Returns:
        Whatever ``MineClassif.estimate_MI(X, Y, n_shuffles=30)`` returns
        (presumably a scalar estimate; confirm against the MINE package).
    """
    classifier = construct_model2(X.shape[1], Y.shape[1], 'sigmoid')
    mine = MineClassif(classifier)
    mine.compile(optimizer='adam', loss='binary_crossentropy')

    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        mode='min',
        patience=10,
        restore_best_weights=True,
    )
    # The fit history is not needed afterwards, so the return value is dropped.
    mine.fit(X, Y, batch_size=256, epochs=1000, callbacks=[early_stop], verbose=0)

    return mine.estimate_MI(X, Y, n_shuffles=30)

def sampling(X, Y, bootstrap, avg, k, c):
    """Run ``knn`` on column ``c`` of ``X`` and ``Y``, given the first ``c`` columns.

    Args:
        X: 2-D array; column ``c`` is passed as the first ``knn`` argument and
            columns ``0..c-1`` as the third.
        Y: Array reshaped to a single column and passed as the second argument.
        bootstrap: Forwarded unchanged to ``knn``.
        avg: Forwarded unchanged to ``knn``.
        k: Forwarded unchanged to ``knn`` (presumably the neighbour count; confirm).
        c: Index of the column of ``X`` to use.

    Returns:
        A pair ``(Y_, X_)``: the second value returned by ``knn`` cast to
        ``float64``, followed by the first value returned by ``knn``. The
        third value returned by ``knn`` is discarded.
    """
    selected_col = X[:, [c]]
    y_column = Y.reshape(-1, 1)
    preceding_cols = X[:, :c]

    X_, Y_, _ = knn(selected_col, y_column, preceding_cols, k, bootstrap, avg)
    return tf.cast(Y_, 'float64'), X_

In [8]:
# Sampling variants to compare: (label stored in the 'method' column,
# bootstrap flag, avg flag). Together with K_VALUES, the order here fixes the
# order in which rows are appended: for each method, k=2 then k=10.
SAMPLING_METHODS = [
    ('original', False, False),
    ('bootstrap', True, False),
    ('bootstrap_avg', True, True),
]
K_VALUES = (2, 10)

result = []
# NOTE(review): only TensorFlow's global seed is set. If GTM.generate or knn
# draw from another RNG (e.g. NumPy), repeated runs will differ; confirm.
tf.random.set_seed(77)
for i in tqdm(range(100)):
    # A new GTM and a new sample of 256 points for every repetition.
    gtm = GTM(12, 0.9)
    X, Y = gtm.generate(256)
    for c in range(1, 11):
        for method, bootstrap, avg in SAMPLING_METHODS:
            for k in K_VALUES:
                Y_, X_ = sampling(X, Y, bootstrap, avg, k=k, c=c)
                # Row layout matches the DataFrame columns: iter, method, k, c, mi.
                result.append([i, method, k, c, calculate_mi(Y_, X_)])

100%|██████████████████████████████████████████████████████████████████████████████| 100/100 [2:00:16<00:00, 72.17s/it]


In [None]:
# Convert the accumulated rows into a DataFrame, one row per
# (iter, method, k, c) run with its MI estimate.
result = pd.DataFrame(
    data=result,
    columns=['iter', 'method', 'k', 'c', 'mi'],
)

In [13]:
# Create the output folder first so the results of the long run are not lost
# to a FileNotFoundError on a fresh checkout.
os.makedirs('clean_results', exist_ok=True)
with open('clean_results/independence.pkl', 'wb') as fd:
    pickle.dump(result, fd)

In [12]:
# Display the results frame: one row per (iter, method, k, c) run with its MI estimate.
result

Unnamed: 0,iter,method,k,c,mi
0,0,original,2,1,0.002601
1,0,original,10,1,0.001904
2,0,bootstrap,2,1,0.017310
3,0,bootstrap,10,1,0.029027
4,0,bootstrap_avg,2,1,-0.002017
...,...,...,...,...,...
5995,99,original,10,10,0.005210
5996,99,bootstrap,2,10,0.000251
5997,99,bootstrap,10,10,0.024368
5998,99,bootstrap_avg,2,10,0.001542
