In [13]:
import numpy as np
import pandas as pd

np.random.seed(0)

# Draw the two Gaussian hidden factors as the rows of a (2, 5000) array,
# then flip them into columns so each row of the frame is one sample.
hidden = np.random.normal(loc=0, scale=1, size=(2, 5000))
data = pd.DataFrame(np.transpose(hidden), columns=['h_0', 'h_1'])

data.head()

Unnamed: 0,h_0,h_1
0,1.764052,0.309724
1,0.400157,-0.737456
2,0.978738,-1.53692
3,2.240893,-0.562255
4,1.867558,-1.599511


In [14]:
# Build every observation column from the hidden factors h_0 and h_1 first,
# then attach them to `data` in the order listed below.
observation_columns = {
    # Set 0: purely linear observations
    'x_00': data['h_0'] + data['h_1'],
    'x_01': data['h_0'],
    # Set 1: the second observation passes h_0 through a tanh non-linearity
    'x_10': data['h_0'] + data['h_1'],
    'x_11': np.tanh(data['h_0']),
    # Set 2: as set 1, but with the hidden factors scaled by 10 beforehand
    'x_20': 10 * data['h_0'] + 10 * data['h_1'],
    'x_21': np.tanh(10 * data['h_0']),
    # Set 3: the non-linearity is a relu instead of tanh
    'x_30': data['h_0'] + data['h_1'],
    'x_31': np.maximum(0, data['h_0']),
}
for column, values in observation_columns.items():
    data[column] = values

data.head()

Unnamed: 0,h_0,h_1,x_00,x_01,x_10,x_11,x_20,x_21,x_30,x_31
0,1.764052,0.309724,2.073776,1.764052,2.073776,0.942954,20.737762,1.0,2.073776,1.764052
1,0.400157,-0.737456,-0.337299,0.400157,-0.337299,0.380083,-3.37299,0.999331,-0.337299,0.400157
2,0.978738,-1.53692,-0.558182,0.978738,-0.558182,0.752519,-5.581819,1.0,-0.558182,0.978738
3,2.240893,-0.562255,1.678638,2.240893,1.678638,0.977627,16.786384,1.0,1.678638,2.240893
4,1.867558,-1.599511,0.268047,1.867558,0.268047,0.953372,2.680469,1.0,0.268047,1.867558


In [15]:
from sklearn.decomposition import PCA
from representation_learning.metrics import mutual_information_gap

# Score the latent space of every decomposition method for one observation set
def disentanglement_coefficients(df, x_variables):
    """Return one mutual-information-gap score per decomposition method.

    For each method (currently only PCA) a latent space is built from the
    columns ``x_variables`` of ``df`` via ``create_latent_space``; the
    resulting frame is then passed to ``mutual_information_gap`` together
    with the hidden-factor column names ``h_0``/``h_1`` and the latent
    column names ``z_0``/``z_1``.

    Parameters
    ----------
    df : frame that holds the ``h_0``/``h_1`` columns and the observation columns.
    x_variables : names of the observation columns to decompose.

    Returns
    -------
    list
        The values returned by ``mutual_information_gap``, in method order.
    """
    return [
        mutual_information_gap(
            create_latent_space(df, x_variables, method),
            ['h_0', 'h_1'],
            ['z_0', 'z_1'],
        )
        for method in [PCA]
    ]

# Get latent representations for a given method
def create_latent_space(df, x_variables, latent_method):
    """Fit ``latent_method`` on the selected columns and append the latent codes.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; all of its columns are carried through to the result.
    x_variables : list of str
        Names of the columns of ``df`` that are fed to the decomposition.
    latent_method : callable
        Called as ``latent_method(n_components=2, random_state=0)``; the
        returned object must provide ``fit_transform``.

    Returns
    -------
    pandas.DataFrame
        A new frame containing every column of ``df`` plus the two latent
        columns ``z_0`` and ``z_1``, on the same index as ``df``.
    """
    model = latent_method(n_components=2, random_state=0)
    # Coerce to a plain array so the column names below are always applied,
    # even if the estimator hands back a labelled container.
    codes = np.asarray(model.fit_transform(df[x_variables]))
    # pd.concat(axis=1) aligns rows on index labels. Building the latent frame
    # on df's own index keeps the rows paired one-to-one; a default RangeIndex
    # would misalign (and NaN-pad) whenever df was filtered, shuffled or
    # otherwise re-indexed.
    latent = pd.DataFrame(codes, columns=['z_0', 'z_1'], index=df.index)
    return pd.concat([df, latent], axis=1)

# Score each of the four observation sets (columns x_i0 and x_i1 for i = 0..3)
results = []
for i in range(4):
    observations = [f'x_{i}0', f'x_{i}1']
    set_scores = disentanglement_coefficients(data, observations)
    results.append(set_scores)

results

[[0.48474679726528613],
 [0.43937641535105953],
 [0.3611143831513175],
 [0.13680550211115206]]