In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.datasets import mnist

2024-07-21 01:48:32.122724: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the MNIST dataset
(x_train, _), (x_test, _) = mnist.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = np.reshape(x_train, (len(x_train), -1))
x_test = np.reshape(x_test, (len(x_test), -1))

# Combine train and test data
data = np.concatenate((x_train, x_test), axis=0)

# Define the dimensions
original_dim = data.shape[1]
latent_dim = 3


In [3]:
# Encoder
inputs = Input(shape=(original_dim,))
h = layers.Dense(64, activation='relu')(inputs)
h = layers.Dense(32, activation='relu')(h)
z_mean = layers.Dense(latent_dim)(h)
z_log_var = layers.Dense(latent_dim)(h)

# Sampling function
def sampling(args):
    z_mean, z_log_var = args
    batch = tf.shape(z_mean)[0]
    dim = tf.shape(z_mean)[1]
    epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Latent space
z = layers.Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

2024-07-21 01:48:34.331004: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Decoder
decoder_h = layers.Dense(32, activation='relu')
decoder_h2 = layers.Dense(64, activation='relu')
decoder_mean = layers.Dense(original_dim, activation='sigmoid')

h_decoded = decoder_h(z)
h_decoded2 = decoder_h2(h_decoded)
x_decoded_mean = decoder_mean(h_decoded2)

# Define the VAE model
vae = Model(inputs, x_decoded_mean)

In [5]:
# Define the loss
reconstruction_loss = tf.keras.losses.mean_squared_error(inputs, x_decoded_mean)
reconstruction_loss *= original_dim
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = tf.reduce_mean(reconstruction_loss + kl_loss)

vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

# Train the VAE
vae.fit(data, data, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fd97890a460>

In [6]:
# Encoder model to transform data to latent space
encoder = Model(inputs, z_mean)

# Transform the dataset
latent_representations = encoder.predict(data)

# Convert the latent representations to a pandas DataFrame
latent_df = pd.DataFrame(latent_representations, columns=['x', 'y', 'z'])

# Display the first few rows of the DataFrame
print(latent_df.head())

          x         y         z
0  0.340344  0.898225 -0.130393
1  1.119578  1.195147 -0.598310
2  1.328761 -1.318005  0.280387
3 -1.772535  0.642607 -0.323692
4 -0.227911 -0.933447  0.059159


In [7]:
from depth.multivariate import *

In [8]:
combined_data = latent_df.to_numpy()
combined_x = latent_df['x'].to_numpy()
print(combined_x.shape, combined_data.shape)

(70000,) (70000, 3)


In [14]:
halfspace_depths = halfspace(combined_x[:1000], combined_data[:1000])

In [15]:
print(halfspace_depths)

[0.054 0.044 0.255 0.324 0.059 0.088 0.031 0.    0.129 0.008 0.063 0.082
 0.23  0.065 0.127 0.302 0.283 0.01  0.01  0.006 0.003 0.062 0.    0.02
 0.098 0.    0.016 0.076 0.046 0.049 0.028 0.089 0.172 0.063 0.063 0.05
 0.    0.003 0.006 0.027 0.    0.012 0.11  0.243 0.136 0.245 0.035 0.043
 0.271 0.089 0.077 0.141 0.001 0.1   0.058 0.158 0.126 0.212 0.094 0.082
 0.096 0.064 0.088 0.322 0.023 0.144 0.132 0.071 0.    0.    0.039 0.224
 0.078 0.041 0.004 0.279 0.    0.085 0.082 0.257 0.04  0.087 0.005 0.
 0.098 0.363 0.    0.057 0.099 0.    0.032 0.328 0.044 0.153 0.002 0.198
 0.011 0.047 0.    0.101 0.083 0.012 0.002 0.004 0.138 0.045 0.07  0.092
 0.071 0.327 0.162 0.228 0.089 0.243 0.23  0.088 0.035 0.002 0.05  0.024
 0.207 0.154 0.066 0.104 0.353 0.094 0.035 0.157 0.068 0.031 0.163 0.12
 0.005 0.054 0.189 0.171 0.06  0.071 0.104 0.223 0.141 0.152 0.132 0.01
 0.216 0.009 0.    0.012 0.268 0.009 0.005 0.    0.025 0.291 0.    0.
 0.136 0.013 0.003 0.15  0.282 0.    0.006 0.    0.007 0.    

In [16]:
halfspace_depths = halfspace(combined_x[:2000], combined_data[:2000])