### Custom embedder for Parametric UMAP
This notebook shows how to run a Parametric UMAP projection with a custom embedder: a Keras encoder network that you define yourself and pass to `ParametricUMAP`.

In [1]:
# Point XLA at a CUDA data directory via the XLA_FLAGS environment variable.
# NOTE: the path is hard-coded to /opt/conda/envs/umap/lib/ — adjust it for your own machine.
%env XLA_FLAGS=--xla_gpu_cuda_data_dir=/opt/conda/envs/umap/lib/

env: XLA_FLAGS=--xla_gpu_cuda_data_dir=/opt/conda/envs/umap/lib/


In [2]:
import tensorflow as tf

# List the GPUs TensorFlow can see; an empty list means no GPU was detected.
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### load data

In [3]:
from tensorflow.keras.datasets import mnist

# MNIST is returned as (images, labels) pairs for the train and test splits.
(train_images, Y_train), (test_images, Y_test) = mnist.load_data()

# Flatten every image into a single row and rescale the pixel values by 1/255.
train_images = train_images.reshape(len(train_images), -1) / 255.
test_images = test_images.reshape(len(test_images), -1) / 255.

### define the encoder network

In [4]:
import tensorflow as tf

# Shape of one input image (height, width, channels) and the number of
# dimensions the encoder must output for the embedding.
dims = (28, 28, 1)
n_components = 2

encoder = tf.keras.Sequential([
    # `tf.keras.Input(shape=...)` is used instead of `InputLayer(input_shape=...)`
    # because the `input_shape` keyword is deprecated in Keras 3.
    tf.keras.Input(shape=dims),
    # Two stride-2 convolutions: each one halves the spatial resolution
    # (28x28 -> 14x14 -> 7x7 with "same" padding).
    tf.keras.layers.Conv2D(
        filters=64, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=128, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=512, activation="relu"),
    tf.keras.layers.Dense(units=512, activation="relu"),
    # Final linear layer: one unit per embedding dimension.
    tf.keras.layers.Dense(units=n_components),
])
encoder.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 14, 14, 64)        640       
                                                                 
 conv2d_1 (Conv2D)           (None, 7, 7, 128)         73856     
                                                                 
 flatten (Flatten)           (None, 6272)              0         
                                                                 
 dense (Dense)               (None, 512)               3211776   
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 dense_2 (Dense)             (None, 2)                 1026      
                                                                 
Total params: 3549954 (13.54 MB)
Trainable params: 35499

### create parametric umap model

In [5]:
from umap.parametric_umap import ParametricUMAP

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


In [6]:
# Build the ParametricUMAP model around the custom encoder defined above.
embedder = ParametricUMAP(
    encoder=encoder,
    dims=dims,
    n_components=n_components,
    n_training_epochs=1,
    verbose=True,
)

In [None]:
# Fit the embedder on the flattened training images and keep the embedding it returns.
embedding = embedder.fit_transform(train_images)

ParametricUMAP(dims=(28, 28, 1), optimizer=<keras.src.optimizers.adam.Adam object at 0x7fe47c324ca0>)
Wed Aug 16 01:19:16 2023 Construct fuzzy simplicial set
Wed Aug 16 01:19:16 2023 Finding Nearest Neighbors
Wed Aug 16 01:19:16 2023 Building RP forest with 17 trees
Wed Aug 16 01:19:20 2023 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	 3  /  16
	 4  /  16
	Stopping threshold met -- exiting after 4 iterations
Wed Aug 16 01:19:37 2023 Finished Nearest Neighbor Search
Wed Aug 16 01:19:40 2023 Construct embedding
Epoch 1/10


### plot results

In [None]:
# Read the embedding back from the fitted model's `embedding_` attribute
# (presumably the same array that fit_transform returned — TODO confirm).
embedding = embedder.embedding_

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Scatter the 2-D embedding, coloured by the MNIST digit label (0-9) in Y_train.
# vmin/vmax sit half a unit outside the label range so that every digit gets its
# own unit-wide colour band; the integer colourbar ticks then land at the centre
# of the matching "tab10" colour instead of on the band edges.
fig, ax = plt.subplots(figsize=(8, 8))
sc = ax.scatter(
    embedding[:, 0],
    embedding[:, 1],
    c=Y_train.astype(int),
    cmap="tab10",
    vmin=-0.5,
    vmax=9.5,
    s=0.1,
    alpha=0.5,
    rasterized=True,
)
ax.axis('equal')
ax.set_title("UMAP in Tensorflow embedding", fontsize=20)
# Trailing semicolon suppresses the Colorbar repr in the cell output.
plt.colorbar(sc, ax=ax, ticks=list(range(10)), label="digit");

### plotting loss

In [None]:
# Show which entries the embedder recorded in `_history`.
# NOTE(review): `_history` is underscore-prefixed, so it is presumably a private
# attribute that may change between library versions — confirm.
embedder._history.keys()

In [None]:
# Plot the values stored under the 'loss' key of the embedder's history.
fig, ax = plt.subplots()
ax.plot(embedder._history['loss'])
ax.set_ylabel('Cross Entropy')
ax.set_xlabel('Epoch')
# Give the figure a title so it stands alone; the trailing semicolon keeps the
# Text(...) repr of the last call out of the cell output.
ax.set_title('Parametric UMAP training loss');