In [31]:
from rec.models.mnist_vae import MNISTVAE, MNISTVampVAE

import os

import tensorflow as tf
tfl = tf.keras.layers

import tensorflow_probability as tfp
tfd = tfp.distributions

import tensorflow_datasets as tfds

import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm, trange

# Root directory (relative to the notebook's working directory) under which the
# per-image posterior / prior parameter files are written by the export loop.
data_save_dir = "../experimental_data/data_distributions/"

In [2]:
# Taken from https://github.com/tensorflow/tensorflow/issues/31135#issuecomment-516526113
# Expose no GPU devices to TensorFlow, so that everything in this notebook is
# placed on the CPU.
tf.config.experimental.set_visible_devices(devices=[], device_type="GPU")

# MNIST Experiments

In [13]:
# Directory holding the checkpoints of the Gaussian-prior MNIST VAE
# (handed to tf.train.CheckpointManager when the model is restored).
mnist_gauss_save_dir = "../../../models/relative-entropy-coding/empirical-bayes-experiments/mnist/gaussian"

In [6]:
# Standard Gaussian VAE: the prior is a Normal with zero mean and unit scale
# over 50 components.
standard_normal_prior = tfd.Normal(loc=tf.zeros(50), scale=tf.ones(50))

mnist_gauss_vae = MNISTVAE(
    name="gaussian_mnist_vae",
    prior=standard_normal_prior,
)

In [14]:
# Restore the most recent checkpoint of the Gaussian MNIST VAE, if there is one.
ckpt = tf.train.Checkpoint(model=mnist_gauss_vae)

if not os.path.exists(mnist_gauss_save_dir):
    print(f"{mnist_gauss_save_dir} has not been trained yet!")

manager = tf.train.CheckpointManager(ckpt, mnist_gauss_save_dir, max_to_keep=3)

# Run one dummy forward pass on a single all-zero 28x28x1 image before restoring
# (presumably so that the model's variables exist when the checkpoint is loaded
# -- TODO confirm against MNISTVAE).
mnist_gauss_vae(tf.zeros([1, 28, 28, 1]))

# `latest_checkpoint` is None when the directory contains no checkpoint; in that
# case `restore` loads nothing and the model keeps its current weights.
ckpt.restore(manager.latest_checkpoint)

if manager.latest_checkpoint:
    print(f"Restored {manager.latest_checkpoint}")
else:
    # Make the "nothing was restored" case visible instead of silently carrying
    # on with un-restored weights (e.g. the directory exists but is empty).
    print(f"No checkpoint found in {mnist_gauss_save_dir}; model weights were NOT restored!")

Restored ../../../models/relative-entropy-coding/empirical-bayes-experiments/mnist/gaussian/ckpt-287


## Load MNIST and pass it through every model

In [22]:
# Load binarized MNIST through TensorFlow Datasets. Because `with_info=True`,
# `dataset` is a pair: index 0 holds the splits, index 1 the dataset metadata.
binarized_mnist_dir = "/scratch/gf332/datasets/binarized_mnist"

dataset = tfds.load(
    "binarized_mnist",
    data_dir=binarized_mnist_dir,
    with_info=True,
)

In [37]:
# Export, for every MNIST image, the parameters of the model's posterior and
# prior as .npy files under
#   <data_save_dir>/mnist/beta_<beta>_latents_<latent_size>/<split>/img_<i>/
# `beta` and `latent_size` are only used to name the output directory here.
beta = 1
latent_size = 50

model = mnist_gauss_vae

for ds_folder in ["train", "test"]:

    print(f"Saving {ds_folder} set!")

    # `dataset` is the (splits, info) pair returned by tfds.load(with_info=True).
    ds = dataset[0][ds_folder]
    ds = ds.map(lambda x: tf.cast(x["image"], tf.float32))

    # Looked up once per split; only used to size the progress bar.
    num_examples = dataset[1].splits[ds_folder].num_examples

    for i, img in tqdm(enumerate(ds), total=num_examples):

        # os.path.join avoids the doubled separator that plain concatenation
        # produces when `data_save_dir` already ends in "/".
        save_dir = os.path.join(data_save_dir,
                                "mnist",
                                f"beta_{beta}_latents_{latent_size}",
                                ds_folder,
                                f"img_{i}")

        # exist_ok=True: no separate existence check needed, safe on re-runs.
        os.makedirs(save_dir, exist_ok=True)

        # The forward pass on a batch of one image is what refreshes
        # `model.posterior`, which is read below.
        # NOTE(review): training=True is kept exactly as in the original --
        # confirm this is the intended mode for exporting posterior statistics.
        reconstruction = model(img[None, ...], training=True)[0,...,0]

        samples = model.posterior.sample()

        np.save(os.path.join(save_dir, "post_loc.npy"), model.posterior.loc.numpy())
        np.save(os.path.join(save_dir, "post_scale.npy"), model.posterior.scale.numpy())

        np.save(os.path.join(save_dir, "prior_loc.npy"), model.prior.loc.numpy())
        np.save(os.path.join(save_dir, "prior_scale.npy"), model.prior.scale.numpy())

        # Log-density of the posterior sample under the prior, summed over
        # axis 1. `reconstruction`, `samples` and `prior_prob` are not written
        # to disk in this cell; they stay bound so that later cells can still
        # inspect the values from the last processed image.
        prior_prob = model.prior.log_prob(samples)
        prior_prob = tf.reduce_sum(prior_prob, axis=1)

Saving train set!


HBox(children=(FloatProgress(value=0.0, max=50000.0), HTML(value='')))


Saving test set!


HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))




10000