In [9]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Concatenate
import tensorflow_probability as tfp
import numpy as np
import os 
import importlib
import logging
from tqdm import tqdm

# Reload the logging module before configuring it (presumably so that
# basicConfig applies in a kernel where logging was already set up -- confirm).
importlib.reload(logging)
logging.basicConfig(level = logging.INFO)

# Restrict TensorFlow to the first GPU and cap the memory it may allocate there
# (memory_limit=10000; the TensorFlow docs give this value in MB).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)
else:
    # Indexing gpus[0] on an empty list would raise IndexError, which the
    # RuntimeError handler above does not catch; fall back to CPU instead.
    print("No GPU detected; running on CPU")

1 Physical GPUs, 1 Logical GPU


In [2]:
# Load the first sample file and assemble the network inputs and targets.
training_data = np.load("./nsi_data/sample_nsi_regression_1e7_v1.npz")

# Spectrum features: the first 36 columns of each of the four channels,
# placed side by side in the order listed (4 x 36 = 144 columns).
data_all = np.column_stack([
    training_data[channel][:, :36]
    for channel in ('ve_dune', 'vu_dune', 'vebar_dune', 'vubar_dune')
])

# Target columns: theta13, theta23, sin(delta), cos(delta), mumu, emu, etau.
# The three angles are multiplied by pi/180 (presumably stored in degrees -- confirm);
# delta enters through its sine and cosine rather than as a raw angle.
delta_rad = training_data["delta"]/180*np.pi
target = np.column_stack([
    training_data["theta13"]/180*np.pi,
    training_data["theta23"]/180*np.pi,
    np.sin(delta_rad),
    np.cos(delta_rad),
    training_data["mumu"],
    training_data["emu"],
    training_data["etau"],
])

x_train, y_train = data_all, target
# Draw a Poisson sample around every spectrum entry, then divide by 1000.
x_train_poisson = np.random.poisson(x_train)/1000

Encoder 1 (parameter + spectrum)

In [10]:
def encoder1(guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node):
    """Build the encoder that sees both the parameter vector and the spectrum.

    Two separate three-layer ReLU branches process the parameter input and the
    144-value spectrum input. Their outputs are concatenated and fed to two
    linear heads named ``z_mean`` and ``z_log_var``.

    Args:
        guassian_number: Not used by this builder; it has the same signature
            as ``encoder2`` and ``decoder``.
        latent_dim: Width of both the ``z_mean`` and ``z_log_var`` outputs.
        latent_dim_2: Not used by this builder.
        x_parameter_node: Indexable with at least three layer widths for the
            parameter branch.
        x_spectrum_node: Indexable with at least three layer widths for the
            spectrum branch.
        x_latent_node: Not used by this builder.

    Returns:
        A ``keras.Model`` named ``"encoder_1"`` mapping
        ``[parameters, spectrum]`` to ``[z_mean, z_log_var]``.

    Note:
        The parameter input width is read from the module-level ``y_train``,
        so that array must be defined before this function is called.
    """
    # Parameter branch.
    encoder_parameter_inputs = layers.Input(shape=(len(y_train[0]),), name = 'encoder_parameter_inputs')
    x_parameter = layers.Dense(x_parameter_node[0], activation="relu", name = 'dense_parameter_1')(encoder_parameter_inputs)
    x_parameter = layers.Dense(x_parameter_node[1], activation="relu", name = 'dense_parameter_2')(x_parameter)
    x_parameter = layers.Dense(x_parameter_node[2], activation="relu", name = 'dense_parameter_3')(x_parameter)

    # Spectrum branch. The shape must be a tuple: ``(144)`` is just the int 144,
    # whereas ``(144,)`` matches the decoder's spectrum input.
    encoder_spectrum_inputs = layers.Input(shape=(144,), name = 'encoder_spectrum_inputs')
    x_spectrum = layers.Dense(x_spectrum_node[0], activation="relu", name = 'dense_spectrum_1')(encoder_spectrum_inputs)
    x_spectrum = layers.Dense(x_spectrum_node[1], activation="relu", name = 'dense_spectrum_2')(x_spectrum)
    x_spectrum = layers.Dense(x_spectrum_node[2], activation="relu", name = 'dense_spectrum_3')(x_spectrum)

    mergedOut_Encoder_1 = Concatenate()([x_parameter, x_spectrum])

    # Linear (no activation) heads for the latent Gaussian's mean and log-variance.
    z_mean = layers.Dense(latent_dim, name="z_mean")(mergedOut_Encoder_1)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(mergedOut_Encoder_1)

    return keras.Model([encoder_parameter_inputs, encoder_spectrum_inputs], [z_mean, z_log_var], name="encoder_1")

Encoder 2 (spectrum)

In [11]:
def encoder2(guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node):
    """Build the spectrum-only encoder that parameterises a Gaussian mixture.

    The spectrum passes through three ReLU dense layers and then three linear
    heads: ``z_mean`` and ``z_log_var`` (each ``guassian_number * latent_dim``
    wide) and ``z_weight`` (``guassian_number`` wide).

    Args:
        guassian_number: Number of mixture components.
        latent_dim: Latent width per component.
        latent_dim_2: Not used by this builder.
        x_parameter_node: Not used by this builder.
        x_spectrum_node: Indexable with at least three layer widths.
        x_latent_node: Not used by this builder.

    Returns:
        A ``keras.Model`` named ``"encoder_2"`` mapping the spectrum to
        ``[z_mean, z_log_var, z_weight]``.

    Note:
        The input width is taken from the module-level ``x_train``.
    """
    spectrum_in = layers.Input(shape=(len(x_train[0]),), name='encoder_spectrum_inputs')

    # Three stacked ReLU layers; widths are read by position from x_spectrum_node.
    hidden = spectrum_in
    for idx, layer_name in enumerate(('dense_spectrum_1', 'dense_spectrum_2', 'dense_spectrum_3')):
        hidden = layers.Dense(x_spectrum_node[idx], activation="relu", name=layer_name)(hidden)

    # Flat heads; the consumer reshapes them per mixture component.
    flat_width = guassian_number*latent_dim
    mixture_mean = layers.Dense(flat_width, name="z_mean")(hidden)
    mixture_log_var = layers.Dense(flat_width, name="z_log_var")(hidden)
    mixture_weight = layers.Dense(guassian_number, name="z_weight")(hidden)

    return keras.Model(
        spectrum_in,
        [mixture_mean, mixture_log_var, mixture_weight],
        name="encoder_2",
    )

Decoder (latent + spectrum)

In [12]:
def decoder(guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node):
    """Build the decoder that maps a latent sample plus the spectrum to parameters.

    A latent branch and a spectrum branch (three ReLU dense layers each) are
    concatenated and projected by two linear heads, each ``latent_dim_2`` wide.

    Args:
        guassian_number: Not used by this builder.
        latent_dim: Width of the latent input.
        latent_dim_2: Width of each output head.
        x_parameter_node: Not used by this builder.
        x_spectrum_node: Indexable with at least three widths for the spectrum branch.
        x_latent_node: Indexable with at least three widths for the latent branch.

    Returns:
        A ``keras.Model`` named ``"decoder"`` mapping ``[latent, spectrum]`` to
        ``[mean, log_var]`` (the output layers are named ``z_mean`` / ``z_log_var``).
    """
    def dense_relu_stack(tensor, widths, layer_names):
        # One ReLU Dense layer per name, with the width looked up by position.
        for position, layer_name in enumerate(layer_names):
            tensor = layers.Dense(widths[position], activation="relu", name=layer_name)(tensor)
        return tensor

    # Latent branch.
    latent_in = keras.Input(shape=(latent_dim,), name='decoder_latent_inputs')
    latent_features = dense_relu_stack(
        latent_in, x_latent_node, ('dense_1', 'dense_2', 'dense_3'))

    # Spectrum branch (fixed 144-value input).
    spectrum_in = layers.Input(shape=(144,), name='decoder_spectrum_inputs')
    spectrum_features = dense_relu_stack(
        spectrum_in, x_spectrum_node,
        ('dense_spectrum_1', 'dense_spectrum_2', 'dense_spectrum_3'))

    merged = Concatenate()([latent_features, spectrum_features])

    out_mean = layers.Dense(latent_dim_2, name="z_mean")(merged)
    out_log_var = layers.Dense(latent_dim_2, name="z_log_var")(merged)

    return keras.Model([latent_in, spectrum_in], [out_mean, out_log_var], name="decoder")

In [16]:
class CVAE(keras.Model):
    """Conditional VAE made of two encoders and a decoder, with a custom training step.

    ``encoder1`` sees the parameters and the spectrum and outputs a diagonal
    Gaussian over the latent; ``encoder2`` sees only the spectrum and outputs a
    Gaussian mixture over the same latent; ``decoder`` maps a latent sample plus
    the spectrum to a diagonal Gaussian over the parameters.
    """

    def __init__(self, guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node, kl_scaling, **kwargs):
        """Create the three sub-models and the loss trackers.

        Args:
            guassian_number: Number of mixture components produced by ``encoder2``.
            latent_dim: Width of the latent vector.
            latent_dim_2: Width of the decoder outputs.
            x_parameter_node: Layer widths for the parameter branch.
            x_spectrum_node: Layer widths for the spectrum branches.
            x_latent_node: Layer widths for the decoder's latent branch.
            kl_scaling: Multiplier applied to the KL term of the loss.
            **kwargs: Forwarded to ``keras.Model``.
        """
        super().__init__(**kwargs)
        # Keep the values train_step needs on the instance, so training always
        # uses what this model was built with rather than whatever the
        # same-named notebook globals hold at fit time.
        self.guassian_number = guassian_number
        self.latent_dim = latent_dim
        self.kl_scaling = kl_scaling

        self.encoder1 = encoder1(guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node)
        self.encoder2 = encoder2(guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node)
        self.decoder = decoder(guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node)
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Return the three running-mean trackers updated by ``train_step``."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step and return the running loss averages.

        Args:
            data: ``(x, y)`` where ``x`` is ``[parameters, spectrum]`` and ``y``
                is the target the decoder distribution is scored against.

        Returns:
            Dict with the running means of ``loss``, ``reconstruction_loss``
            and ``kl_loss``.
        """
        x, y = data
        with tf.GradientTape() as tape:
            # Added to every variance before the square root so it is never exactly zero.
            SMALL_CONSTANT = 1e-12

            # q(z | parameters, spectrum): diagonal Gaussian from encoder1.
            z1_mean, z1_log_var = self.encoder1(x)
            temp_var_q = SMALL_CONSTANT + tf.exp(z1_log_var)
            mvn_q = tfp.distributions.MultivariateNormalDiag(
                          loc=z1_mean,
                          scale_diag=tf.sqrt(temp_var_q))

            z1 = mvn_q.sample()

            # r1(z | spectrum): Gaussian mixture from encoder2. The flat outputs
            # are reshaped to (batch, components, latent_dim) / (batch, components).
            z2_mean, z2_log_var, z2_weight = self.encoder2(x[1])
            z2_mean = tf.reshape(z2_mean, (-1, self.guassian_number, self.latent_dim))
            z2_log_var = tf.reshape(z2_log_var, (-1, self.guassian_number, self.latent_dim))
            z2_weight = tf.reshape(z2_weight, (-1, self.guassian_number))

            temp_var_r1 = SMALL_CONSTANT + tf.exp(z2_log_var)
            bimix_gauss = tfp.distributions.MixtureSameFamily(
                          mixture_distribution=tfp.distributions.Categorical(logits=z2_weight),
                          components_distribution=tfp.distributions.MultivariateNormalDiag(
                          loc=z2_mean,
                          scale_diag=tf.sqrt(temp_var_r1)))

            # r2(parameters | z, spectrum): diagonal Gaussian from the decoder.
            # The second decoder output is exponentiated below, i.e. it is
            # treated as a log-variance.
            reconstruction_mean, reconstruction_log_var = self.decoder([z1, x[1]])
            temp_var_r2 = SMALL_CONSTANT + tf.exp(reconstruction_log_var)
            reconstruction_parameter = tfp.distributions.MultivariateNormalDiag(
                                     loc=reconstruction_mean,
                                     scale_diag=tf.sqrt(temp_var_r2))

            # Single-sample estimate of KL(q || r1): log q(z1) - log r1(z1),
            # averaged over the batch and scaled.
            kl_loss = tf.reduce_mean(mvn_q.log_prob(z1) - bimix_gauss.log_prob(z1))*self.kl_scaling
            # Negative mean log-likelihood of the targets under r2.
            reconstruction_loss = -1.0*tf.reduce_mean(reconstruction_parameter.log_prob(y))

            total_loss = reconstruction_loss + kl_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

Model Building, Training, and Saving

In [17]:
# Hyperparameters consumed by the CVAE constructor and the optimizer.
guassian_number = 100            # mixture components produced by encoder2
latent_dim = 100                 # latent width
latent_dim_2 = len(y_train[0])   # decoder output width = number of target columns

# All three dense stacks share one width list (the same list object).
x_parameter_node = x_spectrum_node = x_latent_node = [256, 256, 256]

kl_scaling = 1e4                 # multiplier on the KL term of the loss
lr = 1e-3                        # Adam learning rate

# Alternative tapered widths, currently disabled:
# x_spectrum_node = [256, 64, 16]
# x_latent_node = [256, 64, 16]

In [18]:
# Build and compile the model. `learning_rate` is the supported keyword for
# Adam; the `lr` alias is deprecated.
cvae = CVAE(guassian_number, latent_dim, latent_dim_2, x_parameter_node, x_spectrum_node, x_latent_node, kl_scaling)
cvae.compile(optimizer=keras.optimizers.Adam(learning_rate=lr))

# Pick the first run index whose TensorBoard directory does not exist yet.
cvae_model_index = 1
while os.path.isdir("./tb_log/model_{}".format(cvae_model_index)):
    cvae_model_index += 1

# makedirs (not mkdir) so the parent ./tb_log is created on a first run too.
log_dir = "./tb_log/model_{}".format(cvae_model_index)
os.makedirs(log_dir)

# Create the save directory before the long training run, so a missing or
# unwritable location fails now rather than after the final epoch.
path = "./CVAE/model_{}/".format(cvae_model_index)
os.makedirs(path, exist_ok=True)

# Record the hyperparameters of this run next to its TensorBoard logs.
with open("./tb_log/model_{}/model_info.txt".format(cvae_model_index), 'w') as f:
    f.write('guassian_number = {}\n'.format(guassian_number))
    f.write('latent_dim = {}\n'.format(latent_dim))
    f.write('x_parameter_node = {}\n'.format(x_parameter_node))
    f.write('x_spectrum_node = {}\n'.format(x_spectrum_node))
    f.write('x_latent_node = {}\n'.format(x_latent_node))
    f.write('kl_scaling = {}\n'.format(kl_scaling))
    f.write('lr = {}\n'.format(lr))

tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir = log_dir, histogram_freq = 1)

# Train on the Poisson-fluctuated spectra. The parameters are both an input
# (first element of x, consumed by encoder1) and the target y.
cvae.fit(x = [y_train, x_train_poisson],
            y = y_train,
            batch_size=1000,
            epochs=100,
            verbose=1,
            shuffle=True,
            callbacks=[tensorboard_callback]
)

# Save each sub-model separately in HDF5 format.
cvae.encoder1.save(path + "encoder_1.h5")
cvae.encoder2.save(path + "encoder_2.h5")
cvae.decoder.save(path + "decoder.h5")

Epoch 1/100

KeyboardInterrupt: 

# Settings shared by every fit call in this cell.
curriculum_fit_kwargs = dict(batch_size=1000, epochs=10, verbose=1, shuffle=True)

# Stage 1: fit on the unfluctuated spectra (divided by 1000).
cvae.fit(x=[y_train, x_train/1000], y=y_train, **curriculum_fit_kwargs)

# Stage 2: refit with Gaussian noise whose standard deviation is
# sqrt(x_train) times a factor that steps through 30 log-spaced values
# from 1e-3 to 1.
scale_steps = np.logspace(-3, 0, 30)

for noise_fraction in scale_steps:
    x_train_gen = np.random.normal(loc=x_train, scale=np.sqrt(x_train)*noise_fraction)/1000
    cvae.fit(x=[y_train, x_train_gen], y=y_train, **curriculum_fit_kwargs)

# Continue training on sample files v2 .. v10, one file at a time. Each pass
# rebinds the notebook-level training_data / x_train / y_train / x_train_poisson.
for i in tqdm(range(2, 11)):
    training_data = np.load("./nsi_data/sample_nsi_regression_1e7_v{}.npz".format(i))

    # Spectrum features: first 36 columns of each of the four channels, side by side.
    data_all = np.column_stack([
        training_data[channel][:, :36]
        for channel in ('ve_dune', 'vu_dune', 'vebar_dune', 'vubar_dune')
    ])

    # Targets: theta13, theta23, sin(delta), cos(delta), mumu, emu, etau.
    delta_rad = training_data["delta"]/180*np.pi
    target = np.column_stack([
        training_data["theta13"]/180*np.pi,
        training_data["theta23"]/180*np.pi,
        np.sin(delta_rad),
        np.cos(delta_rad),
        training_data["mumu"],
        training_data["emu"],
        training_data["etau"],
    ])

    x_train, y_train = data_all, target
    # Poisson sample around every spectrum entry, then divide by 1000.
    x_train_poisson = np.random.poisson(x_train)/1000

    cvae.fit(
        x=[y_train, x_train_poisson],
        y=y_train,
        batch_size=1000,
        epochs=100,
        verbose=1,
        shuffle=True,
    )