In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, Flatten , Convolution2D, MaxPooling2D , Lambda, Conv2D, Activation,Concatenate, Input, BatchNormalization
from tensorflow.keras.optimizers import Adam , SGD , Adagrad
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, CSVLogger, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers , initializers, activations
import tensorflow.keras.backend as K
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
import tensorflow_probability as tfp

import numpy as np
import matplotlib.pyplot as plt
import corner
import os 
import sys
import time
import importlib
import logging
from tqdm import tqdm

# Reload the logging module before configuring it; presumably this resets any
# handlers installed earlier in the kernel so basicConfig takes effect -- TODO confirm.
importlib.reload(logging)
logging.basicConfig(level = logging.INFO)

# Limit GPU usage: expose only the first physical GPU and cap its memory.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Guard on a non-empty list: indexing gpus[0] on a CPU-only machine would
    # raise IndexError, which the RuntimeError handler below does not catch.
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)
else:
    print("No GPU detected; TensorFlow will run on CPU.")

In [None]:
%%time
training_data = np.load("./nsi_data/sample_nsi_regression_1e7_v1.npz")
data_all = np.column_stack([training_data['ve_dune'][:,:36], training_data['vu_dune'][:,:36], training_data['vebar_dune'][:,:36], training_data['vubar_dune'][:,:36]])

target = np.column_stack([training_data["theta13"], training_data["theta23"],
                          np.sin(training_data["delta"]/180*np.pi), np.cos(training_data["delta"]/180*np.pi),
                         training_data["mumu"], training_data["emu"],
                         training_data["etau"]])

x_train = data_all[:9000000]
y_train = target[:9000000]

x_test = data_all[9000000:]
y_test = target[9000000:]

x_train_poisson = np.random.poisson(x_train)/1000
x_test_poisson = np.random.poisson(x_test)/1000

In [None]:
class Sampling(layers.Layer):
    """Keras layer that draws a sample from a diagonal Gaussian.

    Given ``(z_mean, z_log_var)`` it returns
    ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is random
    normal noise with the same (batch, dim) shape as ``z_mean``.
    """

    def call(self, inputs):
        """Return one sample per row of the ``(z_mean, z_log_var)`` pair in ``inputs``."""
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

In [None]:
# Size of the latent vector produced by encoder 1 and consumed by the decoder.
latent_dim = 7

# Encoder 1 (parameter + spectrum): maps the target parameters together with
# the spectrum to the mean and log-variance of a diagonal Gaussian over the
# latent space.

# Parameter branch: three ReLU dense layers (64 -> 32 -> 16).
encoder_parameter_inputs = layers.Input(shape=(len(y_train[0]),),name = 'encoder_parameter_inputs')
x_parameter = layers.Dense(64, activation="relu", name = 'dense_parameter_1')(encoder_parameter_inputs)
x_parameter = layers.Dense(32, activation="relu", name = 'dense_parameter_2')(x_parameter)
x_parameter = layers.Dense(16, activation="relu", name = 'dense_parameter_3')(x_parameter)

# Spectrum branch. The input shape must be a tuple; the previous `(144)` was a
# plain int. Deriving it from x_train keeps it in step with the data, as the
# parameter branch above already does with y_train.
encoder_spectrum_inputs = layers.Input(shape=(len(x_train[0]),),name = 'encoder_spectrum_inputs')
x_spectrum = layers.Dense(64, activation="relu", name = 'dense_spectrum_1')(encoder_spectrum_inputs)
x_spectrum = layers.Dense(32, activation="relu", name = 'dense_spectrum_2')(x_spectrum)
x_spectrum = layers.Dense(16, activation="relu", name = 'dense_spectrum_3')(x_spectrum)

# Join both branches and project to the Gaussian parameters (linear heads).
mergedOut_Encoder_1 = Concatenate()([x_parameter,x_spectrum])

z_mean = layers.Dense(latent_dim, name="z_mean")(mergedOut_Encoder_1)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(mergedOut_Encoder_1)

encoder_1 = keras.Model([encoder_parameter_inputs, encoder_spectrum_inputs], [z_mean, z_log_var], name="encoder_1")
encoder_1.summary()

In [None]:
"""
Encoder 2 (spectrum)
"""
# Spectrum-only encoder: a 64 -> 32 -> 16 ReLU stack followed by three linear
# heads that parameterise a Gaussian mixture over the latent space.
encoder_spectrum_inputs = layers.Input(shape=(x_train.shape[1],), name='encoder_spectrum_inputs')

x_spectrum = encoder_spectrum_inputs
for _depth, _units in enumerate((64, 32, 16), start=1):
    x_spectrum = layers.Dense(
        _units, activation="relu", name='dense_spectrum_{}'.format(_depth)
    )(x_spectrum)

# Number of mixture components; each has its own mean and log-variance vector
# of length latent_dim, plus one weight.
guassian_number = 40
_per_component_width = guassian_number*latent_dim
z_mean = layers.Dense(_per_component_width, name="z_mean")(x_spectrum)
z_log_var = layers.Dense(_per_component_width, name="z_log_var")(x_spectrum)
z_weight = layers.Dense(guassian_number, name="z_weight")(x_spectrum)

encoder_2 = keras.Model(
    inputs=encoder_spectrum_inputs,
    outputs=[z_mean, z_log_var, z_weight],
    name="encoder_2",
)
encoder_2.summary()

In [None]:
"""
Decoder Model (latent + spectrum)
"""
# Width of the decoder output: one mean and one log-variance per target parameter.
latent_dim_2 = y_train.shape[1]

# Latent branch: 64 -> 32 -> 16 ReLU stack on the latent sample.
decoder_latent_inputs = layers.Input(shape=(latent_dim,), name='decoder_latent_inputs')
x_latent = decoder_latent_inputs
for _depth, _units in enumerate((64, 32, 16), start=1):
    x_latent = layers.Dense(
        _units, activation="relu", name='dense_{}'.format(_depth)
    )(x_latent)

# Spectrum branch: same stack shape, fed with the 144-wide spectrum.
decoder_spectrum_inputs = layers.Input(shape=(144,), name='decoder_spectrum_inputs')
x_spectrum = decoder_spectrum_inputs
for _depth, _units in enumerate((64, 32, 16), start=1):
    x_spectrum = layers.Dense(
        _units, activation="relu", name='dense_spectrum_{}'.format(_depth)
    )(x_spectrum)

# Merge both branches and emit the two linear output heads.
mergedOut_Decoder = Concatenate()([x_latent, x_spectrum])

z2_mean = layers.Dense(latent_dim_2, name="z_mean")(mergedOut_Decoder)
z2_log_var = layers.Dense(latent_dim_2, name="z_log_var")(mergedOut_Decoder)

decoder = keras.Model(
    inputs=[decoder_latent_inputs, decoder_spectrum_inputs],
    outputs=[z2_mean, z2_log_var],
    name="decoder",
)
decoder.summary()

In [None]:
class CVAE(keras.Model):
    """Conditional VAE assembled from two encoders and one decoder.

    * ``encoder1`` receives the full input ``x`` (parameters and spectrum) and
      returns the mean and log-variance of a diagonal Gaussian ``q`` over the
      latent space.
    * ``encoder2`` receives only ``x[1]`` (the spectrum) and returns flattened
      means, log-variances and mixture logits of a Gaussian mixture ``r1`` over
      the same latent space.
    * ``decoder`` receives a latent sample and ``x[1]`` and returns the mean and
      log-variance of a diagonal Gaussian ``r2`` over the targets ``y``.

    Only ``train_step`` is customised; the model defines no ``call``.
    """

    def __init__(self, encoder1, encoder2, decoder, **kwargs):
        """Store the three sub-models and create the running-mean loss trackers."""
        super().__init__(**kwargs)
        self.encoder1 = encoder1
        self.encoder2 = encoder2
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras as this model's metrics."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on a batch ``(x, y)``.

        The loss is ``-mean(log r2(y | z1, x[1])) + mean(log q(z1) - log r1(z1))``
        where ``z1`` is a single sample from ``q``; the second term is a
        one-sample estimate of KL(q || r1).

        Returns a dict with the running means of the total, reconstruction and
        KL losses.
        """
        x, y = data
        spectrum = x[1]
        # Added to every variance before the square root.
        SMALL_CONSTANT = 1e-12

        with tf.GradientTape() as tape:
            # q(z | x): diagonal Gaussian from encoder 1.
            z1_mean, z1_log_var = self.encoder1(x)
            mvn_q = tfp.distributions.MultivariateNormalDiag(
                loc=z1_mean,
                scale_diag=tf.sqrt(SMALL_CONSTANT + tf.exp(z1_log_var)))
            z1 = mvn_q.sample()

            # r1(z | spectrum): Gaussian mixture from encoder 2.
            z2_mean, z2_log_var, z2_weight = self.encoder2(spectrum)

            # Read the mixture layout from the static output shapes rather than
            # from notebook globals: one logit per component, and components
            # live in the same space as z1.
            n_components = z2_weight.shape[-1]
            latent_size = z1_mean.shape[-1]
            z2_mean = tf.reshape(z2_mean, (-1, n_components, latent_size))
            z2_log_var = tf.reshape(z2_log_var, (-1, n_components, latent_size))
            z2_weight = tf.reshape(z2_weight, (-1, n_components))

            mixture_r1 = tfp.distributions.MixtureSameFamily(
                mixture_distribution=tfp.distributions.Categorical(logits=z2_weight),
                components_distribution=tfp.distributions.MultivariateNormalDiag(
                    loc=z2_mean,
                    scale_diag=tf.sqrt(SMALL_CONSTANT + tf.exp(z2_log_var))))

            # r2(y | z1, spectrum): the decoder's second output is exponentiated
            # below, i.e. it is used as a log-variance.
            reconstruction_mean, reconstruction_log_var = self.decoder([z1, spectrum])
            reconstruction_parameter = tfp.distributions.MultivariateNormalDiag(
                loc=reconstruction_mean,
                scale_diag=tf.sqrt(SMALL_CONSTANT + tf.exp(reconstruction_log_var)))

            # One-sample estimate of KL(q || r1), evaluated at z1 ~ q.
            log_q_q = mvn_q.log_prob(z1)
            log_r1_q = mixture_r1.log_prob(z1)
            kl_loss = tf.reduce_mean(log_q_q - log_r1_q)

            # Negative log-likelihood of the true targets under r2.
            reconstruction_loss = -1.0*tf.reduce_mean(reconstruction_parameter.log_prob(y))

            total_loss = reconstruction_loss + kl_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [None]:
"""
Model Building
"""
# Wire the three sub-models into the CVAE wrapper and attach a default Adam
# optimizer; no loss is passed because CVAE.train_step computes its own.
cvae = CVAE(encoder1=encoder_1, encoder2=encoder_2, decoder=decoder)
cvae.compile(optimizer=Adam())

In [None]:
"""
Model Training
"""
%%time
cvae.fit(x = [y_train, x_train_poisson],
         y = y_train,
         batch_size=1000,
         epochs=300,
         verbose=1,
         shuffle=True)

In [None]:
# Save each sub-model into a fresh, numbered run directory:
# ./CVAE/DNN_angleTran_allparam_<components>,<latent_dim>_<index>
path_template = './CVAE/DNN_angleTran_allparam_{},{}'.format(guassian_number, latent_dim) + '_{}'

# Pick the first index whose directory does not exist yet.
index = 1
while os.path.isdir(path_template.format(index)):
    index += 1
path = path_template.format(index)

# Create the directory explicitly instead of relying on the saver to create
# missing parent directories.
os.makedirs(path, exist_ok=True)

cvae.encoder1.save(path + "/encoder_1.h5")
cvae.encoder2.save(path + "/encoder_2.h5")
cvae.decoder.save(path + "/decoder.h5")