In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" # on NERSC filelocking is not allowed
import h5py
import tensorflow as tf
# Make notebook run on other GPUS. GPT's solution ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# gpus = tf.config.list_physical_devices('GPU')
# tf.config.set_visible_devices(gpus[2], 'GPU')  # change 1 to 0, 2, 3 as needed
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# import tensorflow.math as tfmath
import tensorflow.keras as keras
# from scipy.optimize imporjun26t curve_fit
# from tensorflow.keras import layers, Model
import tensorflow.keras.backend as K
# from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
# from tensorflow.keras.models import load_model
# from sklearn.metrics import roc_curve, auc
# import sklearn.metrics as sk
# from tensorflow.keras.models import Model
from tensorflow.keras.layers import PReLU, Input, LSTM, Flatten, Concatenate, Dense, Conv2D, TimeDistributed, MaxPooling2D, LeakyReLU, ReLU, Dropout, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam, SGD
# from tensorflow.keras.metrics import Precision
# # from qkeras import QActivation, QDense, QConv2D, QBatchNormalization, QConv2DBatchnorm # These don't seem to be used
# # from qkeras import quantized_relu, quantized_bits
# from tensorflow.keras.regularizers import l1, l2, l1_l2

2025-06-30 16:05:02.896987: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load data
home_path = "/global/cfs/cdirs/m2616/jananinf/projsIO/VAE_FS/" # Updated to NERSC
file_path = home_path + "preprocessed_SNL_data.h5"
with h5py.File(file_path, 'r') as hf:           # Shapes:
    X_train = hf['X_train'][:]                  # (3200000, 57)
    X_test  = hf['X_test'][:]                   # (800000,  57)
    Ato4l_data  = hf['Ato4l_data'][:]           # (55969,   57) Signal data? 
    hToTauTau_data  = hf['hToTauTau_data'][:]   # (691283,  57)
    hChToTauNu_data  = hf['hChToTauNu_data'][:] # (760272,  57)
    leptoquark_data = hf['leptoquark_data'][:]  # (340544,  57)
    print("Data loaded from preprocessed_SNL_data.h5")

Data loaded from preprocessed_SNL_data.h5


In [3]:
class Sampling(keras.layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
    


In [4]:
def Qmake_encoder_set_weights(input_dim,h_dim_1,h_dim_2,latent_dim):
    """
    Makes encoder

    Parameters
    ----------
    input_dim : int
        size of input layer
    h_dim_[X] : int
        size of hidden layer X
    latent_dim : int
        size of latent layer
    """

    # What is this and why? ----------------------------------------------------
    # l2_factor = 1e-3 
    # --    

    # Input layer --------------------------------------------------------------
    inputs = keras.Input(shape=(input_dim))
    x = BatchNormalization(name="enc_BN_Input")(inputs) # paper indicates batch 
                                               # normalization at the beginning
                                               # of each layer
    # --

    # Hidden layer 1 -----------------------------------------------------------
    x = Dense(h_dim_1,
    # I am not quite seeing these in the paper. Maybe they're implied?
            #  kernel_initializer=keras.initializers.HeNormal(seed=None), 
            #  bias_initializer=keras.initializers.Zeros(),
            #  kernel_regularizer=l1_l2(l1=0, l2=l2_factor), # This is where the l2_factor is used.

             name = "enc_dense1")(inputs)
    x = BatchNormalization(name="enc_BN_h1")(x)
    x = LeakyReLU(name="enc_Lrelu1")(x)
    # ---

    # Hidden Layer 1 -----------------------------------------------------------
    x = Dense(h_dim_2,
            #  kernel_initializer=keras.initializers.HeNormal(seed=None),
            #  bias_initializer=keras.initializers.Zeros(),
            #  kernel_regularizer=l1_l2(l1=0, l2=l2_factor),
             name = "enc_dense2")(x)    
    x = BatchNormalization(name="enc_BN_h2")(x)
    x = LeakyReLU(name="enc_Lrelu2")(x)
    # ---

    # Latent layer -------------------------------------------------------------
    # No activation. 
    z_mean=Dense(latent_dim, name='z_mean',
                #   kernel_initializer=keras.initializers.HeNormal(seed=None),
                #   bias_initializer=keras.initializers.Zeros(),
                #   kernel_regularizer=l1_l2(l1=0, l2=l2_factor)
                )(x)
    z_logvar=Dense(latent_dim, name='z_log_var',
                    #   kernel_initializer=keras.initializers.Zeros(),
                    #   bias_initializer=keras.initializers.Zeros(),
                    #   kernel_regularizer=l1_l2(l1=0, l2=l2_factor)
                    )(x)
    z=Sampling()([z_mean,z_logvar])
    # ---


    encoder = keras.Model(inputs,[z_mean,z_logvar,z],name='encoder')
    return encoder


def Qmake_decoder_set_weights(input_dim,h_dim_1,h_dim_2,latent_dim):
    """ 
    Makes decoder

    Parameters
    ----------
    input_dim : int
        size of input layer
    h_dim_[X] : int
        size of hidden layer X
    latent_dim : int
        size of latent layer
    """
    # l2_factor = 1e-3
    # Input layer -------
    inputs=keras.Input(shape=(latent_dim)) 
    x = BatchNormalization(name="dec_BN_IN")(inputs)   
    # --

    # Hiden layer 1 (3 total, not counting latent) -------
    x = Dense(h_dim_2,

                #    kernel_initializer=keras.initializers.HeNormal(seed=None),
                #    bias_initializer=keras.initializers.Zeros(),
                #    kernel_regularizer=l1_l2(l1=0, l2=l2_factor)
                   )(x)
    x = BatchNormalization(name="dec_BN_h3")(x)
    x = LeakyReLU(name="dec_Lrelu3")(x)
    # --


    # Hidden layer 2( 4 total, not counting laten) -----
    x = Dense(h_dim_1,
                #    activation='relu',
                #    kernel_initializer=keras.initializers.HeNormal(seed=None),
                #    bias_initializer=keras.initializers.Zeros(),
                #    kernel_regularizer=l1_l2(l1=0, l2=l2_factor)
                   )(x)
    x = BatchNormalization(name="dec_BN_h4")(x)
    x = LeakyReLU(name="dec_Lrelu4")(x)
    # --

    x = Dense(input_dim,
                #    kernel_initializer=keras.initializers.HeNormal(seed=None),
                #    bias_initializer=keras.initializers.Zeros(),
                #    kernel_regularizer=l1_l2(l1=0, l2=l2_factor)
                   )(x)
    x = BatchNormalization(name="dec_BN_h5")(x)
    y = LeakyReLU(name="dec_Lrelu5")(x)
    decoder=keras.Model(inputs, y,name='decoder')
    return decoder

In [5]:
INPUT_SZ = 57
H1_SZ = 32
H2_SZ = 16
LATENT_SZ = 3
enc = Qmake_encoder_set_weights(INPUT_SZ, H1_SZ, H2_SZ, LATENT_SZ)
enc.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 57)]         0           []                               
                                                                                                  
 enc_dense1 (Dense)             (None, 32)           1856        ['input_1[0][0]']                
                                                                                                  
 enc_BN_h1 (BatchNormalization)  (None, 32)          128         ['enc_dense1[0][0]']             
                                                                                                  
 enc_Lrelu1 (LeakyReLU)         (None, 32)           0           ['enc_BN_h1[0][0]']              
                                                                                            

2025-06-30 16:05:11.264985: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38366 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:03:00.0, compute capability: 8.0
2025-06-30 16:05:11.266764: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 38366 MB memory:  -> device: 1, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:41:00.0, compute capability: 8.0
2025-06-30 16:05:11.268525: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 38366 MB memory:  -> device: 2, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:82:00.0, compute capability: 8.0
2025-06-30 16:05:11.270293: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 38366 MB memory:  -> device: 3, name: NVIDIA A100-SXM4-40GB, pci bu

In [6]:
dec = Qmake_decoder_set_weights(INPUT_SZ, H1_SZ, H2_SZ, LATENT_SZ)
dec.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   


 input_2 (InputLayer)        [(None, 3)]               0         
                                                                 
 dec_BN_IN (BatchNormalizati  (None, 3)                12        
 on)                                                             
                                                                 
 dense (Dense)               (None, 16)                64        
                                                                 
 dec_BN_h3 (BatchNormalizati  (None, 16)               64        
 on)                                                             
                                                                 
 dec_Lrelu3 (LeakyReLU)      (None, 16)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                544       
                                                                 
 dec_BN_h4 (BatchNormalizati  (None, 32)               128       
 on)      

In [25]:
def _custom_MSE(reconstruction, data):
    # "We use a dataset with standardized p_T as a target so that all quantities are O(1)" arXiv: 2108.03986 

    # Q: is the input also standardized?
    
    loss = keras.losses.mse(data, reconstruction)
    return loss

class VAE_Model(keras.Model):
    def __init__(self, encoder, decoder,**kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

        # per keras VAE example https://keras.io/examples/generative/vae/
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reonstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

        @property
        def metrics(self):
            return [
                self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker,
            ]


    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # here we shove in our custom reconstructionn loss function
            
            # Ignore zero-padded entries. 
            mask = K.cast(K.not_equal(data, 0), K.floatx()) 
            print(f"DATA SHAPE = {tf.shape(data)}\n", f"RECONSTRUCTION shape  = {tf.shape(reconstruction)}")
            # break
            reconstruction_loss = _custom_MSE(mask*reconstruction, mask*data)

            # This is just standard Kullback-Leibler diversion loss. I think this can stay.
            kl_loss = -0.5 * (1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var))
            kl_loss = ops.mean(ops.sum(kl_loss, axis = 1))

            total_loss = reconstruction_loss + kl_loss
        # break
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
        
    def call(self, data):
        z_mean,z_log_var,x = self.encoder(data)
        reconstruction = self.decoder(x)
        return {
            "z_mean": z_mean,
            "z_log_var": z_log_var,
            "reconstruction": reconstruction
        } 

    # need to define the loss

In [26]:
vae = VAE_Model(enc, dec)


In [27]:

vae.compile(optimizer=keras.optimizers.Adam())

In [28]:
vae.fit(x=X_train, epochs=2)

Epoch 1/2
DATA SHAPE = Tensor("Shape:0", shape=(2,), dtype=int32)
 RECONSTRUCTION shape  = Tensor("Shape_1:0", shape=(2,), dtype=int32)


NameError: in user code:

    File "/global/common/software/nersc9/tensorflow/2.12.0/lib/python3.9/site-packages/keras/engine/training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "/global/common/software/nersc9/tensorflow/2.12.0/lib/python3.9/site-packages/keras/engine/training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/global/common/software/nersc9/tensorflow/2.12.0/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "/tmp/ipykernel_526191/1893446130.py", line 42, in train_step
        kl_loss = -0.5 * (1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var))

    NameError: name 'ops' is not defined
