In [1]:
import random
import h5py
from pathlib import Path

In [2]:
import numpy as np
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Dense, Input, Conv2D, Reshape, Add, ReLU, Dropout, 
    Flatten, Softmax, BatchNormalization, MaxPooling2D, AveragePooling2D
)

2023-03-20 03:50:23.479871: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-20 03:50:24.565540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-03-20 03:50:24.565703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64


In [3]:
### set I/O path
project_dir = Path.cwd().parent.resolve()

data_dir = project_dir.joinpath('data')
h5_dir = data_dir.joinpath('hdf5')
h5_train_path = h5_dir.joinpath('train_small.h5')
h5_test_path = h5_dir.joinpath('test_small.h5')

log_dir = project_dir.joinpath('logs').joinpath('resnet50_small')
model_dir = project_dir.joinpath('models').joinpath('resnet50_small')
best_dir = model_dir.joinpath('best')
latest_dir = model_dir.joinpath('latest')

csv_log_path = project_dir.joinpath('csv_logs').joinpath('resnet50_small.csv')

In [4]:
################################ Start of network construction #########################################
################################# conv1  ################################## 
resnet50_conv1_input = Input(shape=( 256, 256, 3), name='conv1_input' )

x = Conv2D(
    64, kernel_size=7, strides=2, activation='relu',
    padding='same', kernel_constraint=keras.constraints.max_norm(2.)
)(resnet50_conv1_input)

resnet50_conv1_output = MaxPooling2D(
    pool_size=3, padding='same', strides=2
)(x)



resnet50_conv1 = Model( 
    inputs = resnet50_conv1_input,
    outputs = resnet50_conv1_output, 
    name = "ResNet-50_conv_1block"
)


################################# conv2  ################################## 
# First Block of conv2 

resnet50_conv2_first_block_input = Input(shape=(64,64,64), name='conv2_first_block_input' )
x = Conv2D( 64, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv2_first_block_input)
x = BatchNormalization()(x)
x = Conv2D( 64, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 256, kernel_size=1, strides=1, padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

shortcut = Conv2D(
    256, kernel_size=1, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.) 
)(resnet50_conv2_first_block_input)
shortcut = BatchNormalization()(shortcut)
x = Add()([x, shortcut])

resnet50_conv2_first_block_output = ReLU()(x)

resnet50_conv2_first_block = Model( 
    inputs = resnet50_conv2_first_block_input, 
    outputs = resnet50_conv2_first_block_output, 
    name = 'resnet50_conv2_first_block'
)


# Identity Block of conv2 
resnet50_conv2_identity_block_input = Input(shape=(64,64,256), name='conv2_identity_block_input' )
x = Conv2D( 64, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv2_identity_block_input)
x = BatchNormalization()(x)
x = Conv2D( 64, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 256, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

x = Add()([x, resnet50_conv2_identity_block_input ])
resnet50_conv2_identity_block_output = ReLU()(x)

resnet50_conv2_identity_block = Model( 
    inputs = resnet50_conv2_identity_block_input, 
    outputs= resnet50_conv2_identity_block_output, 
    name = 'resnet50_conv2_identity_block'
)



# Combining the 2 types of blocks 
resnet50_conv2_input = Input(shape=(64,64,64), name='resnet50_conv2_input')

x = resnet50_conv2_first_block(resnet50_conv2_input)
# x = resnet50_conv2_identity_block(resnet50_conv2_input)
x = resnet50_conv2_identity_block(x)
resnet50_conv2_output = resnet50_conv2_identity_block(x) 

resnet50_conv2 = Model(
    inputs = resnet50_conv2_input, 
    outputs = resnet50_conv2_output, 
    name = "ResNet-50_conv2_block"
)


################################# conv3  ##################################
# First Block of conv3 

resnet50_conv3_first_block_input = Input(shape=(64,64,256), name='conv3_first_block_input' )
x = Conv2D( 128, kernel_size=1, strides=2, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv3_first_block_input)
x = BatchNormalization()(x)
x = Conv2D( 128, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 512, kernel_size=1, strides=1, padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

shortcut = Conv2D(512, kernel_size=1, strides=2, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv3_first_block_input)
shortcut = BatchNormalization()(shortcut)
x = Add()([ x, shortcut])
resnet50_conv3_first_block_output = ReLU()(x)

resnet50_conv3_first_block = Model(
    inputs  = resnet50_conv3_first_block_input, 
    outputs = resnet50_conv3_first_block_output, 
    name = 'resnet50_conv3_first_block'
)


# Identity Block of conv3 
resnet50_conv3_identity_block_input = Input(shape=(32,32,512), name='conv3_identity_block_input' )
x = Conv2D( 128, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv3_identity_block_input)
x = BatchNormalization()(x)
x = Conv2D( 128, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 512, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

x = Add()([x, resnet50_conv3_identity_block_input ])
resnet50_conv3_identity_block_output = ReLU()(x)

resnet50_conv3_identity_block = Model( 
    inputs = resnet50_conv3_identity_block_input, 
    outputs= resnet50_conv3_identity_block_output, 
    name = 'resnet50_conv3_identity_block'
)



# Combining the 2 types of blocks 
resnet50_conv3_input = Input(shape=(64,64,256), name='resnet50_conv3_input' )

x = resnet50_conv3_first_block(resnet50_conv3_input)
x = resnet50_conv3_identity_block(x)
x = resnet50_conv3_identity_block(x)
resnet50_conv3_output = resnet50_conv3_identity_block(x) 

resnet50_conv3 = Model(
    inputs = resnet50_conv3_input, 
    outputs = resnet50_conv3_output, 
    name = "ResNet-50_conv3_block"
)


 
################################# conv4  ################################## 
# First Block of conv4

resnet50_conv4_first_block_input = Input(shape=(32,32,512), name='conv4_first_block_input' )
x = Conv2D( 256, kernel_size=1, strides=2, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv4_first_block_input)
x = BatchNormalization()(x)
x = Conv2D( 256, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 1024, kernel_size=1, strides=1, padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

shortcut = Conv2D(1024, kernel_size=1, strides=2, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv4_first_block_input)
shortcut = BatchNormalization()(shortcut)
x = Add()([ x, shortcut])
resnet50_conv4_first_block_output = ReLU()(x)

resnet50_conv4_first_block = Model(
    inputs  = resnet50_conv4_first_block_input, 
    outputs = resnet50_conv4_first_block_output, 
    name = 'resnet50_conv4_first_block'
)



# Identity Block of conv4
resnet50_conv4_identity_block_input = Input(shape=(16,16,1024), name='conv4_identity_block_input' )
x = Conv2D( 256, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv4_identity_block_input)
x = BatchNormalization()(x)
x = Conv2D( 256, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 1024, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

x = Add()([x, resnet50_conv4_identity_block_input ])
resnet50_conv4_identity_block_output = ReLU()(x)

resnet50_conv4_identity_block = Model( 
    inputs = resnet50_conv4_identity_block_input, 
    outputs= resnet50_conv4_identity_block_output, 
    name = 'resnet50_conv4_identity_block'
)




# Combining the 2 types of blocks 
resnet50_conv4_input = Input(shape=(32,32,512), name='resnet50_conv4_input' )

x = resnet50_conv4_first_block(resnet50_conv4_input)
x = resnet50_conv4_identity_block(x)
x = resnet50_conv4_identity_block(x)
x = resnet50_conv4_identity_block(x)
x = resnet50_conv4_identity_block(x)
resnet50_conv4_output = resnet50_conv4_identity_block(x) 

resnet50_conv4 = Model(
    inputs = resnet50_conv4_input, 
    outputs = resnet50_conv4_output, 
    name = "ResNet-50_conv4_block"
)



################################# conv5  ################################## 
# First Block of conv5
resnet50_conv5_first_block_input = Input(shape=(16,16,1024), name='conv5_first_block_input' )
x = Conv2D( 512, kernel_size=1, strides=2, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv5_first_block_input)
x = BatchNormalization()(x)
x = Conv2D( 512, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 2048, kernel_size=1, strides=1, padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

shortcut = Conv2D(2048, kernel_size=1, strides=2, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv5_first_block_input)
shortcut = BatchNormalization()(shortcut)
x = Add()([ x, shortcut])
resnet50_conv5_first_block_output = ReLU()(x)

resnet50_conv5_first_block = Model(
    inputs  = resnet50_conv5_first_block_input, 
    outputs = resnet50_conv5_first_block_output, 
    name = 'resnet50_conv5_first_block'
)




# Identity Block of conv5
resnet50_conv5_identity_block_input = Input(shape=(8,8,2048), name='conv5_identity_block_input' )
x = Conv2D( 512, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(resnet50_conv5_identity_block_input)
x = BatchNormalization()(x)
x = Conv2D( 512, kernel_size=3, strides=1, activation='relu', padding='same', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)
x = Conv2D( 2048, kernel_size=1, strides=1, activation='relu', padding='valid', kernel_constraint=keras.constraints.max_norm(2.))(x)
x = BatchNormalization()(x)

x = Add()([x, resnet50_conv5_identity_block_input ])
resnet50_conv5_identity_block_output = ReLU()(x)

resnet50_conv5_identity_block = Model( 
    inputs = resnet50_conv5_identity_block_input, 
    outputs= resnet50_conv5_identity_block_output, 
    name = 'resnet50_conv5_identity_block'
)




# Combining the 2 types of blocks  
resnet50_conv5_input = Input(shape=(16,16,1024), name='resnet50_conv5_input' )

x = resnet50_conv5_first_block(resnet50_conv5_input)
x = resnet50_conv5_identity_block(x)
resnet50_conv5_output = resnet50_conv5_identity_block(x) 

resnet50_conv5 = Model(
    inputs = resnet50_conv5_input, 
    outputs = resnet50_conv5_output, 
    name = "ResNet-50_conv5_block"
)

# tf.keras.utils.plot_model(resnet50_conv5, show_shapes=True, show_dtype=True)

################################# classifier  ################################## 
# The classifier will tell us whether this event is 
# electron CC / muon CC / tauon CC / Neutral (4 types)


resnet50_classifier_input = Input(shape=(8,8,2048), name='classification')
x = AveragePooling2D(pool_size=2, padding='same')(resnet50_classifier_input)
x = Dropout(0.2)(x) 
x = Flatten()(x)


# I think it's 4, but if we want to classify more types of events 
# Need to change 4 to something else... 
number_of_categories = 5

resnet50_classifier_output = Dense(
    number_of_categories, activation='softmax', kernel_constraint=keras.constraints.max_norm(2.)
)(x)


resnet50_classifier = Model(
    inputs = resnet50_classifier_input, 
    outputs= resnet50_classifier_output, 
    name = "ResNet-50_Classifier"
)



#################################################################################### 
def build_resnet50(optimizer, loss, metrics): 
    resnet50_input = Input(shape=( 256, 256, 3))
    
    # image Augumentation ?? 
    x = resnet50_input 
    
    x = resnet50_conv1(x)  # do conv in this block  
    x = resnet50_conv2(x)
    x = resnet50_conv3(x)
    x = resnet50_conv4(x)
    x = resnet50_conv5(x)
    
    resnet50_output = resnet50_classifier(x)
    
    resnet50_model = Model( 
        inputs  = resnet50_input, 
        outputs = resnet50_output, 
        name = 'ResNet-50_Whole_Network'
    )
    
    print( resnet50_model.summary() )
        
    resnet50_model.compile( 
        optimizer=optimizer,
        loss=loss,
        metrics=metrics,
    )

    return resnet50_model

################################ End of network construction ######################################### 
    


# tf.keras.utils.plot_model(resnet_obj, show_shapes=True, show_dtype=True)

2023-03-20 03:50:25.363949: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-20 03:50:25.970509: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22294 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:41:00.0, compute capability: 8.6


In [5]:
### define optimizerm, loss, metrics
loss = keras.losses.CategoricalCrossentropy(from_logits=False)
optimizer = keras.optimizers.Adam(learning_rate=0.0005)



ca = keras.metrics.CategoricalAccuracy(
    name='categorical_accuracy', dtype=None
)
metrics = [ca]

### build the resnet model and compile
resnet50_network = build_resnet50(loss=loss, optimizer=optimizer, metrics=metrics)

Model: "ResNet-50_Whole_Network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 ResNet-50_conv_1block (Func  (None, 64, 64, 64)       9472      
 tional)                                                         
                                                                 
 ResNet-50_conv2_block (Func  (None, 64, 64, 256)      148480    
 tional)                                                         
                                                                 
 ResNet-50_conv3_block (Func  (None, 32, 32, 512)      665600    
 tional)                                                         
                                                                 
 ResNet-50_conv4_block (Func  (None, 16, 16, 1024)     2641920   
 tional)                                   

In [6]:
class generator:
    def __init__(self, file, mode, batch_size):
        self.file = file
        self.mode = mode
        
        self.batch_size = batch_size
        self.length = self.compute_length()
        
        self.indices = None

    def __call__(self):
        self.indices = list(range(self.length))
        random.shuffle(self.indices)
            
        with h5py.File(self.file, 'r') as hf:
            for i in range(int(self.length/self.batch_size)-1): 
                sel_indices = [self.indices.pop() for _ in range(self.batch_size)]
                sel_indices.sort()

                sel_imgs = hf[f"X_{self.mode}"][sel_indices]
                sel_labels = hf[f"y_{self.mode}"][sel_indices]

                sel_imgs = sel_imgs.swapaxes(1,-1)
                #sel_labels = sel_labels.reshape(self.batch_size, 5)

                yield sel_imgs, sel_labels
    
    def compute_length(self):
        length = 0
        with h5py.File(self.file, 'r') as hf:
            length = len(hf[f"X_{self.mode}"])
        return length

In [7]:
batch_size=128
train_gen = generator(h5_train_path, mode='train', batch_size=batch_size)
test_gen = generator(h5_test_path, mode='test', batch_size=batch_size)

ds_train = tf.data.Dataset.from_generator(
    train_gen,
    output_signature=(
         tf.TensorSpec(shape=(batch_size, 256, 256, 3), dtype=tf.float32),
         tf.TensorSpec(shape=(batch_size, 5), dtype=tf.float32)
    )
)

ds_val = tf.data.Dataset.from_generator(
    test_gen,
    output_signature=(
         tf.TensorSpec(shape=(batch_size, 256, 256, 3), dtype=tf.float32),
         tf.TensorSpec(shape=(batch_size, 5), dtype=tf.float32)
    )
)

ds_train.take(1)

<TakeDataset element_spec=(TensorSpec(shape=(128, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(128, 5), dtype=tf.float32, name=None))>

In [None]:
cb_tb = keras.callbacks.TensorBoard(log_dir=log_dir)
cb_csv = keras.callbacks.CSVLogger(csv_log_path)
cb_save_best = keras.callbacks.ModelCheckpoint(filepath=best_dir, monitor='val_loss', save_best_only=True)
cb_save_latest = keras.callbacks.ModelCheckpoint(filepath=latest_dir, monitor='val_loss', save_freq='epoch')

callbacks = [cb_tb, cb_csv, cb_save_best, cb_save_latest]

history = resnet50_network.fit(
    x=ds_train, epochs=3,
    validation_data=ds_val,
    workers=8,
    use_multiprocessing=True
)

Epoch 1/3


2023-03-20 03:50:34.204679: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inResNet-50_Whole_Network/ResNet-50_Classifier/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2023-03-20 03:50:40.521572: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8204
2023-03-20 03:50:42.331051: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-03-20 03:50:43.043378: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f7948c25db0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-03-20 03:50:43.043423: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce RTX 3090, Compute Capability 8.6
2023-03-20 03:50:43.048185: I tensorflow/compiler/m

Epoch 2/3
 2/35 [>.............................] - ETA: 2:13 - loss: 2.0394 - categorical_accuracy: 0.4180