<a href="https://colab.research.google.com/github/eflatlan/CNN_PID/blob/main/cnn_adjusted.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install h5py numpy

import os
import h5py
import numpy as np

# Mount Google Drive at /content/drive so the HDF5 files stored there can be
# opened by the loader functions defined further down.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import matplotlib.pyplot as plt
import random

def plot_random_element(X_train_map):
    """Show channel 0 of one randomly selected sample of ``X_train_map``.

    A sample index is drawn uniformly from ``0 .. len(X_train_map) - 1`` and the
    slice ``X_train_map[index, :, :, 0]`` is rendered with the ``viridis``
    colour map, a colour bar, and the origin in the lower-left corner.

    Args:
        X_train_map: Object supporting ``len()`` and indexing of the form
            ``[sample, :, :, channel]``; it must hold at least one sample.
    """
    picked = random.randint(0, len(X_train_map) - 1)
    picked_map = X_train_map[picked, :, :, 0]
    heading = f"Random Element from X_train_map (Index {picked})"

    plt.figure(figsize=(8, 6))
    plt.imshow(picked_map, cmap='viridis', origin='lower')
    plt.title(heading)
    plt.colorbar(label='Intensity')
    plt.xlabel('X Axis')
    plt.ylabel('Y Axis')
    plt.show()






In [4]:
def plot5(X_test_map, particle_vector, num_plots=5):
    """Plot randomly chosen maps, one per row, annotated with particle information.

    Up to ``num_plots`` distinct samples are drawn without replacement. If
    fewer samples are available, all of them are shown instead of failing.

    Args:
        X_test_map: Array with a ``shape`` attribute, indexable as
            ``[sample, :, :, channel]``; channel 0 is displayed.
        particle_vector: Sequence aligned with ``X_test_map``. Each element
            must expose ``mass_category``, ``ckov``, ``mip_position`` (indexable
            as ``[0]`` and ``[1]``) and ``momentum``.
        num_plots: Maximum number of maps to draw (default 5, the value that
            was previously hard-coded).

    Raises:
        ValueError: If there is no map to plot.
    """
    num_available = X_test_map.shape[0]
    num_shown = min(num_plots, num_available)
    if num_shown < 1:
        raise ValueError("plot5 needs at least one map to plot")

    # Distinct sample indices, drawn without replacement.
    random_indices = np.random.choice(num_available, size=num_shown, replace=False)

    # squeeze=False always yields a 2-D grid of axes, even for a single row,
    # so the indexing below works for any number of plots. Four inches of
    # height per row reproduces the former (8, 20) figure for five rows.
    fig, axes = plt.subplots(nrows=num_shown, ncols=1,
                             figsize=(8, 4 * num_shown), squeeze=False)

    for ax, index in zip(axes[:, 0], random_indices):
        particle = particle_vector[index]
        mass_category = particle.mass_category
        ckov = particle.ckov
        mip_position = particle.mip_position
        momentum = particle.momentum

        # Grey-scale map with a red dot at the MIP position.
        ax.imshow(X_test_map[index, :, :, 0], cmap='gray')
        ax.plot(mip_position[0], mip_position[1], 'ro')

        ax.set_title(f"Mass: {mass_category}, CKOV: {ckov}, MIP Position: {mip_position}, Momentum: {momentum}")
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [5]:
def create_lr_scheduler(num_epochs = 10):
    """Build a Keras callback that sweeps the learning rate exponentially.

    The rate for epoch ``e`` is ``1e-4 * 10**(e / div)`` with
    ``div = num_epochs / 4``, i.e. it starts at 1e-4 and grows by a factor of
    ten every ``div`` epochs. The TensorFlow global seed is set to 42 as a side
    effect, and ``div`` plus the rate evaluated at ``num_epochs`` are printed.

    Args:
        num_epochs: Number of epochs the sweep is laid out for.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` wrapping the schedule.
    """
    tf.random.set_seed(42)

    div = num_epochs/4
    print("div =", div)
    print("1e-4 * 10**(epoch/div) = ", 1e-4 * 10**(num_epochs/div))

    def schedule(epoch):
        # Exponential ramp: one decade every `div` epochs, starting from 1e-4.
        return 1e-4 * 10**(epoch/div)

    return tf.keras.callbacks.LearningRateScheduler(schedule)





def plot_lr(num_epochs = 10, history = None):
    """Plot training loss against the learning rate of an exponential sweep.

    The learning rates are recomputed with the same formula as
    ``create_lr_scheduler``: ``1e-4 * 10**(epoch / (num_epochs / 4))``. One
    rate is generated per recorded loss value, so a run that stopped before
    ``num_epochs`` epochs can still be plotted.

    Args:
        num_epochs: Epoch count the sweep was laid out for; it sets the slope
            of the ramp.
        history: Object whose ``history["loss"]`` holds one loss per epoch
            (for example the return value of ``model.fit``). Required.

    Raises:
        ValueError: If ``history`` is missing or ``num_epochs`` is not positive.
    """
    if history is None:
        raise ValueError("plot_lr requires the History object returned by model.fit()")
    if num_epochs <= 0:
        raise ValueError("num_epochs must be a positive number")

    losses = history.history["loss"]
    div = num_epochs/4
    # One learning rate per recorded epoch keeps x and y the same length.
    lrs = 1e-4 * (10 ** (np.arange(len(losses))/div))

    plt.figure(figsize=(10, 7))
    plt.semilogx(lrs, losses)  # learning rate on a logarithmic x-axis
    plt.xlabel("Learning Rate")
    plt.ylabel("Loss")
    plt.title("Learning rate vs. loss")

In [21]:
class ParticleDataUtils:
    """Namespace grouping the plain data holders filled from the particle HDF5 file."""

    class Candidate2:
        """One candidate entry: a position and its status code."""

        def __init__(self, x, y, candStatus):
            self.x, self.y = x, y
            self.candStatus = candStatus

    class ParticleInfo:
        """All values stored for one particle, plus the category derived from its mass.

        The trailing notes on the attributes record the author's intent about
        which fields are meant to be fed to the model.
        """

        def __init__(self, momentum, mass, energy, refractiveIndex, ckov, xRad, yRad, xPC, yPC, thetaP, phiP, arrayInfo, candsCombined):
            # Fields intended as model features.
            self.momentum = momentum
            self.energy = energy
            self.refractiveIndex = refractiveIndex
            self.xRad, self.yRad = xRad, yRad
            self.xPC, self.yPC = xPC, yPC
            self.thetaP, self.phiP = thetaP, phiP
            # Candidate2 objects; candStatus is an integer in 0..7 that is
            # meant to be treated as a categorical value.
            self.candsCombined = candsCombined

            # Fields intended to be kept out of the model features.
            self.mass = mass
            self.ckov = ckov
            self.arrayInfo = arrayInfo

            # Label derived from the mass.
            self.mass_category = self.infer_mass_category(mass)

        @staticmethod
        def infer_mass_category(mass):
            """Return "pion", "proton" or "kaon" when ``mass`` is within 1e-6 of
            the matching reference value, otherwise "unknown"."""
            reference_masses = (
                ("pion", 0.1396),
                ("proton", 0.938),
                ("kaon", 0.4937),
            )
            for label, reference in reference_masses:
                if abs(mass - reference) < 1e-6:
                    return label
            return "unknown"

def load_particle_info_from_hdf5(filename):
    """Read every particle group of an HDF5 file into ``ParticleInfo`` objects.

    Each top-level group must carry the attributes ``Momentum``, ``Mass``,
    ``Energy``, ``RefractiveIndex``, ``Ckov``, ``xRad``, ``yRad``, ``xPC``,
    ``yPC``, ``ThetaP`` and ``PhiP`` and the datasets ``ArrayInfo`` and
    ``candsCombined``. The records of ``candsCombined`` need the fields ``x``,
    ``y`` and ``candStatus``. The attributes of the first group are printed as
    a quick sanity check.

    Args:
        filename: Path of the HDF5 file to open read-only.

    Returns:
        List of ``ParticleDataUtils.ParticleInfo``, one per group in the file's
        iteration order. An empty file yields an empty list.

    Raises:
        KeyError: If a group lacks one of the required attributes or datasets;
            the message names the group and the file.
    """
    particle_vector = []

    with h5py.File(filename, 'r') as file:
        group_names = list(file.keys())
        if not group_names:
            # Nothing stored: return an empty list instead of failing on [0].
            return particle_vector

        first_group_name = group_names[0]
        print("Attributes for group '{}':".format(first_group_name))
        for attr_name, attr_value in file[first_group_name].attrs.items():
            print("{}: {}".format(attr_name, attr_value))

        for group_name in group_names:
            group = file[group_name]
            attrs = group.attrs

            try:
                momentum = attrs['Momentum']
                mass = attrs['Mass']
                energy = attrs['Energy']
                refractiveIndex = attrs['RefractiveIndex']
                ckov = attrs['Ckov']
                xRad = attrs['xRad']
                yRad = attrs['yRad']
                xPC = attrs['xPC']
                yPC = attrs['yPC']
                thetaP = attrs['ThetaP']
                phiP = attrs['PhiP']

                # [...] reads the whole dataset into memory.
                arrayInfo_data = group['ArrayInfo'][...]
                candsCombined_data = group['candsCombined'][...]
            except KeyError as missing:
                # Same exception type as before, but say where the gap is.
                raise KeyError(
                    "group '{}' in '{}' has no entry {}".format(group_name, filename, missing)
                ) from missing

            # Compound records -> plain Candidate2 objects.
            candsCombined = [
                ParticleDataUtils.Candidate2(record['x'], record['y'], record['candStatus'])
                for record in candsCombined_data
            ]

            particle_vector.append(ParticleDataUtils.ParticleInfo(
                momentum, mass, energy, refractiveIndex, ckov,
                xRad, yRad, xPC, yPC, thetaP, phiP,
                arrayInfo=arrayInfo_data, candsCombined=candsCombined))

    return particle_vector

def read_particle_data_from_file(filename="particle.h5",
                                 drive_path='/content/drive/MyDrive/Colab Notebooks/CERN_ML/CNN_PID/test/'):
    """Load the particles stored in ``filename`` inside ``drive_path``.

    Args:
        filename: Name of the HDF5 file.
        drive_path: Folder containing the file. Defaults to the Google Drive
            folder this notebook was written against; pass another folder to
            read the data from elsewhere.

    Returns:
        Whatever ``load_particle_info_from_hdf5`` returns for the joined path.
    """
    file_path = os.path.join(drive_path, filename)
    return load_particle_info_from_hdf5(file_path)

# Build and compile the dense multi-input PID model from the candidate-level file.
#
# Everything this cell needs is imported here so that it also runs on a freshly
# restarted kernel instead of relying on names left behind by other cells.
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, StandardScaler
from tensorflow.keras.layers import Dense, Flatten, Input, concatenate, Dropout, BatchNormalization
from tensorflow.keras.models import Model

NUM_CAND_STATUS = 8  # candStatus is an integer code in 0..7


def _as_column(values):
    """Return ``values`` as a float array of shape (n_samples, 1)."""
    return np.asarray(values, dtype=float).reshape(-1, 1)


# ---------------------------------------------------------------- load ----
filename = 'ParticleInfo15k.h5'
particle_vector = read_particle_data_from_file(filename)

# ------------------------------------------------ per-candidate features ----
# NOTE(review): assumes every particle carries the same number of candidates;
# ragged lists cannot form a rectangular array and raise here - TODO confirm.
X_train_candsCombined_xy = np.array(
    [[[cand.x, cand.y] for cand in particle.candsCombined] for particle in particle_vector],
    dtype=float,
)
X_train_candsCombined_status = np.array(
    [[cand.candStatus for cand in particle.candsCombined] for particle in particle_vector],
    dtype=int,
)

# One-hot encode the status code per candidate: (n, n_cands) -> (n, n_cands, 8).
if X_train_candsCombined_status.size and not (
        0 <= X_train_candsCombined_status.min()
        and X_train_candsCombined_status.max() < NUM_CAND_STATUS):
    raise ValueError("candStatus values must lie in 0..{}".format(NUM_CAND_STATUS - 1))
X_train_candsCombined_status_encoded = np.eye(NUM_CAND_STATUS)[X_train_candsCombined_status]

# ------------------------------------------------- per-particle features ----
X_train_momentum = _as_column([particle.momentum for particle in particle_vector])
X_train_refractive_index = _as_column([particle.refractiveIndex for particle in particle_vector])
X_train_phi = _as_column([particle.phiP for particle in particle_vector])
X_train_theta = _as_column([particle.thetaP for particle in particle_vector])
X_train_energy = _as_column([particle.energy for particle in particle_vector])

# Positions are (x, y) pairs.
# NOTE(review): the MIP position is presumably (xPC, yPC) - TODO confirm.
X_train_mip_position = np.array(
    [[particle.xPC, particle.yPC] for particle in particle_vector], dtype=float).reshape(-1, 2)
X_train_rad_position = np.array(
    [[particle.xRad, particle.yRad] for particle in particle_vector], dtype=float).reshape(-1, 2)

# ---------------------------------------------------------------- labels ----
label_binarizer = LabelBinarizer()
y_train_encoded = label_binarizer.fit_transform([particle.mass_category for particle in particle_vector])
# Encodes the same particles again, so it equals y_train_encoded; the actual
# held-out labels are y_test from the split below.
y_test_encoded = label_binarizer.transform([particle.mass_category for particle in particle_vector])
# NOTE: LabelBinarizer emits a single column when only two classes occur,
# which does not match a softmax output - at least three classes are expected.
num_classes = len(label_binarizer.classes_)

# ------------------------------------------------------ train/test split ----
(X_train_momentum, X_test_momentum,
 X_train_refractive_index, X_test_refractive_index,
 X_train_mip_position, X_test_mip_position,
 X_train_candsCombined_xy, X_test_candsCombined_xy,
 X_train_candsCombined_status, X_test_candsCombined_status,
 X_train_rad_position, X_test_rad_position,
 X_train_phi, X_test_phi,
 X_train_theta, X_test_theta,
 X_train_energy, X_test_energy,
 y_train, y_test) = train_test_split(
    X_train_momentum,
    X_train_refractive_index,
    X_train_mip_position,
    X_train_candsCombined_xy,
    X_train_candsCombined_status_encoded,
    X_train_rad_position,
    X_train_phi,
    X_train_theta,
    X_train_energy,
    y_train_encoded,
    test_size=0.2,
    random_state=42,
)

# --------------------------------------------------------------- scaling ----
# Scalers are fitted on the training part only and then applied to the test
# part, so no statistics of the held-out data leak into training.
std_scaler = StandardScaler()
X_train_refractive_index = std_scaler.fit_transform(X_train_refractive_index)
X_test_refractive_index = std_scaler.transform(X_test_refractive_index)

# StandardScaler works on 2-D data: flatten the candidates to (n * n_cands, 2),
# scale x and y, then restore the (n, n_cands, 2) layout.
xy_scaler = StandardScaler()
X_train_candsCombined_xy = xy_scaler.fit_transform(
    X_train_candsCombined_xy.reshape(-1, 2)).reshape(X_train_candsCombined_xy.shape)
X_test_candsCombined_xy = xy_scaler.transform(
    X_test_candsCombined_xy.reshape(-1, 2)).reshape(X_test_candsCombined_xy.shape)

# ---------------------------------------------------------- input shapes ----
momentum_shape = (1,)
refractive_index_shape = (1,)
mip_position_shape = (2,)
candsCombined_xy_shape = X_train_candsCombined_xy.shape[1:]          # (n_cands, 2)
candsCombined_status_shape = X_train_candsCombined_status.shape[1:]  # (n_cands, 8)
rad_position_shape = (2,)
phi_shape = (1,)
theta_shape = (1,)
energy_shape = (1,)

# ---------------------------------------------------------------- inputs ----
# Per-candidate inputs: entry n of the xy input belongs to entry n of the
# one-hot status input.
candsCombined_xy_input = Input(shape=candsCombined_xy_shape, name='candsCombined_xy_input')
candsCombined_status_input = Input(shape=candsCombined_status_shape, name='candsCombined_status_input')

# Scalars
momentum_input = Input(shape=momentum_shape, name='momentum_input')
refractive_index_input = Input(shape=refractive_index_shape, name='refractive_index_input')
phi_input = Input(shape=phi_shape, name='phi_input')
theta_input = Input(shape=theta_shape, name='theta_input')
energy_input = Input(shape=energy_shape, name='energy_input')

# (x, y) pairs
mip_position_input = Input(shape=mip_position_shape, name='mip_position_input')
rad_position_input = Input(shape=rad_position_shape, name='rad_position_input')

# ----------------------------------------------------------------- model ----
# Candidate branch: Dense acts on the last axis, i.e. per candidate.
xy_dense = Dense(32, activation='relu')(candsCombined_xy_input)
xy_dense = Dropout(0.2)(xy_dense)

status_dense = Dense(32, activation='relu')(candsCombined_status_input)
status_dense = Dropout(0.2)(status_dense)

# Joining on the last axis pairs each candidate's xy features with its status.
candsCombined_processed = concatenate([xy_dense, status_dense])
candsCombined_dense = Dense(64, activation='relu')(candsCombined_processed)
candsCombined_dense = Dropout(0.2)(candsCombined_dense)
# Collapse (n_cands, 64) to a vector so it can be joined with the rank-2
# scalar and position branches below.
candsCombined_dense = Flatten()(candsCombined_dense)

# Scalar branch
scalar_inputs = concatenate([momentum_input, refractive_index_input, phi_input, theta_input, energy_input])
scalar_dense = Dense(64, activation='relu')(scalar_inputs)
scalar_dense = Dropout(0.2)(scalar_dense)

# Position branch
position_inputs = concatenate([mip_position_input, rad_position_input])
position_dense = Dense(64, activation='relu')(position_inputs)
position_dense = Dropout(0.2)(position_dense)

# Merge all branches and classify.
concat = concatenate([candsCombined_dense, scalar_dense, position_dense])

fc1 = Dense(128, activation='relu')(concat)
fc1 = BatchNormalization()(fc1)
fc1 = Dropout(0.1)(fc1)

fc2 = Dense(32, activation='relu')(fc1)
fc2 = BatchNormalization()(fc2)
fc2 = Dropout(0.1)(fc2)

output = Dense(num_classes, activation='softmax')(fc2)  # one unit per mass category

model = Model(inputs=[candsCombined_xy_input, candsCombined_status_input, momentum_input, refractive_index_input, phi_input, theta_input, energy_input, mip_position_input, rad_position_input], outputs=output)

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002), loss='categorical_crossentropy', metrics=['accuracy'])

Attributes for group 'Particle0':
Ckov: 0.609717845916748
Energy: 5.904012680053711
Mass: 0.9380000233650208
Momentum: 2.986387252807617
PhiP: -0.4440668523311615
RefractiveIndex: 1.278549075126648
ThetaP: 0.1677367240190506
xPC: 27.119104385375977
xRad: 25.70473289489746
yPC: 130.7153778076172
yRad: 131.38827514648438


KeyError: ignored

In [None]:
# Learning curves side by side: loss in the left panel, accuracy in the right.
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))

# (key in history.history, wording used in labels), in left-to-right order.
panels = [('loss', 'Loss'), ('accuracy', 'Accuracy')]
for position, (metric, wording) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f'Training {wording}')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {wording}')
    plt.title(f'Training and Validation {wording}')
    plt.xlabel('Epoch')
    plt.ylabel(wording)
    plt.legend()

plt.tight_layout()
plt.show()

In [20]:
from __future__ import print_function
import os
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, concatenate, BatchNormalization, MaxPooling2D, Dropout
from sklearn.model_selection import train_test_split

class ParticleDataUtils:
    """Namespace for the particle record used by the map-based model."""

    class ParticleInfo:
        """Values stored for one particle plus the category derived from its mass."""

        def __init__(self, momentum, mass, energy, refractiveIndex, ckov, filledBins):
            self.momentum, self.mass, self.energy = momentum, mass, energy
            self.refractiveIndex = refractiveIndex
            self.ckov = ckov
            self.filledBins = filledBins
            # Label derived from the mass.
            self.mass_category = self.infer_mass_category(mass)

        @staticmethod
        def infer_mass_category(mass):
            """Return "pion", "proton" or "kaon" when ``mass`` is within 1e-6 of
            the matching reference value, otherwise "unknown"."""
            reference_masses = (
                ("pion", 0.1396),
                ("proton", 0.938),
                ("kaon", 0.4937),
            )
            for label, reference in reference_masses:
                if abs(mass - reference) < 1e-6:
                    return label
            return "unknown"

def save_particle_info_to_hdf5(particle_vector, filename):
    """Write every particle to its own group ``Particle<i>`` of a new HDF5 file.

    Scalar values are stored as group attributes (``Momentum``, ``Mass``,
    ``Energy``, ``RefractiveIndex``, ``Ckov`` and, when available,
    ``MassCategory``). The filled bins are stored as the compound dataset
    ``FilledBins`` with float fields ``x`` and ``y``.

    Args:
        particle_vector: Iterable of particles exposing ``momentum``, ``mass``,
            ``energy``, ``refractiveIndex``, ``ckov`` and ``filledBins`` (an
            iterable of (x, y) pairs). The category is taken from
            ``mass_category``, falling back to the older ``massCategory``.
        filename: Path of the file to create; an existing file is overwritten.
    """
    bin_dtype = [('x', 'f'), ('y', 'f')]

    with h5py.File(filename, 'w') as file:
        for i, particle in enumerate(particle_vector):
            # Create a group for each particle
            group = file.create_group(f'Particle{i}')

            # Store scalar values
            group.attrs['Momentum'] = particle.momentum
            group.attrs['Mass'] = particle.mass
            group.attrs['Energy'] = particle.energy
            group.attrs['RefractiveIndex'] = particle.refractiveIndex
            group.attrs['Ckov'] = particle.ckov

            # ParticleInfo names the attribute `mass_category`; objects using
            # the older `massCategory` spelling are still accepted.
            mass_category = getattr(particle, 'mass_category', None)
            if mass_category is None:
                mass_category = getattr(particle, 'massCategory', None)
            if mass_category is not None:
                group.attrs['MassCategory'] = mass_category

            # A structured array is only filled field-by-field from tuples; a
            # list pair would be read as an extra dimension and each number
            # copied into both fields, so normalise every pair to a tuple.
            filled_bins = np.array([tuple(pair) for pair in particle.filledBins], dtype=bin_dtype)
            group.create_dataset("FilledBins", data=filled_bins)

def load_particle_info_from_hdf5(filename):
    """Read every group of an HDF5 file into a ``ParticleInfo`` object.

    For each top-level group the attributes ``Momentum``, ``Mass``, ``Energy``,
    ``RefractiveIndex`` and ``Ckov`` are read and the ``FilledBins`` dataset is
    converted to a plain Python list. A stored ``MassCategory`` attribute is
    not read; ``ParticleInfo`` derives the category from the mass.

    Args:
        filename: Path of the HDF5 file to open read-only.

    Returns:
        List of ``ParticleDataUtils.ParticleInfo`` in the file's iteration order.
    """
    scalar_keys = ('Momentum', 'Mass', 'Energy', 'RefractiveIndex', 'Ckov')
    loaded = []

    with h5py.File(filename, 'r') as h5_file:
        for name in h5_file:
            entry = h5_file[name]

            # Scalars, in the positional order ParticleInfo expects.
            scalars = [entry.attrs[key] for key in scalar_keys]

            # Whole dataset -> numpy array -> list.
            bins = entry['FilledBins'][...].tolist()

            loaded.append(ParticleDataUtils.ParticleInfo(*scalars, filledBins=bins))

    return loaded


def read_particle_data_from_file(filename="particle.h5",
                                 drive_path='/content/drive/MyDrive/Colab Notebooks/CERN_ML/CNN_PID/test/'):
    """Load the particles stored in ``filename`` inside ``drive_path``.

    Args:
        filename: Name of the HDF5 file.
        drive_path: Folder containing the file. Defaults to the Google Drive
            folder this notebook was written against; pass another folder to
            read the data from elsewhere.

    Returns:
        Whatever ``load_particle_info_from_hdf5`` returns for the joined path.
    """
    file_path = os.path.join(drive_path, filename)
    return load_particle_info_from_hdf5(file_path)

# Load the particles and rasterise each one into a binary hit map.
filename = 'ParticleInfo.h5'
particle_vector = read_particle_data_from_file(filename)

# Map geometry as (rows, columns): the y coordinate selects the row and the
# x coordinate selects the column.
map_shape = (144, 160)
n_rows, n_cols = map_shape

map_data_list = []
map_data = np.zeros(map_shape)
for particle in particle_vector:
    map_data = np.zeros(map_shape)  # fresh, empty map for this particle

    for entry in particle.filledBins:
        col = int(round(entry[0]))
        row = int(round(entry[1]))
        if not (0 <= row < n_rows and 0 <= col < n_cols):
            continue  # coordinates that round to a cell outside the map are dropped
        map_data[row, col] = 1

    map_data_list.append(map_data)

# Stack the per-particle maps into one array.
map_data_array = np.array(map_data_list)

# Assemble the model inputs and the label strings.
X_train_map = map_data_array
X_train_momentum = np.array([p.momentum for p in particle_vector])
X_train_refractive_index = np.array([p.refractiveIndex for p in particle_vector])
y_train = np.array([p.mass_category for p in particle_vector])

from sklearn.preprocessing import StandardScaler

# One scaler object, refitted for each feature in turn; after this block it
# holds the statistics of the refractive index (the last feature fitted).
# NOTE(review): both fits see the complete data set, i.e. they run before the
# train/test split performed later in this cell - consider fitting on the
# training part only.
std_scaler = StandardScaler()

# The scaler needs column vectors of shape (n_samples, 1).
momentum_column = np.array(X_train_momentum).reshape(-1, 1)
X_train_momentum = std_scaler.fit_transform(momentum_column)

refractive_index_column = np.array(X_train_refractive_index).reshape(-1, 1)
X_train_refractive_index = std_scaler.fit_transform(refractive_index_column)


# The train/test split is not done here; it happens later in this cell, after
# the labels have been encoded.

# Append a trailing channel axis: (samples, rows, cols) -> (samples, rows, cols, 1).
n_samples, n_map_rows, n_map_cols = X_train_map.shape[0], X_train_map.shape[1], X_train_map.shape[2]
X_train_map = X_train_map.reshape(n_samples, n_map_rows, n_map_cols, 1)

# Per-sample shapes of the three model inputs.
map_shape = X_train_map.shape[1:]
momentum_shape = (1,)
refractive_index_shape = (1,)

# Keras input placeholders, one per input.
map_input = Input(name='map_input', shape=map_shape)
momentum_input = Input(name='momentum_input', shape=momentum_shape)
refractive_index_input = Input(name='refractive_index_input', shape=refractive_index_shape)

# Convolutional feature extractor for the hit map, followed by a dense
# classifier head that also receives the momentum and the refractive index.

def _conv_bn_relu_pool(tensor, filters, kernel_size, drop_rate):
    """Conv2D -> BatchNormalization -> ReLU -> 2x2 max pooling -> Dropout."""
    features = Conv2D(filters, kernel_size)(tensor)
    features = BatchNormalization()(features)
    features = tf.keras.activations.relu(features)
    features = MaxPooling2D((2, 2))(features)
    return Dropout(drop_rate)(features)


def _dense_bn_relu_drop(tensor, units, drop_rate):
    """Dense -> BatchNormalization -> ReLU -> Dropout."""
    features = Dense(units)(tensor)
    features = BatchNormalization()(features)
    features = tf.keras.activations.relu(features)
    return Dropout(drop_rate)(features)


conv1 = _conv_bn_relu_pool(map_input, 32, (3, 3), 0.2)
conv2 = _conv_bn_relu_pool(conv1, 64, (5, 5), 0.2)

# Third stage: ReLU is applied inside the convolution and there is no batch
# normalisation.
conv3 = Conv2D(16, (7, 7), activation='relu')(conv2)
conv3 = MaxPooling2D((2, 2))(conv3)
conv3 = Dropout(0.3)(conv3)

# Flatten the feature maps and append the two scalar inputs.
flat_map = Flatten()(conv3)
concat = concatenate([flat_map, momentum_input, refractive_index_input])

# Fully connected head.
fc1 = _dense_bn_relu_drop(concat, 128, 0.3)
fc2 = _dense_bn_relu_drop(fc1, 32, 0.3)

output = Dense(3, activation='softmax')(fc2)  # one unit per mass category

model = Model(inputs=[map_input, momentum_input, refractive_index_input], outputs=output)

# Sparse categorical cross-entropy: the labels are integer class ids.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate = 0.0002), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the shapes of the input data
print("Dimensions of the input data:")
print("Training:")
print("  X_train_map =", X_train_map.shape)
print("  X_train_momentum =", X_train_momentum.shape)
print("  X_train_refractive_index =", X_train_refractive_index.shape)
print("  y_train =", y_train.shape)

# Missing-value check. Comparing a numeric array with None is False for every
# element, so it can never flag anything; NaN is what a missing number looks
# like in these float arrays.
print("Missing values:")
for array_name, array in (
        ("X_train_map", X_train_map),
        ("X_train_momentum", X_train_momentum),
        ("X_train_refractive_index", X_train_refractive_index)):
    print("  {} contains NaN: {}".format(array_name, bool(np.isnan(array).any())))

# For the labels the problematic value is "unknown": a mass that matched none
# of the reference masses, which would add a fourth class to the 3-way softmax.
print("  y_train contains 'unknown':", bool(np.any(y_train == "unknown")))

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Turn the category strings into integer class ids.
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)

# Hold out 20 % of the samples; the fixed random_state makes the split repeatable.
split_parts = train_test_split(
    X_train_map, X_train_momentum, X_train_refractive_index, y_train_encoded,
    test_size=0.2, random_state=42,
)

# train_test_split returns a (train, test) pair per input, in input order.
(X_train_map, X_test_map,
 X_train_momentum, X_test_momentum,
 X_train_refractive_index, X_test_refractive_index,
 y_train, y_test) = split_parts

# Fit the model, validating on the held-out split after every epoch.
train_inputs = [X_train_map, X_train_momentum, X_train_refractive_index]
validation_set = ([X_test_map, X_test_momentum, X_test_refractive_index], y_test)

history = model.fit(
    x=train_inputs,
    y=y_train,
    epochs=20,
    validation_data=validation_set,
    batch_size=32,
)

# Visual spot check of one randomly chosen training map.
plot_random_element(X_train_map)





KeyError: ignored

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Class probabilities for the training samples.
y_train_pred = model.predict([X_train_map, X_train_momentum, X_train_refractive_index])

# Most probable class id per sample.
y_train_pred_classes = np.argmax(y_train_pred, axis=1)

# Rows are the actual classes, columns the predicted ones.
cm = confusion_matrix(y_train, y_train_pred_classes)

# Annotated heat map of the counts.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
ax.set_title('Confusion matrix Training Data')
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Class probabilities for the held-out (validation) samples.
y_val_pred = model.predict([X_test_map, X_test_momentum, X_test_refractive_index])

# Most probable class id per sample.
y_val_pred_classes = np.argmax(y_val_pred, axis=1)

# Rows are the actual classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_val_pred_classes)

# Annotated heat map of the counts.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
ax.set_title('Confusion matrix Validation Data')
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
plt.show()


In [19]:
import os
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, concatenate

class ParticleDataUtils:
    """Namespace for the particle record used by the regression model."""

    class ParticleInfo:
        """Plain holder for the values stored for one particle."""

        def __init__(self, momentum, mass, energy, refractiveIndex, ckov, filledBins):
            self.momentum, self.mass, self.energy = momentum, mass, energy
            self.refractiveIndex, self.ckov = refractiveIndex, ckov
            self.filledBins = filledBins

def save_particle_info_to_hdf5(particle_vector, filename):
    """Write every particle to its own group ``Particle<i>`` of a new HDF5 file.

    Scalar values are stored as group attributes (``Momentum``, ``Mass``,
    ``Energy``, ``RefractiveIndex``, ``Ckov``). The filled bins are stored as
    the compound dataset ``FilledBins`` with float fields ``x`` and ``y``.

    Args:
        particle_vector: Iterable of particles exposing ``momentum``, ``mass``,
            ``energy``, ``refractiveIndex``, ``ckov`` and ``filledBins`` (an
            iterable of (x, y) pairs).
        filename: Path of the file to create; an existing file is overwritten.
    """
    bin_dtype = [('x', 'f'), ('y', 'f')]

    with h5py.File(filename, 'w') as file:
        for i, particle in enumerate(particle_vector):
            # Create a group for each particle
            group = file.create_group(f'Particle{i}')

            # Store scalar values
            group.attrs['Momentum'] = particle.momentum
            group.attrs['Mass'] = particle.mass
            group.attrs['Energy'] = particle.energy
            group.attrs['RefractiveIndex'] = particle.refractiveIndex
            group.attrs['Ckov'] = particle.ckov

            # A structured array is only filled field-by-field from tuples; a
            # list pair would be read as an extra dimension and each number
            # copied into both fields, so normalise every pair to a tuple.
            filled_bins = np.array([tuple(pair) for pair in particle.filledBins], dtype=bin_dtype)
            group.create_dataset("FilledBins", data=filled_bins)

def load_particle_info_from_hdf5(filename):
    """Read every group of an HDF5 file into a ``ParticleInfo`` object.

    For each top-level group the attributes ``Momentum``, ``Mass``, ``Energy``,
    ``RefractiveIndex`` and ``Ckov`` are read and the ``FilledBins`` dataset is
    converted to a plain Python list.

    Args:
        filename: Path of the HDF5 file to open read-only.

    Returns:
        List of ``ParticleDataUtils.ParticleInfo`` in the file's iteration order.
    """
    scalar_keys = ('Momentum', 'Mass', 'Energy', 'RefractiveIndex', 'Ckov')
    loaded = []

    with h5py.File(filename, 'r') as h5_file:
        for name in h5_file:
            entry = h5_file[name]

            # Scalars, in the positional order ParticleInfo expects.
            scalars = [entry.attrs[key] for key in scalar_keys]

            # Whole dataset -> numpy array -> list.
            bins = entry['FilledBins'][...].tolist()

            loaded.append(ParticleDataUtils.ParticleInfo(*scalars, filledBins=bins))

    return loaded

def read_particle_data_from_file(filename="particle.h5",
                                 drive_path='/content/drive/MyDrive/Colab Notebooks/CERN_ML/CNN_PID/test/'):
    """Load the particles stored in ``filename`` inside ``drive_path``.

    Args:
        filename: Name of the HDF5 file.
        drive_path: Folder containing the file. Defaults to the Google Drive
            folder this notebook was written against; pass another folder to
            read the data from elsewhere.

    Returns:
        Whatever ``load_particle_info_from_hdf5`` returns for the joined path.
    """
    file_path = os.path.join(drive_path, filename)
    return load_particle_info_from_hdf5(file_path)

# Load the particles and turn their fields into TensorFlow tensors.
filename = 'ParticleInfo.h5'
particle_vector = read_particle_data_from_file(filename)

# One array of filled bins per particle. This stays a Python list because the
# number of bins may differ from particle to particle.
# NOTE(review): the map input defined further down expects dense tensors of
# shape (160, 144, 2), which these per-particle bin lists are not - confirm
# how the two are meant to fit together before training.
X_train_map = [np.array(p.filledBins) for p in particle_vector]
X_train_map = [tf.convert_to_tensor(bins) for bins in X_train_map]

# Scalar inputs and the regression target (the particle mass).
X_train_momentum = tf.convert_to_tensor(np.array([p.momentum for p in particle_vector]))
X_train_refractive_index = tf.convert_to_tensor(np.array([p.refractiveIndex for p in particle_vector]))
y_train = tf.convert_to_tensor(np.array([p.mass for p in particle_vector]))

# Per-sample input shapes.
map_shape = (160, 144, 2)  # 160 x pads [0..159] | 144 y pads [0..143] | 2D bin values
momentum_shape = (1,)  # scalar
refractive_index_shape = (1,)  # scalar

# Keras input placeholders.
map_input = Input(name='map_input', shape=map_shape)
momentum_input = Input(name='momentum_input', shape=momentum_shape)
refractive_index_input = Input(name='refractive_index_input', shape=refractive_index_shape)


def _relu_conv3x3(tensor):
    """Apply a new 32-filter 3x3 convolution with ReLU activation to ``tensor``."""
    return Conv2D(32, (3, 3), activation='relu')(tensor)


# Three stacked convolutions over the map, then flatten.
conv1 = _relu_conv3x3(map_input)
conv2 = _relu_conv3x3(conv1)
conv3 = _relu_conv3x3(conv2)
flat_map = Flatten()(conv3)

# Append the scalar inputs to the flattened map features.
concat = concatenate([flat_map, momentum_input, refractive_index_input])

# Regression head: a single linear unit predicts the mass.
fc1 = Dense(128, activation='relu')(concat)
fc2 = Dense(64, activation='relu')(fc1)
output = Dense(1, activation='linear')(fc2)

model = Model(outputs=output, inputs=[map_input, momentum_input, refractive_index_input])

# Mean squared error is the regression loss.
model.compile(loss='mean_squared_error', optimizer='adam')


KeyError: ignored