In [7]:
import numpy as np
import matplotlib.pyplot as plt
import boost_histogram as bh
import tensorflow as tf
import sklearn as sk
import pandas as pd
import pyarrow.feather as feather
import sklearn.model_selection as model_selection
import os

class DataLoader:
    """Load a feather event file and build parity-augmented training arrays.

    On construction the file is read and three arrays are stored:

    * ``x_data`` -- one row per entry with every selected feature of every
      subentry laid out side by side, followed by a second block of the same
      entries with the parity-flip transformation applied.
    * ``weight_data`` -- column vector of ``Event.Weight`` taken from
      subentry 0 of each entry, repeated once for the flipped block.
    * ``y_data`` -- column vector of labels: 1 where the weight is positive,
      0 otherwise.

    Parameters
    ----------
    file_name : str
        Path of the feather file; it must contain ``entry`` and ``subentry``
        columns and an ``Event.Weight`` column.
    selected_labels : list of str
        Column names to use as features. Names missing from the file are
        silently ignored (an index intersection is used).
    """

    def __init__(self, file_name, selected_labels):
        self.file_name = file_name
        self.selected_labels = selected_labels
        self.x_data, self.y_data, self.weight_data = self.import_feather()

    def import_feather(self):
        """Read the feather file and return ``(x_data, y_data, weight_data)``."""
        df = feather.read_feather(self.file_name)
        df = df.set_index(['entry', 'subentry'])

        x_data, y_data, weight_data = self._build_arrays(df, self.selected_labels)

        print('x_data, y_data, weight_data', x_data.shape, y_data.shape, weight_data.shape)
        return x_data, y_data, weight_data

    @staticmethod
    def _build_arrays(df, selected_labels):
        """Build feature, label and weight arrays from an indexed event frame.

        ``df`` must be indexed by the ``(entry, subentry)`` MultiIndex.
        """
        # One weight per entry (read from subentry 0). Sorting by entry keeps
        # the weights aligned with the pivoted feature rows, which
        # pivot_table returns sorted by entry.
        per_entry_weight = df['Event.Weight'].xs(0, level='subentry').sort_index()
        weight_data = per_entry_weight.to_numpy().reshape(-1, 1)
        # The flipped block reuses the same weights, hence the duplication.
        weight_data = np.vstack([weight_data, weight_data])
        y_data = (weight_data > 0).astype(int)

        feature_columns = df.columns.intersection(selected_labels).tolist()
        original_df = df[feature_columns].copy()
        # Flip a *copy*: transforming the frame in place before pivoting it
        # twice would make both halves of x_data identical.
        flipped_df = DataLoader._parity_flip(original_df)

        x_data = np.concatenate(
            [
                DataLoader._pivot_by_subentry(original_df, feature_columns).to_numpy(),
                DataLoader._pivot_by_subentry(flipped_df, feature_columns).to_numpy(),
            ],
            axis=0,
        )
        return x_data, y_data, weight_data

    @staticmethod
    def _parity_flip(features):
        """Return a transformed copy of ``features``; the input is untouched."""
        flipped = features.copy()
        flip_mask = flipped.columns.str.contains(r'\.PT|\.Eta')
        phi_mask = flipped.columns.str.contains(r'\.Phi')
        flipped.loc[:, flip_mask] *= -1
        # NOTE(review): this expression only wraps phi into [-pi, pi); values
        # already inside that interval come back unchanged. If a rotation by
        # pi or a sign flip was intended, the formula needs changing -- confirm.
        flipped.loc[:, phi_mask] = (flipped.loc[:, phi_mask] + np.pi) % (2 * np.pi) - np.pi
        return flipped

    @staticmethod
    def _pivot_by_subentry(features, feature_columns):
        """Spread subentries into columns: one row per entry, missing slots 0."""
        return features.pivot_table(
            index='entry',
            columns=['subentry'],
            values=feature_columns,
            dropna=False,
            fill_value=0,
        )

class CustomKerasClassifier(sk.base.ClassifierMixin, sk.base.BaseEstimator):
    """scikit-learn compatible wrapper around a small dense Keras classifier.

    The network is rebuilt from the constructor hyper-parameters on every
    ``fit`` call, so the estimator can be cloned and tuned by
    ``GridSearchCV``. The mixin is listed before ``BaseEstimator`` as the
    scikit-learn developer guide requires.

    Parameters
    ----------
    input_shape : int
        Number of input features.
    learning_rate : float
        Learning rate of the Adam optimizer.
    neurons : int
        Width of every hidden ``Dense`` layer.
    layers : int
        Number of hidden layers: ``layers - 1`` ReLU layers followed by one
        L2-regularised layer.
    reg_strength : float
        L2 factor applied to the kernel of the last hidden layer.
    epochs : int
        Maximum number of training epochs.
    patience : int
        Early-stopping patience, in epochs.
    dropout_rate : float
        Dropout rate applied before the output layer.
    batch_size : int
        Mini-batch size.
    """

    # Class-level progress counters shared by every instance. They are only
    # meaningful when fits run sequentially in a single process.
    fit_count = 0
    total_fits = 1

    def __init__(self, input_shape, learning_rate=0.001, neurons=16, layers=2, reg_strength=0.001, epochs=50, patience=2, dropout_rate=0, batch_size=128):
        self.input_shape = input_shape
        self.learning_rate = learning_rate
        self.neurons = neurons
        self.layers = layers
        self.reg_strength = reg_strength
        self.epochs = epochs
        self.patience = patience
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.model = None

    def build_model(self):
        """Build and compile a fresh, untrained Keras model."""
        model = tf.keras.models.Sequential()
        model.add(tf.keras.Input(shape=(self.input_shape,)))

        for _ in range(self.layers - 1):
            model.add(tf.keras.layers.Dense(self.neurons, activation='relu'))

        # NOTE(review): this last hidden layer has no activation, so it is a
        # linear map feeding the softmax layer -- confirm that is intended.
        model.add(tf.keras.layers.Dense(self.neurons, kernel_regularizer=tf.keras.regularizers.l2(self.reg_strength)))
        model.add(tf.keras.layers.Dropout(rate=self.dropout_rate))
        model.add(tf.keras.layers.Dense(2, activation='softmax'))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
                      loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model

    def fit(self, x_train, y_train, validation_data=(None, None)):
        """Train a new model and return ``self``.

        Parameters
        ----------
        x_train, y_train : array-like
            Training features and integer class labels.
        validation_data : tuple or None
            ``(x_val, y_val)`` used for early stopping. When it is ``None``
            or contains ``None`` (the default), training runs without a
            validation set and early stopping monitors the training loss.
        """
        CustomKerasClassifier.fit_count += 1
        print("\n" f"Running fit {CustomKerasClassifier.fit_count}/{CustomKerasClassifier.total_fits}")

        has_validation = validation_data is not None and all(
            part is not None for part in validation_data
        )

        self.model = self.build_model()
        early_stopping = tf.keras.callbacks.EarlyStopping(
            # 'val_loss' only exists in the logs when validation data is given.
            monitor='val_loss' if has_validation else 'loss',
            patience=self.patience,
            restore_best_weights=True,
        )

        history = self.model.fit(
            x_train, y_train,
            validation_data=validation_data if has_validation else None,
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=2,
            callbacks=[early_stopping]
        )

        self.history_ = history.history
        # Fitted scikit-learn classifiers are expected to expose classes_.
        self.classes_ = np.unique(y_train)

        return self

    def predict(self, x):
        """Return the index of the most probable class for each row of ``x``."""
        return self.model.predict(x).argmax(axis=1)

    def score(self, x, y):
        """Return the accuracy reported by the Keras model on ``(x, y)``."""
        _, accuracy = self.model.evaluate(x, y, verbose=0)
        return accuracy

class MLModel:
    """Train, persist, reload and apply the classifier.

    ``model`` holds the underlying Keras model once ``grid_search`` has run
    or ``load_ML_model`` has loaded one from disk.
    """

    # Number of cross-validation folds used by the grid search.
    _CV_FOLDS = 2

    def __init__(self):
        self.model = None

    @staticmethod
    def _history_path(model_filename):
        """Return the feather path that stores the history of ``model_filename``."""
        return model_filename.replace(".keras", "_history.feather")

    def data_preprocessing(self, x_data, y_data):
        """Standardise the features and split them 80/10/10.

        Returns ``x_train, x_test, y_train, y_test, x_validate, y_validate``.
        """
        # NOTE(review): the scaler is fitted on the full data set before the
        # split, so test/validation statistics influence the scaling.
        x_data = sk.preprocessing.StandardScaler().fit_transform(x_data)
        x_train, x_test, y_train, y_test = model_selection.train_test_split(x_data, y_data, test_size=0.2, random_state=42)
        # Split the held-out 20 % in half: validation and test.
        x_validate, x_test, y_validate, y_test = model_selection.train_test_split(x_test, y_test, test_size=0.5, random_state=42)

        return x_train, x_test, y_train, y_test, x_validate, y_validate

    def grid_search(self, x_data, y_data, param_grid, model_filename):
        """Run a grid search and save the best model and its training history.

        Parameters
        ----------
        x_data, y_data : array-like
            Full feature matrix and labels; they are scaled and split here.
        param_grid : dict
            Maps ``CustomKerasClassifier`` parameter names to candidate lists.
        model_filename : str
            Where the best Keras model is saved. The history is written next
            to it with ``.keras`` replaced by ``_history.feather``.
        """
        x_train, x_test, y_train, y_test, x_validate, y_validate = self.data_preprocessing(x_data, y_data)
        input_shape = x_train.shape[1]

        n_candidates = int(np.prod([len(v) for v in param_grid.values()]))
        # One fit per candidate and fold, plus the final refit of the best
        # candidate, which also goes through CustomKerasClassifier.fit.
        CustomKerasClassifier.total_fits = n_candidates * self._CV_FOLDS + 1
        CustomKerasClassifier.fit_count = 0

        model = CustomKerasClassifier(input_shape=input_shape)
        grid = model_selection.GridSearchCV(estimator=model, param_grid=param_grid, cv=self._CV_FOLDS, n_jobs=1, verbose=2)
        grid_result = grid.fit(x_train, y_train, validation_data=(x_validate, y_validate))

        print("Best Accuracy: {:.4f} using {}".format(grid_result.best_score_, grid_result.best_params_))
        best_estimator = grid_result.best_estimator_
        self.model = best_estimator.model
        self.model.save(model_filename)

        # Use the history recorded by the estimator's own fit (the refit on
        # the whole training split) rather than the Keras model attribute.
        history = best_estimator.history_
        print(history)
        history_df = pd.DataFrame.from_dict(history, orient="columns")
        print("history_df", history_df)
        feather.write_feather(history_df, self._history_path(model_filename))

    def predict(self, x_data):
        """Return the model output for ``x_data`` after standardising it."""
        # NOTE(review): a new scaler is fitted on the data being predicted;
        # the scaler used for training is not stored. The scaling matches
        # training only when the same data set is passed -- confirm.
        x_data = sk.preprocessing.StandardScaler().fit_transform(x_data)
        prediction = self.model.predict(x_data)
        return prediction

    @staticmethod
    def load_ML_model(model_filename):
        """Load a saved model and its history.

        Returns ``(ml_model, history_df)``: an ``MLModel`` wrapping the Keras
        model read from ``model_filename`` and the history DataFrame read
        from the matching ``_history.feather`` file.
        """
        ml_model = MLModel()
        ml_model.model = tf.keras.models.load_model(model_filename)
        history_df = feather.read_feather(MLModel._history_path(model_filename))

        return ml_model, history_df
    
class Plotter:
    """Static helpers for plotting training curves and the NN observable."""

    @staticmethod
    def _plot_history_metric(history, metric, y_label, title):
        """Plot ``history[metric]`` and ``history['val_' + metric]`` per epoch."""
        training_values = history[metric]
        validation_values = history[f'val_{metric}']
        epochs = np.arange(1, len(training_values) + 1)

        plt.figure(figsize=(8, 5))
        plt.plot(epochs, training_values, marker='x', linestyle='-', color='mediumblue', label='Training')
        plt.plot(epochs, validation_values, marker='x', linestyle='-', color='red', label='Validation')
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.show()

    @staticmethod
    def plot_loss(history):
        """Plot training and validation loss (keys ``loss`` / ``val_loss``)."""
        Plotter._plot_history_metric(history, 'loss', 'Loss', 'Loss Function Over Epochs')

    @staticmethod
    def plot_accuracy(history):
        """Plot training and validation accuracy (``accuracy`` / ``val_accuracy``)."""
        Plotter._plot_history_metric(history, 'accuracy', 'Accuracy', 'Accuracy Over Epochs')

    @staticmethod
    def chi_squared(observed, expected, uncertainty):
        """Return ``sum(((observed - expected) / uncertainty) ** 2)``.

        Bins whose uncertainty is exactly zero are skipped (they contribute
        0) so that they cannot turn the result into nan or inf.
        """
        residual = np.asarray(observed, dtype=float) - expected
        residual, sigma = np.broadcast_arrays(residual, np.asarray(uncertainty, dtype=float))
        pulls = np.divide(
            residual, sigma,
            out=np.zeros(residual.shape, dtype=float),
            where=sigma != 0
        )
        return np.sum(pulls ** 2)

    @staticmethod
    def plot_ML_observable(ML_results, weight_data, region, interation):
        """Histogram the observable ``ML_results[:, 1] - ML_results[:, 0]``.

        Parameters
        ----------
        ML_results : ndarray
            Model output with at least two columns.
        weight_data : array-like
            One weight per row of ``ML_results``; any shape holding that many
            values is accepted (for example ``(N,)`` or ``(N, 1)``).
        region : str
            Region label used in the plot title and the output file name.
        interation : int or str
            Tag inserted into the output file name.

        The figure is saved as ``O_NN_{interation}_{region}_jj.png`` and
        shown. The chi-squared about zero and the per-bin fractional
        uncertainty are printed.
        """
        scale_factor = 1
        ML_observable = ML_results[:, 1] - ML_results[:, 0]
        weight_array = np.asarray(weight_data)
        # The fill below is one-dimensional: flatten column-vector weights so
        # they have the same shape as the observable.
        weights = weight_array.ravel()

        bin_number = 20
        range_limit = (-1, 1)

        histogram_0 = bh.Histogram(bh.axis.Regular(bin_number, *range_limit, underflow=False, overflow=False))
        print("ML and Weight shape", ML_observable.shape, weight_array.shape)
        histogram_0.fill(ML_observable, weight=weights)

        bin_widths = histogram_0.axes[0].widths
        differential_cross_section_0 = histogram_0.view() / bin_widths

        # Sum of squared weights per bin gives the variance of the weighted count.
        hist_0_squared = bh.Histogram(histogram_0.axes[0])
        hist_0_squared.fill(ML_observable, weight=weights**2)

        uncertainty_0 = np.sqrt(hist_0_squared.view()) / bin_widths

        bin_centers = histogram_0.axes[0].centers
        bin_edges = histogram_0.axes[0].edges

        chi_squared_1 = Plotter.chi_squared(differential_cross_section_0, 0, uncertainty_0)
        print("chi_squared_1 about y=0:", chi_squared_1)

        plt.figure()

        plt.hist(bin_edges[:-1], bins=bin_edges, weights=differential_cross_section_0 / scale_factor, label=r"$O_{NN}$", color='mediumblue', histtype='step')
        plt.errorbar(bin_centers, differential_cross_section_0 / scale_factor, yerr=uncertainty_0 / scale_factor, fmt='None', color='mediumblue')

        plt.title(f'$O_{{NN}}$ for ${region}$ region', fontsize=14)
        plt.xlabel(r'$O_{NN}$', fontsize=14)
        plt.ylabel(r'$d\sigma/dO_{NN}$ [fb]', fontsize=14)
        # Leave 20 % head-room above the data for the legend.
        y_min, y_max = plt.ylim()
        plt.ylim(y_min, y_max + 0.20 * (y_max - y_min))
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        plt.legend(frameon=False, fontsize=12)
        plt.savefig(f"O_NN_{interation}_{region}_jj.png", dpi=1000, bbox_inches="tight")
        plt.show()

        # Bins with zero content are reported as 0 rather than dividing by zero.
        fractional_uncertainty_0 = np.divide(
            uncertainty_0, differential_cross_section_0,
            out=np.zeros_like(uncertainty_0),
            where=differential_cross_section_0 != 0
        )

        print("O_NN Fractional Uncertainty:", fractional_uncertainty_0)

In [6]:
def __main__():
    """Train (or reuse) the lepton+jet eta/phi model and plot its results.

    A grid search is run only when no saved model file exists; in both
    cases the model and its history are then loaded from disk, applied to
    the full data set and plotted.
    """
    feature_labels = [
        'Electron.Eta', 'Electron.Phi',
        'Muon.Eta', 'Muon.Phi',
        'Jet.Eta', 'Jet.Phi',
    ]
    vbs_data = DataLoader("VBS_data_cwtil_2.feather", feature_labels)
    model_filename = "vbs_lj_eta_phi.keras"

    # Hyper-parameter candidates explored by the grid search.
    param_grid = dict(
        learning_rate=[0.001, 0.01],
        neurons=[16, 32, 64],
        layers=[2, 4],
        reg_strength=[0, 0.0001, 0.001],
        patience=[2],
        batch_size=[128],
        dropout_rate=[0, 0.2],
    )

    if not os.path.exists(model_filename):
        print("No existing model found, training new model")
        MLModel().grid_search(vbs_data.x_data, vbs_data.y_data, param_grid, model_filename)
    else:
        print(f"Loading existing model for {model_filename}")

    # The model is always read back from disk, whether or not it was just trained.
    vbs_model, vbs_history = MLModel.load_ML_model(model_filename)

    predictions = vbs_model.predict(vbs_data.x_data)
    Plotter.plot_ML_observable(predictions, vbs_data.weight_data, "VBS", 2)
    Plotter.plot_loss(vbs_history)
    Plotter.plot_accuracy(vbs_history)


if __name__ == "__main__":
    __main__()

x_data, y_data, weight_data (169076, 72) (169076, 1) (169076, 1)
No existing model found, training new model
Fitting 2 folds for each of 72 candidates, totalling 144 fits

Running fit 1/144

Running fit 1/144

Running fit 1/144

Running fit 1/144

Running fit 1/144

Running fit 1/144

Running fit 1/144

Running fit 1/144


2025-03-04 17:09:36.144942: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:09:36.144957: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:09:36.144981: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:09:36.144994: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:09:36.144993: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:09:36.145010: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:09:36.145096: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] A

Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50


2025-03-04 17:09:36.373876: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


529/529 - 6s - 11ms/step - accuracy: 0.5010 - loss: 0.7025 - val_accuracy: 0.5020 - val_loss: 0.6955
Epoch 2/50
529/529 - 6s - 11ms/step - accuracy: 0.5043 - loss: 0.7185 - val_accuracy: 0.5005 - val_loss: 0.7089
529/529 - 6s - 11ms/step - accuracy: 0.5052 - loss: 0.7010 - val_accuracy: 0.5049 - val_loss: 0.6960
Epoch 2/50
Epoch 2/50
529/529 - 6s - 11ms/step - accuracy: 0.4975 - loss: 0.7065 - val_accuracy: 0.5072 - val_loss: 0.6959
Epoch 2/50
529/529 - 6s - 11ms/step - accuracy: 0.5059 - loss: 0.7025 - val_accuracy: 0.5073 - val_loss: 0.6944
Epoch 2/50
529/529 - 6s - 11ms/step - accuracy: 0.5023 - loss: 0.7000 - val_accuracy: 0.5075 - val_loss: 0.6937
Epoch 2/50
529/529 - 6s - 11ms/step - accuracy: 0.4990 - loss: 0.7125 - val_accuracy: 0.5043 - val_loss: 0.7045
Epoch 2/50
529/529 - 6s - 11ms/step - accuracy: 0.4999 - loss: 0.7002 - val_accuracy: 0.5025 - val_loss: 0.6935
Epoch 2/50
529/529 - 2s - 4ms/step - accuracy: 0.5103 - loss: 0.6942 - val_accuracy: 0.5078 - val_loss: 0.6949
Epoc

2025-03-04 17:10:03.082101: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 6s - 11ms/step - accuracy: 0.5021 - loss: 0.7039 - val_accuracy: 0.4994 - val_loss: 0.6981
Epoch 2/50
529/529 - 3s - 6ms/step - accuracy: 0.5562 - loss: 0.6874 - val_accuracy: 0.5601 - val_loss: 0.6870
Epoch 9/50
529/529 - 4s - 7ms/step - accuracy: 0.5353 - loss: 0.6926 - val_accuracy: 0.5351 - val_loss: 0.6930
Epoch 7/50
529/529 - 3s - 7ms/step - accuracy: 0.5843 - loss: 0.6753 - val_accuracy: 0.5893 - val_loss: 0.6763
Epoch 10/50
529/529 - 5s - 9ms/step - accuracy: 0.5870 - loss: 0.6739 - val_accuracy: 0.5879 - val_loss: 0.6770
Epoch 8/50
529/529 - 5s - 10ms/step - accuracy: 0.5745 - loss: 0.6792 - val_accuracy: 0.5759 - val_loss: 0.6790
Epoch 9/50
529/529 - 3s - 7ms/step - accuracy: 0.5077 - loss: 0.6975 - val_accuracy: 0.5173 - val_loss: 0.6967
Epoch 3/50
529/529 - 5s - 10ms/step - accuracy: 0.5265 - loss: 0.6920 - val_accuracy: 0.5261 - val_loss: 0.6965
Epoch 9/50
529/529 - 2s - 4ms/step - accuracy: 0.5511 - loss: 0.6896 - val_accuracy: 0.5551 - val_loss: 0.69

2025-03-04 17:10:52.374201: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 5s - 9ms/step - accuracy: 0.6045 - loss: 0.6636 - val_accuracy: 0.6056 - val_loss: 0.6772
Epoch 12/50
529/529 - 5s - 9ms/step - accuracy: 0.6140 - loss: 0.6474 - val_accuracy: 0.6173 - val_loss: 0.6517


2025-03-04 17:10:52.945615: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.001, neurons=16, patience=2, reg_strength=0; total time= 1.3min

Running fit 1/144


2025-03-04 17:10:54.101090: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 3s - 6ms/step - accuracy: 0.6181 - loss: 0.6466 - val_accuracy: 0.6202 - val_loss: 0.6596
Epoch 16/50
529/529 - 3s - 6ms/step - accuracy: 0.6097 - loss: 0.6548 - val_accuracy: 0.6089 - val_loss: 0.6659
Epoch 24/50
529/529 - 3s - 6ms/step - accuracy: 0.6089 - loss: 0.6585 - val_accuracy: 0.6105 - val_loss: 0.6653
Epoch 13/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.001, neurons=32, patience=2, reg_strength=0; total time= 1.4min

Running fit 1/144
529/529 - 5s - 9ms/step - accuracy: 0.5033 - loss: 0.7272 - val_accuracy: 0.5082 - val_loss: 0.7140


2025-03-04 17:10:57.342484: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 2/50
Epoch 1/50
529/529 - 2s - 4ms/step - accuracy: 0.6135 - loss: 0.6534 - val_accuracy: 0.6165 - val_loss: 0.6651
Epoch 14/50
529/529 - 5s - 9ms/step - accuracy: 0.5010 - loss: 0.7250 - val_accuracy: 0.5092 - val_loss: 0.7136
Epoch 2/50
529/529 - 5s - 9ms/step - accuracy: 0.6179 - loss: 0.6446 - val_accuracy: 0.6208 - val_loss: 0.6625
529/529 - 5s - 9ms/step - accuracy: 0.6097 - loss: 0.6528 - val_accuracy: 0.6053 - val_loss: 0.6607
Epoch 25/50
529/529 - 3s - 5ms/step - accuracy: 0.6148 - loss: 0.6515 - val_accuracy: 0.6170 - val_loss: 0.6664
Epoch 15/50
529/529 - 2s - 4ms/step - accuracy: 0.5099 - loss: 0.7108 - val_accuracy: 0.5038 - val_loss: 0.7085
Epoch 3/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.001, neurons=16, patience=2, reg_strength=0.001; total time= 1.4min
529/529 - 5s - 9ms/step - accuracy: 0.5024 - loss: 0.7008 - val_accuracy: 0.5179 - val_loss: 0.6946
Epoch 2/50

Running fit 1/144
529/529 - 5s - 9ms/step - accuracy: 0.5090 - loss: 0.70

2025-03-04 17:11:02.410364: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


529/529 - 3s - 5ms/step - accuracy: 0.6124 - loss: 0.6524 - val_accuracy: 0.6103 - val_loss: 0.6621
Epoch 26/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.001, neurons=16, patience=2, reg_strength=0.001; total time= 1.5min

Running fit 1/144
529/529 - 3s - 6ms/step - accuracy: 0.6202 - loss: 0.6489 - val_accuracy: 0.6218 - val_loss: 0.6647
Epoch 16/50
Epoch 1/50
529/529 - 3s - 6ms/step - accuracy: 0.5155 - loss: 0.7050 - val_accuracy: 0.5279 - val_loss: 0.7023
Epoch 4/50
529/529 - 3s - 6ms/step - accuracy: 0.5131 - loss: 0.7039 - val_accuracy: 0.5157 - val_loss: 0.7023
Epoch 4/50
529/529 - 3s - 6ms/step - accuracy: 0.6125 - loss: 0.6512 - val_accuracy: 0.6073 - val_loss: 0.6655
529/529 - 3s - 5ms/step - accuracy: 0.6214 - loss: 0.6464 - val_accuracy: 0.6159 - val_loss: 0.6671
Epoch 17/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.001, neurons=32, patience=2, reg_strength=0.0001; total time= 1.2min

Running fit 1/144
Epoch 1/50
529/529 - 5

2025-03-04 17:11:16.458292: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 4s - 7ms/step - accuracy: 0.5134 - loss: 0.7018 - val_accuracy: 0.5157 - val_loss: 0.6996
Epoch 3/50
529/529 - 3s - 5ms/step - accuracy: 0.5734 - loss: 0.6827 - val_accuracy: 0.5771 - val_loss: 0.6800
Epoch 6/50
529/529 - 5s - 9ms/step - accuracy: 0.5345 - loss: 0.6922 - val_accuracy: 0.5496 - val_loss: 0.6879
Epoch 4/50
529/529 - 3s - 5ms/step - accuracy: 0.5716 - loss: 0.6867 - val_accuracy: 0.5872 - val_loss: 0.6839
Epoch 5/50
529/529 - 4s - 8ms/step - accuracy: 0.5612 - loss: 0.6900 - val_accuracy: 0.5603 - val_loss: 0.6925
Epoch 7/50
529/529 - 4s - 8ms/step - accuracy: 0.6219 - loss: 0.6440 - val_accuracy: 0.6197 - val_loss: 0.6687
Epoch 20/50
529/529 - 4s - 8ms/step - accuracy: 0.5761 - loss: 0.6873 - val_accuracy: 0.5834 - val_loss: 0.6855
Epoch 7/50
529/529 - 3s - 6ms/step - accuracy: 0.5889 - loss: 0.6763 - val_accuracy: 0.5942 - val_loss: 0.6751
Epoch 6/50
529/529 - 3s - 6ms/step - accuracy: 0.6232 - loss: 0.6438 - val_accuracy: 0.6192 - val_loss: 0.6634


2025-03-04 17:12:09.453104: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


529/529 - 5s - 9ms/step - accuracy: 0.4998 - loss: 0.7026 - val_accuracy: 0.5007 - val_loss: 0.6979
Epoch 2/50
529/529 - 5s - 10ms/step - accuracy: 0.5047 - loss: 0.7005 - val_accuracy: 0.5092 - val_loss: 0.6984
Epoch 2/50
529/529 - 3s - 7ms/step - accuracy: 0.5018 - loss: 0.7035 - val_accuracy: 0.5009 - val_loss: 0.7000
529/529 - 6s - 11ms/step - accuracy: 0.6064 - loss: 0.6631 - val_accuracy: 0.6076 - val_loss: 0.6599
Epoch 9/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.001, neurons=64, patience=2, reg_strength=0.0001; total time= 1.1min

Running fit 1/144
Epoch 1/50
529/529 - 6s - 12ms/step - accuracy: 0.5123 - loss: 0.6944 - val_accuracy: 0.5166 - val_loss: 0.6941
529/529 - 6s - 12ms/step - accuracy: 0.5021 - loss: 0.6966 - val_accuracy: 0.5005 - val_loss: 0.6988
Epoch 3/50
529/529 - 7s - 13ms/step - accuracy: 0.5052 - loss: 0.6972 - val_accuracy: 0.5247 - val_loss: 0.6942
Epoch 3/50
529/529 - 5s - 9ms/step - accuracy: 0.6120 - loss: 0.6568 - val_accuracy: 

2025-03-04 17:12:25.489773: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 4s - 8ms/step - accuracy: 0.5016 - loss: 0.6982 - val_accuracy: 0.4976 - val_loss: 0.6939
Epoch 5/50
529/529 - 5s - 10ms/step - accuracy: 0.5064 - loss: 0.6936 - val_accuracy: 0.5143 - val_loss: 0.6944
Epoch 5/50
529/529 - 7s - 13ms/step - accuracy: 0.5042 - loss: 0.7067 - val_accuracy: 0.4980 - val_loss: 0.7012
Epoch 2/50
529/529 - 5s - 9ms/step - accuracy: 0.6170 - loss: 0.6526 - val_accuracy: 0.6160 - val_loss: 0.6562
Epoch 12/50
529/529 - 4s - 8ms/step - accuracy: 0.5093 - loss: 0.6938 - val_accuracy: 0.5290 - val_loss: 0.6926
Epoch 3/50
529/529 - 3s - 6ms/step - accuracy: 0.5048 - loss: 0.6942 - val_accuracy: 0.5015 - val_loss: 0.6938
Epoch 6/50
529/529 - 3s - 5ms/step - accuracy: 0.6182 - loss: 0.6492 - val_accuracy: 0.6151 - val_loss: 0.6628
Epoch 13/50
529/529 - 7s - 12ms/step - accuracy: 0.5002 - loss: 0.7018 - val_accuracy: 0.4999 - val_loss: 0.6993
Epoch 2/50
529/529 - 4s - 7ms/step - accuracy: 0.5018 - loss: 0.6963 - val_accuracy: 0.5102 - val_loss: 0.6

2025-03-04 17:12:30.248511: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 4s - 8ms/step - accuracy: 0.5073 - loss: 0.6971 - val_accuracy: 0.4969 - val_loss: 0.7273
Epoch 6/50
529/529 - 4s - 8ms/step - accuracy: 0.5229 - loss: 0.6927 - val_accuracy: 0.5293 - val_loss: 0.6923
Epoch 4/50
529/529 - 5s - 10ms/step - accuracy: 0.5057 - loss: 0.7035 - val_accuracy: 0.5004 - val_loss: 0.7124
Epoch 2/50
529/529 - 2s - 4ms/step - accuracy: 0.5062 - loss: 0.6938 - val_accuracy: 0.5013 - val_loss: 0.6933
Epoch 4/50
529/529 - 3s - 5ms/step - accuracy: 0.5017 - loss: 0.7072 - val_accuracy: 0.4991 - val_loss: 0.6944
Epoch 7/50
529/529 - 3s - 5ms/step - accuracy: 0.5009 - loss: 0.7285 - val_accuracy: 0.4995 - val_loss: 0.7818
Epoch 3/50
529/529 - 5s - 10ms/step - accuracy: 0.5047 - loss: 0.6934 - val_accuracy: 0.5059 - val_loss: 0.6945
Epoch 7/50
529/529 - 5s - 9ms/step - accuracy: 0.5035 - loss: 0.7152 - val_accuracy: 0.4999 - val_loss: 0.6990
Epoch 3/50
529/529 - 5s - 10ms/step - accuracy: 0.6166 - loss: 0.6501 - val_accuracy: 0.6191 - val_loss: 0.652

2025-03-04 17:12:43.626422: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


529/529 - 5s - 9ms/step - accuracy: 0.5051 - loss: 0.6936 - val_accuracy: 0.5020 - val_loss: 0.6947
Epoch 9/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=32, patience=2, reg_strength=0; total time=  18.9s

Running fit 1/144
Epoch 1/50
529/529 - 4s - 8ms/step - accuracy: 0.5191 - loss: 0.6936 - val_accuracy: 0.5253 - val_loss: 0.6918
Epoch 7/50
529/529 - 4s - 7ms/step - accuracy: 0.5043 - loss: 0.7149 - val_accuracy: 0.5009 - val_loss: 0.7134
529/529 - 6s - 11ms/step - accuracy: 0.5578 - loss: 0.6878 - val_accuracy: 0.5687 - val_loss: 0.6872
Epoch 7/50
529/529 - 6s - 11ms/step - accuracy: 0.5041 - loss: 0.6933 - val_accuracy: 0.5011 - val_loss: 0.6934
Epoch 10/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=16, patience=2, reg_strength=0.0001; total time=  46.6s

Running fit 1/144
Epoch 1/50
529/529 - 8s - 15ms/step - accuracy: 0.5027 - loss: 0.7045 - val_accuracy: 0.5019 - val_loss: 0.7072
Epoch 2/50
529/529 - 7s - 1

2025-03-04 17:12:55.861578: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 4s - 8ms/step - accuracy: 0.5573 - loss: 0.6879 - val_accuracy: 0.5730 - val_loss: 0.6875
Epoch 9/50
529/529 - 4s - 8ms/step - accuracy: 0.5027 - loss: 0.7064 - val_accuracy: 0.4985 - val_loss: 0.7062
Epoch 3/50
529/529 - 5s - 9ms/step - accuracy: 0.5858 - loss: 0.6772 - val_accuracy: 0.5847 - val_loss: 0.6856
Epoch 9/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=32, patience=2, reg_strength=0; total time=  36.5s

Running fit 1/144
Epoch 1/50
529/529 - 9s - 16ms/step - accuracy: 0.4972 - loss: 0.7061 - val_accuracy: 0.5203 - val_loss: 0.6986
Epoch 2/50
529/529 - 5s - 10ms/step - accuracy: 0.5051 - loss: 0.6930 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 12/50
529/529 - 5s - 9ms/step - accuracy: 0.5734 - loss: 0.6849 - val_accuracy: 0.5911 - val_loss: 0.6805
Epoch 10/50
529/529 - 5s - 9ms/step - accuracy: 0.5024 - loss: 0.7188 - val_accuracy: 0.4963 - val_loss: 0.6987
Epoch 4/50
529/529 - 8s - 16ms/step - accuracy: 0.4989 - lo

2025-03-04 17:13:18.974186: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 4s - 8ms/step - accuracy: 0.4995 - loss: 0.7468 - val_accuracy: 0.5000 - val_loss: 0.7552
529/529 - 4s - 8ms/step - accuracy: 0.5973 - loss: 0.6715 - val_accuracy: 0.6029 - val_loss: 0.6711
Epoch 15/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=64, patience=2, reg_strength=0; total time=  24.7s

Running fit 1/144
Epoch 1/50
529/529 - 3s - 5ms/step - accuracy: 0.5957 - loss: 0.6716 - val_accuracy: 0.6019 - val_loss: 0.6715
Epoch 16/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=32, patience=2, reg_strength=0.001; total time=  32.3s

Running fit 1/144
Epoch 1/50


2025-03-04 17:13:23.463965: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


529/529 - 5s - 10ms/step - accuracy: 0.5023 - loss: 0.7296 - val_accuracy: 0.5016 - val_loss: 0.7205
Epoch 2/50
529/529 - 5s - 10ms/step - accuracy: 0.5048 - loss: 0.7398 - val_accuracy: 0.4965 - val_loss: 0.8322
Epoch 2/50
529/529 - 3s - 7ms/step - accuracy: 0.6003 - loss: 0.6687 - val_accuracy: 0.6096 - val_loss: 0.6711
Epoch 17/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=32, patience=2, reg_strength=0.001; total time=  32.5s

Running fit 1/144
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=64, patience=2, reg_strength=0; total time=  26.6s

Running fit 1/144
Epoch 1/50
529/529 - 6s - 11ms/step - accuracy: 0.5022 - loss: 0.7134 - val_accuracy: 0.4974 - val_loss: 0.7310
Epoch 2/50
Epoch 1/50
529/529 - 3s - 5ms/step - accuracy: 0.5026 - loss: 0.7709 - val_accuracy: 0.5000 - val_loss: 0.7179
Epoch 3/50
[CV] END batch_size=128, dropout_rate=0, layers=2, learning_rate=0.01, neurons=32, patience=2, reg_strength=0.0001; t

2025-03-04 17:13:59.141257: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 3s - 6ms/step - accuracy: 0.5453 - loss: 0.6877 - val_accuracy: 0.5532 - val_loss: 0.6860
Epoch 5/50
529/529 - 5s - 9ms/step - accuracy: 0.6059 - loss: 0.6529 - val_accuracy: 0.6079 - val_loss: 0.6530
Epoch 8/50
529/529 - 8s - 16ms/step - accuracy: 0.4989 - loss: 0.7076 - val_accuracy: 0.5076 - val_loss: 0.7011
Epoch 2/50
529/529 - 5s - 10ms/step - accuracy: 0.6027 - loss: 0.6536 - val_accuracy: 0.6118 - val_loss: 0.6477
Epoch 8/50
529/529 - 5s - 10ms/step - accuracy: 0.6011 - loss: 0.6580 - val_accuracy: 0.6025 - val_loss: 0.6600
Epoch 8/50
529/529 - 7s - 13ms/step - accuracy: 0.5077 - loss: 0.7030 - val_accuracy: 0.5055 - val_loss: 0.6987
Epoch 2/50
529/529 - 3s - 6ms/step - accuracy: 0.6135 - loss: 0.6459 - val_accuracy: 0.6176 - val_loss: 0.6435
Epoch 9/50
529/529 - 3s - 6ms/step - accuracy: 0.6089 - loss: 0.6530 - val_accuracy: 0.6067 - val_loss: 0.6540
Epoch 9/50
529/529 - 5s - 10ms/step - accuracy: 0.5701 - loss: 0.6777 - val_accuracy: 0.5793 - val_loss: 0.6

2025-03-04 17:15:45.166435: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


529/529 - 4s - 7ms/step - accuracy: 0.6879 - loss: 0.5906 - val_accuracy: 0.6863 - val_loss: 0.6026
Epoch 27/50
[CV] END batch_size=128, dropout_rate=0, layers=4, learning_rate=0.001, neurons=16, patience=2, reg_strength=0.0001; total time= 2.1min

Running fit 1/144
Epoch 1/50
529/529 - 7s - 12ms/step - accuracy: 0.7049 - loss: 0.5706 - val_accuracy: 0.6932 - val_loss: 0.5891
Epoch 14/50
529/529 - 7s - 13ms/step - accuracy: 0.6844 - loss: 0.5904 - val_accuracy: 0.6793 - val_loss: 0.5971
Epoch 25/50
529/529 - 6s - 12ms/step - accuracy: 0.6318 - loss: 0.6314 - val_accuracy: 0.6342 - val_loss: 0.6347
Epoch 8/50
529/529 - 10s - 18ms/step - accuracy: 0.5065 - loss: 0.7104 - val_accuracy: 0.5195 - val_loss: 0.7007
Epoch 2/50
529/529 - 4s - 8ms/step - accuracy: 0.7106 - loss: 0.5665 - val_accuracy: 0.6929 - val_loss: 0.5907
Epoch 15/50
529/529 - 6s - 11ms/step - accuracy: 0.6885 - loss: 0.5889 - val_accuracy: 0.6921 - val_loss: 0.5938
Epoch 28/50
529/529 - 4s - 7ms/step - accuracy: 0.6853 - l

2025-03-04 17:15:56.340920: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 5s - 9ms/step - accuracy: 0.6871 - loss: 0.5885 - val_accuracy: 0.6850 - val_loss: 0.5912
Epoch 27/50
529/529 - 5s - 10ms/step - accuracy: 0.6914 - loss: 0.5863 - val_accuracy: 0.6911 - val_loss: 0.5914
Epoch 29/50
529/529 - 10s - 20ms/step - accuracy: 0.5110 - loss: 0.6947 - val_accuracy: 0.5453 - val_loss: 0.6891
Epoch 2/50
529/529 - 5s - 10ms/step - accuracy: 0.5443 - loss: 0.6956 - val_accuracy: 0.5643 - val_loss: 0.6880
Epoch 3/50
529/529 - 9s - 18ms/step - accuracy: 0.5463 - loss: 0.6915 - val_accuracy: 0.5738 - val_loss: 0.6786
Epoch 3/50
529/529 - 5s - 10ms/step - accuracy: 0.6487 - loss: 0.6155 - val_accuracy: 0.6509 - val_loss: 0.6214
Epoch 10/50
529/529 - 4s - 8ms/step - accuracy: 0.7158 - loss: 0.5594 - val_accuracy: 0.7022 - val_loss: 0.5825
Epoch 17/50
529/529 - 4s - 7ms/step - accuracy: 0.6921 - loss: 0.5857 - val_accuracy: 0.6948 - val_loss: 0.5912
Epoch 30/50
529/529 - 4s - 8ms/step - accuracy: 0.5665 - loss: 0.6781 - val_accuracy: 0.5958 - val_los

2025-03-04 17:16:35.953433: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 4s - 8ms/step - accuracy: 0.6984 - loss: 0.5720 - val_accuracy: 0.6905 - val_loss: 0.5872
Epoch 9/50
529/529 - 6s - 11ms/step - accuracy: 0.7262 - loss: 0.5439 - val_accuracy: 0.7098 - val_loss: 0.5654
Epoch 10/50
529/529 - 6s - 11ms/step - accuracy: 0.6796 - loss: 0.6009 - val_accuracy: 0.6793 - val_loss: 0.6036
Epoch 10/50
529/529 - 6s - 11ms/step - accuracy: 0.6868 - loss: 0.5920 - val_accuracy: 0.6841 - val_loss: 0.6009
Epoch 11/50
529/529 - 4s - 7ms/step - accuracy: 0.7144 - loss: 0.5593 - val_accuracy: 0.7023 - val_loss: 0.5818
Epoch 19/50
529/529 - 3s - 6ms/step - accuracy: 0.6904 - loss: 0.5919 - val_accuracy: 0.6842 - val_loss: 0.5995
Epoch 11/50
529/529 - 3s - 6ms/step - accuracy: 0.7112 - loss: 0.5599 - val_accuracy: 0.6942 - val_loss: 0.5816
Epoch 10/50
529/529 - 5s - 9ms/step - accuracy: 0.7300 - loss: 0.5379 - val_accuracy: 0.7238 - val_loss: 0.5499
Epoch 11/50
529/529 - 8s - 16ms/step - accuracy: 0.5110 - loss: 0.6991 - val_accuracy: 0.5266 - val_los

2025-03-04 17:17:31.563212: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
529/529 - 5s - 9ms/step - accuracy: 0.5011 - loss: 0.6934 - val_accuracy: 0.4999 - val_loss: 0.6942
Epoch 5/50
529/529 - 6s - 11ms/step - accuracy: 0.7300 - loss: 0.5405 - val_accuracy: 0.7075 - val_loss: 0.5762
Epoch 13/50
529/529 - 3s - 6ms/step - accuracy: 0.6766 - loss: 0.5997 - val_accuracy: 0.6762 - val_loss: 0.6048
Epoch 8/50
529/529 - 5s - 10ms/step - accuracy: 0.7358 - loss: 0.5339 - val_accuracy: 0.7074 - val_loss: 0.5736
Epoch 14/50
529/529 - 3s - 6ms/step - accuracy: 0.7258 - loss: 0.5486 - val_accuracy: 0.7042 - val_loss: 0.5832
529/529 - 3s - 6ms/step - accuracy: 0.7327 - loss: 0.5403 - val_accuracy: 0.7147 - val_loss: 0.5701
Epoch 25/50
529/529 - 3s - 6ms/step - accuracy: 0.7349 - loss: 0.5341 - val_accuracy: 0.7105 - val_loss: 0.5707
Epoch 14/50
529/529 - 3s - 6ms/step - accuracy: 0.7386 - loss: 0.5297 - val_accuracy: 0.7136 - val_loss: 0.5659
Epoch 15/50
529/529 - 3s - 6ms/step - accuracy: 0.7331 - loss: 0.5390 - val_accuracy: 0.7189 - val_loss: 0.5597
529/5

2025-03-04 17:18:24.382218: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


529/529 - 4s - 8ms/step - accuracy: 0.4990 - loss: 0.6936 - val_accuracy: 0.4989 - val_loss: 0.6933
Epoch 6/50
529/529 - 3s - 5ms/step - accuracy: 0.5793 - loss: 0.6687 - val_accuracy: 0.5894 - val_loss: 0.6626
Epoch 10/50
529/529 - 5s - 9ms/step - accuracy: 0.6534 - loss: 0.6217 - val_accuracy: 0.6494 - val_loss: 0.6229
Epoch 14/50
529/529 - 3s - 5ms/step - accuracy: 0.6274 - loss: 0.6375 - val_accuracy: 0.6434 - val_loss: 0.6322
Epoch 13/50
529/529 - 3s - 5ms/step - accuracy: 0.5051 - loss: 0.6930 - val_accuracy: 0.5143 - val_loss: 0.6893
Epoch 4/50
529/529 - 3s - 5ms/step - accuracy: 0.5845 - loss: 0.6646 - val_accuracy: 0.5964 - val_loss: 0.6587
Epoch 11/50
529/529 - 5s - 9ms/step - accuracy: 0.5714 - loss: 0.6639 - val_accuracy: 0.5724 - val_loss: 0.6616
Epoch 13/50
529/529 - 3s - 5ms/step - accuracy: 0.6457 - loss: 0.6281 - val_accuracy: 0.6439 - val_loss: 0.6290
Epoch 14/50
[CV] END batch_size=128, dropout_rate=0, layers=4, learning_rate=0.001, neurons=64, patience=2, reg_streng

2025-03-04 17:18:34.221052: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:18:34.244814: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.
2025-03-04 17:18:34.280987: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19477440 exceeds 10% of free system memory.


Epoch 1/50
Epoch 1/50

Running fit 1/144
Epoch 1/50


KeyboardInterrupt: 

In [8]:
def __main__():
    """Run the VBS workflow: load data, train if needed, predict, and plot.

    Steps, in order:
      1. Build a ``DataLoader`` for ``VBS_data_cwtil_2.feather`` restricted to
         the electron/muon Eta and Phi columns.
      2. If ``vbs_l_eta_phi.keras`` does not exist on disk, run
         ``MLModel.grid_search`` over the hyperparameter grid below.
         NOTE(review): the subsequent load assumes ``grid_search`` writes its
         result to ``model_filename`` -- confirm against ``MLModel``.
      3. Load the model and its training history with
         ``MLModel.load_ML_model`` and predict on the full feature array.
      4. Plot the ML observable, the loss curve and the accuracy curve.
    """
    feature_labels = ['Electron.Eta', 'Electron.Phi', 'Muon.Eta', 'Muon.Phi']
    vbs_data = DataLoader("VBS_data_cwtil_2.feather", feature_labels)
    model_filename = "vbs_l_eta_phi.keras"

    # Hyperparameter candidates handed to MLModel.grid_search; single-element
    # lists pin that setting to one value.
    param_grid = dict(
        learning_rate=[0.001, 0.005, 0.01],
        neurons=[16, 32, 64],
        layers=[2, 4],
        reg_strength=[0, 0.0001, 0.001],
        patience=[2],
        batch_size=[128],
        dropout_rate=[0],
    )

    # Train only when no saved model file is present; otherwise reuse it.
    if not os.path.exists(model_filename):
        print("No existing model found, training new model")
        trainer = MLModel()
        trainer.grid_search(
            vbs_data.x_data,
            vbs_data.y_data,
            param_grid,
            model_filename,
        )
    else:
        print(f"Loading existing model for {model_filename}")

    # Both branches end by loading from disk, so a freshly trained model and a
    # previously saved one go through the same code path.
    loaded = MLModel.load_ML_model(model_filename)
    vbs_model, vbs_history = loaded

    vbs_results = vbs_model.predict(vbs_data.x_data)
    Plotter.plot_ML_observable(vbs_results, vbs_data.weight_data, "VBS", 2)
    Plotter.plot_loss(vbs_history)
    Plotter.plot_accuracy(vbs_history)

# Entry point: run the workflow only when this code executes as the top-level
# module (``__name__ == '__main__'``), not when it is imported.
if __name__ == '__main__':
    __main__()

Data shapes:  (84538, 48) (84538, 1) (84538, 1)
No existing model found, training new model
Fitting 2 folds for each of 54 candidates, totalling 108 fits

Running fit 1/108

Running fit 1/108

Running fit 1/108

Running fit 1/108
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
265/265 - 3s - 12ms/step - accuracy: 0.5006 - loss: 0.6957 - val_accuracy: 0.5048 - val_loss: 0.6962
Epoch 2/50
265/265 - 3s - 12ms/step - accuracy: 0.5036 - loss: 0.6959 - val_accuracy: 0.5012 - val_loss: 0.6955
Epoch 2/50
265/265 - 3s - 12ms/step - accuracy: 0.5018 - loss: 0.6986 - val_accuracy: 0.5047 - val_loss: 0.6969
Epoch 2/50
265/265 - 3s - 12ms/step - accuracy: 0.4981 - loss: 0.7019 - val_accuracy: 0.5011 - val_loss: 0.6963
Epoch 2/50
265/265 - 1s - 4ms/step - accuracy: 0.5064 - loss: 0.6946 - val_accuracy: 0.4995 - val_loss: 0.6957
Epoch 3/50
265/265 - 1s - 4ms/step - accuracy: 0.5055 - loss: 0.6940 - val_accuracy: 0.5014 - val_loss: 0.6944
Epoch 3/50
265/265 - 1s - 4ms/step - accuracy: 0.5021 - loss: 0.696

KeyboardInterrupt: 