### Test a GNN example before trying it on your own data

an example from https://uvadlc-notebooks.readthedocs.io/en/latest/tutorial_notebooks/tutorial7/GNN_overview.html

In [2]:
import torch
from torch_geometric.data import Data

In [1]:
from torch_geometric.datasets import TUDataset

In [9]:
tu_dataset = TUDataset(root='/tmp/MUTAG', name='MUTAG')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Processing...
Done!


In [10]:
print("Data object:", tu_dataset.data)
print("Length:", len(tu_dataset))
print(f"Average label: {tu_dataset.data.y.float().mean().item():4.2f}")

Data object: Data(x=[3371, 7], edge_index=[2, 7442], edge_attr=[7442, 4], y=[188])
Length: 188
Average label: 0.66




In [11]:
torch.manual_seed(42)
tu_dataset.shuffle()
train_dataset = tu_dataset[:150]
test_dataset = tu_dataset[150:]

The standard approach to handling GNN of data with many graphs (rather than e.g. one single big graph) is to put them all together in a big tensor except without any connections between unrelated graphs (see tutorial for explanation)

In [13]:
import torch_geometric.data as geom_data
graph_train_loader = geom_data.DataLoader(train_dataset, batch_size=64, shuffle=True)
graph_val_loader = geom_data.DataLoader(test_dataset, batch_size=64) # Additional loader if you want to change to a larger dataset
graph_test_loader = geom_data.DataLoader(test_dataset, batch_size=64)



In [14]:
batch = next(iter(graph_test_loader))
print("Batch:", batch)
print("Labels:", batch.y[:10])
print("Batch indices:", batch.batch[:40])

Batch: DataBatch(edge_index=[2, 1512], x=[687, 7], edge_attr=[1512, 4], y=[38], batch=[687], ptr=[39])
Labels: tensor([1, 1, 1, 0, 0, 0, 1, 1, 1, 0])
Batch indices: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2])


In [20]:
import torch.nn as nn
import torch_geometric.nn as geom_nn
import torch.optim as optim
import pytorch_lightning as pl

class GNNModel(nn.Module):

    def __init__(self, c_in, c_hidden, c_out, num_layers=2, layer_name="GCN", dp_rate=0.1, **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of the output features. Usually number of classes in classification
            num_layers - Number of "hidden" graph layers
            layer_name - String of the graph layer to use
            dp_rate - Dropout rate to apply throughout the network
            kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT)
        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]

        layers = []
        in_channels, out_channels = c_in, c_hidden
        for l_idx in range(num_layers-1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=out_channels,
                          **kwargs),
                nn.ReLU(inplace=True),
                nn.Dropout(dp_rate)
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_out,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)

    def forward(self, x, edge_index):
        """
        Inputs:
            x - Input features per node
            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
        """
        for l in self.layers:
            # For graph layers, we need to add the "edge_index" tensor as additional input
            # All PyTorch Geometric graph layer inherit the class "MessagePassing", hence
            # we can simply check the class type.
            if isinstance(l, geom_nn.MessagePassing):
                x = l(x, edge_index)
            else:
                x = l(x)
        return x

class GraphGNNModel(nn.Module):

    def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of output features (usually number of classes)
            dp_rate_linear - Dropout rate before the linear layer (usually much higher than inside the GNN)
            kwargs - Additional arguments for the GNNModel object
        """
        super().__init__()
        self.GNN = GNNModel(c_in=c_in,
                            c_hidden=c_hidden,
                            c_out=c_hidden, # Not our prediction output yet!
                            **kwargs)
        self.head = nn.Sequential(
            nn.Dropout(dp_rate_linear),
            nn.Linear(c_hidden, c_out)
        )

    def forward(self, x, edge_index, batch_idx):
        """
        Inputs:
            x - Input features per node
            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
            batch_idx - Index of batch element for each node
        """
        x = self.GNN(x, edge_index)
        x = geom_nn.global_mean_pool(x, batch_idx) # Average pooling
        x = self.head(x)
        return x

In [21]:
class GraphLevelGNN(pl.LightningModule):

    def __init__(self, **model_kwargs):
        super().__init__()
        # Saving hyperparameters
        self.save_hyperparameters()

        self.model = GraphGNNModel(**model_kwargs)
        self.loss_module = nn.BCEWithLogitsLoss() if self.hparams.c_out == 1 else nn.CrossEntropyLoss()

    def forward(self, data, mode="train"):
        x, edge_index, batch_idx = data.x, data.edge_index, data.batch
        x = self.model(x, edge_index, batch_idx)
        x = x.squeeze(dim=-1)

        if self.hparams.c_out == 1:
            preds = (x > 0).float()
            data.y = data.y.float()
        else:
            preds = x.argmax(dim=-1)
        loss = self.loss_module(x, data.y)
        acc = (preds == data.y).sum().float() / preds.shape[0]
        return loss, acc

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=1e-2, weight_decay=0.0) # High lr because of small dataset and small model
        return optimizer

    def training_step(self, batch, batch_idx):
        loss, acc = self.forward(batch, mode="train")
        self.log('train_loss', loss)
        self.log('train_acc', acc)
        return loss

    def validation_step(self, batch, batch_idx):
        _, acc = self.forward(batch, mode="val")
        self.log('val_acc', acc)

    def test_step(self, batch, batch_idx):
        _, acc = self.forward(batch, mode="test")
        self.log('test_acc', acc)

In [24]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cpu


In [25]:
import os
CHECKPOINT_PATH = "../saved_models/tutorial7"
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

def train_graph_classifier(model_name, **model_kwargs):
    pl.seed_everything(42)

    # Create a PyTorch Lightning trainer with the generation callback
    root_dir = os.path.join(CHECKPOINT_PATH, "GraphLevel" + model_name)
    os.makedirs(root_dir, exist_ok=True)
    trainer = pl.Trainer(default_root_dir=root_dir,
                         callbacks=[ModelCheckpoint(save_weights_only=True, mode="max", monitor="val_acc")],
                         accelerator="gpu" if str(device).startswith("cuda") else "cpu",
                         devices=1,
                         max_epochs=500,
                         enable_progress_bar=False)
    trainer.logger._default_hp_metric = None # Optional logging argument that we don't need

    # Check whether pretrained model exists. If yes, load it and skip training
    pretrained_filename = os.path.join(CHECKPOINT_PATH, f"GraphLevel{model_name}.ckpt")
    if os.path.isfile(pretrained_filename):
        print("Found pretrained model, loading...")
        model = GraphLevelGNN.load_from_checkpoint(pretrained_filename)
    else:
        pl.seed_everything(42)
        model = GraphLevelGNN(c_in=tu_dataset.num_node_features,
                              c_out=1 if tu_dataset.num_classes==2 else tu_dataset.num_classes,
                              **model_kwargs)
        trainer.fit(model, graph_train_loader, graph_val_loader)
        model = GraphLevelGNN.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
    # Test best model on validation and test set
    train_result = trainer.test(model, graph_train_loader, verbose=False)
    test_result = trainer.test(model, graph_test_loader, verbose=False)
    result = {"test": test_result[0]['test_acc'], "train": train_result[0]['test_acc']}
    return model, result

In [30]:
tu_dataset.num_classes

2

In [27]:
gnn_layer_by_name = {
    "GCN": geom_nn.GCNConv,
    "GAT": geom_nn.GATConv,
    "GraphConv": geom_nn.GraphConv
}

In [28]:
model, result = train_graph_classifier(model_name="GraphConv",
                                       c_hidden=256,
                                       layer_name="GraphConv",
                                       num_layers=3,
                                       dp_rate_linear=0.5,
                                       dp_rate=0.0)

Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Seed set to 42
Missing logger folder: ../saved_models/tutorial7/GraphLevelGraphConv/lightning_logs

  | Name        | Type              | Params
--------------------------------------------------
0 | model       | GraphGNNModel     | 266 K 
1 | loss_module | BCEWithLogitsLoss | 0     
--------------------------------------------------
266 K     Trainable params
0         Non-trainable params
266 K     Total params
1.067     Total estimated model params size (MB)
/opt/homebrew/Caskroom/miniforge/base/envs/tensorflow/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.
/opt/homebrew/Caskroom/m

In [29]:
print(f"Train performance: {100.0*result['train']:4.2f}%")
print(f"Test performance:  {100.0*result['test']:4.2f}%")

Train performance: 92.76%
Test performance:  92.11%


In [1]:
import numpy as np
from random import shuffle
from functools import reduce

import csv
import pickle

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, ShuffleSplit
from sklearn.utils import shuffle
# from sklearn.utils import compute_class_weight

import tensorflow as tf
# from keras import layers, regularizers
#layers = tf.keras.layers
from tensorflow.keras import layers, models
regularizers = tf.keras.regularizers

In [2]:
def read_files( filenames ):

    params = []
    flagged = []
    event_number = []
    for filename in filenames:
        with open(filename, 'r') as f:
            reader = csv.reader(f)
            first_event=True
            event_matrix = []
            for line in reader:
                splitline = line[0].split(' ')
                if splitline[0]=='E':
                    if not first_event: params.append(event_matrix)
                    if first_event: first_event=False
                    event_number.append(int(splitline[1]))
                    flagged.append(int(splitline[2]))
                    event_matrix = []
                else:
                    param_line = [float(s) for s in line]
                    event_matrix.append(param_line)
            params.append(event_matrix)

    flagged = np.array(flagged, dtype=np.float64)
    event_number = np.array(event_number, dtype=np.float64)
    # zero-pad params
    params_max_size = max(map(len, params))
    params = np.array(list(map(lambda mat: np.pad(np.array(mat, dtype=np.float64), ((0, params_max_size - len(mat)), (0, 0))), params))).reshape((-1, params_max_size, 1, 1, 4))

    return (params,flagged,event_number)

In [3]:
filenames = ['data/nodecay/jets_parton_nodecay_'+str(i)+'.dat' for i in range(1,11)]
(params,flagged,event_number) = read_files(filenames)
filenames = ['data/nodecay/jets_hadron_nodecay_'+str(i)+'.dat' for i in range(1,11)]
(params_hadron,flagged_hadron,event_number_hadron) = read_files(filenames)

In [4]:
params = params_hadron
flagged = flagged_hadron

Data has extreme class imbalance. Subsample the majority class to obtain a balance data set for training

In [5]:
is_flagged = flagged==1
percentage = np.sum(is_flagged) / len(flagged)
print('Percentage of flagged c-cbar events: '+str(percentage))

Percentage of flagged c-cbar events: 0.06374269005847953


In [6]:
flagged_params = params[ is_flagged ]
unflagged_params = params[ ~is_flagged ]

# downsample the unflagged (majority) class to be equal in length to the flagged (minority) class
unflagged_indices = np.random.choice(unflagged_params.shape[0], size=len(flagged_params), replace=False)
to_keep = unflagged_params[ unflagged_indices ]

# re-append flagged and unflagged samples together
keep_params = np.concatenate( (flagged_params, to_keep) )
keep_flagged = np.concatenate( (np.ones(len(flagged_params)), np.zeros(len(to_keep))) )
X_orig, y_orig = shuffle( keep_params, keep_flagged, random_state=0 ) # shuffle so that flagged and unflagged classes are all mixed together

In [7]:
X_orig

array([[[[[ 1.65831e-03,  5.87835e-03,  3.52590e-01,  2.11000e+02]]],


        [[[ 8.53011e-03,  2.75912e-01,  2.35697e-01, -2.11000e+02]]],


        [[[ 2.83377e-02,  2.03387e-01, -1.42485e-01, -2.11000e+02]]],


        ...,


        [[[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00]]],


        [[[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00]]],


        [[[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00]]]],



       [[[[ 4.23723e-03,  1.31423e-01, -3.63327e-01,  2.11000e+02]]],


        [[[ 3.96604e-03,  3.80450e-01, -8.68833e-02, -2.11000e+02]]],


        [[[ 5.87365e-03,  3.41434e-01, -8.71167e-02, -2.11000e+02]]],


        ...,


        [[[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00]]],


        [[[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00]]],


        [[[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00]]]],



       [[[[ 3.98320e-03, -2.95808e-01,  1.30156e-01,  2.20000e+01]]],


        [[[ 1.07891e-03,  2.92

In [170]:
def category_encoding_layer(pdgs, max_tokens=None):
  
  # Create a layer that turns integers into indices.
  index = layers.IntegerLookup(max_tokens=max_tokens)

  # Learn the set of possible values and assign them a fixed integer index.
  index.adapt( pdgs )

  # Encode the integer indices.
  encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size(), output_mode="one_hot")

  return lambda feature: encoder(index(feature))

def do_category_encoding(dataset, category_index):

    encoding_layer = category_encoding_layer( dataset[:,:,:,:,category_index] )
    encoded_pdgs = np.asarray( encoding_layer( dataset[:,:,:,:,category_index] ) )
    mask = np.ones( dataset.shape[4], bool)
    mask[category_index] = False
    dataset = np.concatenate( (dataset[:,:,:,:,mask], encoded_pdgs), axis=4 )
    return dataset

The most useful piece of information for understanding whether there is a c-cbar splitting inside of a jet is whether there is a c-cbar pair inside of the jet, but this information is often not accessible experimentally. As a test, we remove this information (either replacing charms with gluons or light quarks) and retrain the model to see how this impacts the performance

In [171]:
def replace_charms(pdgs, replacement):

    assert replacement=='gluons' or replacement=='light quarks'

    mask = np.isin(pdgs,[4,-4])

    # replace with the id of gluons (21)
    if replacement=='gluons':
        pdgs[ mask ] = 21

    # replace with up quarks (1)
    elif replacement=='light quarks':
        pdgs[ mask ] = np.sign( pdgs[mask] )

Make a copy of the original data, and then create two modifications for testing - either the charms are replaced by gluons, or by light (up) quarks

In [172]:
category_index = 3 # the only category in this dataset is the particle id, which is at index 3

X_orig_encoded = do_category_encoding(X_orig, category_index)
x_train_orig, x_test_orig, y_train_orig, y_test_orig = train_test_split(X_orig_encoded, y_orig, test_size = .2, random_state = 0)

# X = X_orig.copy()
# replace_charms(X[:,:,:,:,category_index], 'light quarks')
# X_encoded = do_category_encoding(X, category_index)
# x_train_light, x_test_light, y_train_light, y_test_light = train_test_split(X_encoded, y_orig, test_size = .2, random_state = 0)
# test_light = X[:,:,:,:,category_index].flatten()

# X2 = X_orig.copy()
# replace_charms(X2[:,:,:,:,category_index], 'gluons')
# X_encoded = do_category_encoding(X2, category_index)
# x_train_gluon, x_test_gluon, y_train_gluon, y_test_gluon = train_test_split(X_encoded, y_orig, test_size = .2, random_state = 0)
# test_gluon = X2[:,:,:,:,category_index].flatten()

Make the model

In [173]:
class ReductionLayer(layers.Layer):
    def __init__(self, name):
        super(ReductionLayer, self).__init__(name=name)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, inputs):
        return tf.reduce_sum(inputs, axis=[1, 2, 3])


def get_clf(
    meta={},
    hidden_layer_size=100,
    observable_num=128,
    activation='leaky_relu',
    dropout=0.2,
    kernel_reg=1e-5,
    bias_reg=1e-5,
    activity_reg=1e-5,
    shape=x_train_orig.shape[1:]
):
    inputs = layers.Input(
        shape=shape,
        name="input",
    )
    masked_inputs = layers.Masking(mask_value=0.0, name="masking")(inputs)
    dense_1 = layers.TimeDistributed(
        layers.Dense(
            hidden_layer_size,
            activation=activation,
            kernel_regularizer=regularizers.L2(kernel_reg),
            bias_regularizer=regularizers.L2(bias_reg),
            activity_regularizer=regularizers.L2(activity_reg),
        ),
        name="dense_1",
    )(masked_inputs)
    dropout_1 = layers.Dropout(dropout)(dense_1)
    dense_2 = layers.TimeDistributed(
        layers.Dense(
            hidden_layer_size,
            activation=activation,
            kernel_regularizer=regularizers.L2(kernel_reg),
            bias_regularizer=regularizers.L2(bias_reg),
            activity_regularizer=regularizers.L2(activity_reg),
        ),
        name="dense_2",
    )(dropout_1)
    dropout_2 = layers.Dropout(dropout)(dense_2)
    distributed_phi = layers.TimeDistributed(
        layers.Dense(
            observable_num,
            activation=activation,
            kernel_regularizer=regularizers.L2(kernel_reg),
            bias_regularizer=regularizers.L2(bias_reg),
            activity_regularizer=regularizers.L2(activity_reg),
        ),
        name="distributed_phi",
    )(dropout_2)
    observables = ReductionLayer(name="observables")(distributed_phi)
    dropout_obs = layers.Dropout(dropout)(observables)
    dense_3 = layers.Dense(
        hidden_layer_size,
        activation=activation,
        kernel_regularizer=regularizers.L2(kernel_reg),
        bias_regularizer=regularizers.L2(bias_reg),
        activity_regularizer=regularizers.L2(activity_reg),
        name="dense_3",
    )(dropout_obs)
    dropout_3 = layers.Dropout(dropout)(dense_3)
    dense_4 = layers.Dense(
        hidden_layer_size,
        activation=activation,
        kernel_regularizer=regularizers.L2(kernel_reg),
        bias_regularizer=regularizers.L2(bias_reg),
        activity_regularizer=regularizers.L2(activity_reg),
        name="dense_4",
    )(dropout_3)
    dropout_4 = layers.Dropout(dropout)(dense_4)
    dense_5 = layers.Dense(
        hidden_layer_size,
        activation=activation,
        kernel_regularizer=regularizers.L2(kernel_reg),
        bias_regularizer=regularizers.L2(bias_reg),
        activity_regularizer=regularizers.L2(activity_reg),
        name="dense_5",
    )(dropout_4)
    dropout_5 = layers.Dropout(dropout)(dense_5)
    output = layers.Dense(1, activation="sigmoid", name="output")(dropout_5)

    model = tf.keras.models.Model(inputs=inputs, outputs=output)

    return model

In [174]:
clf = get_clf(dropout=0.2)

loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

# class_weights = compute_class_weight(
#     class_weight="balanced", classes=np.unique(flagged), y=flagged
# )
# class_weights /= sum(class_weights)
# loss_fn = tf.keras.losses.BinaryFocalCrossentropy(
#     apply_class_balancing=True, alpha=0.5, gamma=2, from_logits=False
# )

clf.compile(optimizer="adam", loss=loss_fn, metrics=["Accuracy", "AUC", "Precision", "Recall"])

The most useful piece of information for understanding whether there is a c-cbar splitting inside of a jet is whether there is a c-cbar pair inside of the jet, but this information is often not accessible experimentally. As a test, we remove this information (either replacing charms with gluons or light quarks) and retrain the model to see how this impacts the performance

In [175]:
clf.summary()

In [176]:
dot_img_file = "./model_1.png"
tf.keras.utils.plot_model(
    clf,
    to_file=dot_img_file,
    show_shapes=True,
    show_layer_names=True,
    show_layer_activations=True,
    expand_nested=True,
)

You must install pydot (`pip install pydot`) for `plot_model` to work.


In [177]:
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

clf.fit(
    x_train_orig,
    y_train_orig,
    epochs=50,
    batch_size=250,
    validation_split=0.2,
    callbacks=[early_stop]
)

Epoch 1/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 245ms/step - AUC: 0.5033 - Accuracy: 0.5040 - Precision: 0.5049 - Recall: 0.5074 - loss: 1.5634 - val_AUC: 0.5606 - val_Accuracy: 0.5057 - val_Precision: 0.6159 - val_Recall: 0.0710 - val_loss: 0.7127
Epoch 2/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 248ms/step - AUC: 0.5571 - Accuracy: 0.5391 - Precision: 0.5417 - Recall: 0.5630 - loss: 0.6996 - val_AUC: 0.6730 - val_Accuracy: 0.6481 - val_Precision: 0.8956 - val_Recall: 0.3476 - val_loss: 0.6745
Epoch 3/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 262ms/step - AUC: 0.6598 - Accuracy: 0.6399 - Precision: 0.7624 - Recall: 0.4105 - loss: 0.6425 - val_AUC: 0.6790 - val_Accuracy: 0.6525 - val_Precision: 0.8942 - val_Recall: 0.3582 - val_loss: 0.6528
Epoch 4/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 267ms/step - AUC: 0.6528 - Accuracy: 0.6404 - Precision: 0.7925 - Recall: 0.3826 - lo

<keras.src.callbacks.history.History at 0x2efdecbf0>

In [181]:
clf.evaluate(x_test_orig,y_test_orig)

[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - AUC: 0.6612 - Accuracy: 0.6569 - Precision: 0.8826 - Recall: 0.3494 - loss: 0.6109


[0.6044131517410278,
 0.6714734435081482,
 0.6614008545875549,
 0.891566276550293,
 0.35693612694740295]

In [182]:
preds = clf.predict(x_test_orig)
binary_predictions = [1 if pred>0.5 else 0 for pred in preds]
(binary_predictions!=y_test_orig).sum()

[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step


3558

In [183]:
len(preds)

10508

Now, try the more difficult thing where you "mislabel" the charm quarks as up quarks

In [36]:
del clf
clf = get_clf(dropout=0.2,shape=x_train_light.shape[1:])
clf.compile(optimizer="adam", loss=loss_fn, metrics=["Accuracy", "AUC", "Precision", "Recall"])

clf.fit(
    x_train_light,
    y_train_light,
    epochs=50,
    batch_size=250,
    validation_split=0.2,
    callbacks=[early_stop],
)

Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 76ms/step - AUC: 0.5755 - Accuracy: 0.5555 - Precision: 0.5602 - Recall: 0.5348 - loss: 0.9910 - val_AUC: 0.9531 - val_Accuracy: 0.8774 - val_Precision: 0.9101 - val_Recall: 0.8328 - val_loss: 0.3812
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 69ms/step - AUC: 0.9358 - Accuracy: 0.8915 - Precision: 0.8625 - Recall: 0.9301 - loss: 0.3592 - val_AUC: 0.9584 - val_Accuracy: 0.9397 - val_Precision: 0.8935 - val_Recall: 0.9960 - val_loss: 0.2616
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 83ms/step - AUC: 0.9509 - Accuracy: 0.9291 - Precision: 0.8864 - Recall: 0.9849 - loss: 0.2797 - val_AUC: 0.9618 - val_Accuracy: 0.9410 - val_Precision: 0.8928 - val_Recall: 1.0000 - val_loss: 0.2408
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 77ms/step - AUC: 0.9565 - Accuracy: 0.9337 - Precision: 0.8896 - Recall: 0.9905 - loss: 0.2533 - val

<keras.src.callbacks.history.History at 0x2edb5f800>

In [37]:
clf.evaluate(x_test_light,y_test_light)

[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - AUC: 0.9560 - Accuracy: 0.8786 - Precision: 0.9102 - Recall: 0.8321 - loss: 0.3460


[0.34794682264328003,
 0.9572073221206665,
 0.8745108246803284,
 0.9062857031822205,
 0.8334209322929382]

In [38]:
preds = clf.predict(x_test_light)
binary_predictions = [1 if pred>0.5 else 0 for pred in preds]
(binary_predictions!=y_test_light).sum()

[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


481

In [39]:
del clf
clf = get_clf(dropout=0.2,shape=x_train_gluon.shape[1:])
clf.compile(optimizer="adam", loss=loss_fn, metrics=["Accuracy", "AUC", "Precision", "Recall"])

clf.fit(
    x_train_gluon,
    y_train_gluon,
    epochs=50,
    batch_size=250,
    validation_split=0.2,
    callbacks=[early_stop],
)

Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 74ms/step - AUC: 0.5322 - Accuracy: 0.5288 - Precision: 0.5305 - Recall: 0.6157 - loss: 0.8156 - val_AUC: 0.6992 - val_Accuracy: 0.5549 - val_Precision: 0.5278 - val_Recall: 0.8932 - val_loss: 0.6815
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 65ms/step - AUC: 0.6733 - Accuracy: 0.6269 - Precision: 0.6235 - Recall: 0.6649 - loss: 0.6638 - val_AUC: 0.7592 - val_Accuracy: 0.6720 - val_Precision: 0.6917 - val_Recall: 0.5999 - val_loss: 0.5887
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 79ms/step - AUC: 0.7340 - Accuracy: 0.6654 - Precision: 0.6745 - Recall: 0.6701 - loss: 0.6175 - val_AUC: 0.7794 - val_Accuracy: 0.6896 - val_Precision: 0.6740 - val_Recall: 0.7133 - val_loss: 0.5657
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 80ms/step - AUC: 0.7536 - Accuracy: 0.6720 - Precision: 0.6545 - Recall: 0.7350 - loss: 0.5976 - val

<keras.src.callbacks.history.History at 0x2f69a4500>

### Now take a look at boosted decision trees

For now, try the boosted decision tree where the features are the pt of all particles

In [28]:
train_shape = x_train_orig.shape
test_shape = x_test_orig.shape
x_train_0_bdt = np.reshape( x_train_orig,(train_shape[0],train_shape[1]*train_shape[4]) )
x_test_0_bdt = np.reshape( x_test_orig,(test_shape[0],test_shape[1]*test_shape[4]) )

In [31]:
from xgboost import XGBClassifier

# create model instance
bst = XGBClassifier(objective='binary:logistic', learning_rate = 0.1,
              max_depth = 15, n_estimators = 50)
# fit model
bst.fit(x_train_0_bdt, y_train_orig)
# make predictions
preds = bst.predict(x_test_0_bdt)

In [33]:
((y_test_orig==1) & (preds==1)).sum()

1774

In [34]:
((y_test_orig==0) & (preds==0)).sum()

1725

In [37]:
(y_test_orig==preds).sum() / len(y_test_orig)

0.9895361990950227