In [1]:
import os
import sys
os.getcwd()

'c:\\Users\\ebaca\\Desktop\\Phys 417\\Final Project - HEP Tagging'

In [2]:
# importing libraries & sending tensors to GPU

# neural network packages
import torch
import torch.nn as nn
from torch.nn import Transformer
from torch import Tensor
from torch.utils.data import DataLoader
sys.path.append('..\\..\\PHYS417_Project')
from nnrunner import NetRunner

# data packages
import numpy as np
import math
import pandas as pd
import sklearn.preprocessing as prep
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import fndict as fd

# visual packages
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tqdm import tqdm
import warnings

# Create a torch.device object to tell pytorch where to store your tensors: cpu or gpu
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

<b>About the Dataset</b>

<u>Tag</u>: the type of the original particles involved in the collision

<u>Jet</u>: collection of particles that hadronized (decayed) together into a stable particle

Per jet, variable number of constituents (rows) with 5 features (columns):
1. $p_T$: transverse momentum as a fraction of the jet total
2. $\eta$: angular coordinate relative to jet center
3. $\phi$: angular coordinate relative to jet center
4. $E$: energy from constituent
5. $\Delta R = \sqrt{\eta^2 + \phi^2}$

#### Preparing the Data

In [3]:
# ---- STEP 1: establishing training features (x) and training targets (y) data -----------------------
print("--Establishing Training Features/Targets--")

# while training, model enters in the training features (x) and evaluates against the training targets (y)
trfeat = np.load('..\\..\\PHYS417_Project\\data_1_tiled.npz')['x']
# the testing data is used to evaluate the model's performance after training for predictions on unseen data
trtarget = np.load('..\\..\\PHYS417_Project\\data_1_tiled.npz')['y']

print("trfeat:", trfeat.shape, "\ntrtarget:", trtarget.shape)



# ---- STEP 2: normalizing/shaping data  --------------------------------------------------------------
print("\n --Normalizing/Shaping Data--")

# Reducing to 2D for scaling, then reshaping back to 3D afterwards
scaler = prep.StandardScaler()

# traing features
trfeat_2d = trfeat.reshape((trfeat.shape[0], -1))
trfeat_2d = scaler.fit_transform(trfeat_2d)
trfeat = trfeat_2d.reshape(trfeat.shape)
print("trfeat reduced, reshaped:", trfeat_2d.shape, trfeat.shape)

# training targets
trtarget_2d = trtarget.reshape((trtarget.shape[0], -1))
trtarget_2d = scaler.fit_transform(trtarget_2d)
trtarget = trtarget_2d.reshape(trtarget.shape)
print("trtarget reduced, reshaped:", trtarget_2d.shape, trtarget.shape)



# ---- STEP 3: splitting data into training, validation, and testing sets -----------------------------
print("\n --Splitting Data and Sending to GPU--")
# the validation data is used to visualize/evaluate the model's performance throughout training to help with tuning hyperparameters

# shuffling for random selection
trfeat, trtarget = shuffle(trfeat, trtarget, random_state=0) 


# splitting data into training, testing, and validation sets
trfeat = trfeat[1000:] 
print("trfeat:", trfeat.shape)

trtarget = trtarget[1000:]
print("trtarget:", trtarget.shape)

vafeat = trfeat[:1000] 
print("vafeat:", vafeat.shape)

vatarget = trtarget[:1000] 
print("vatarget:", vatarget.shape)


# sending data to GPU
with warnings.catch_warnings(): # booo warnings
    warnings.simplefilter("ignore")

    trfeat = torch.tensor(torch.from_numpy(trfeat), dtype=torch.float32).to(DEVICE)
    trtarget = torch.tensor(torch.from_numpy(trtarget), dtype=torch.float32).to(DEVICE)
    vafeat = torch.tensor(torch.from_numpy(vafeat), dtype=torch.float32).to(DEVICE)
    vatarget = torch.tensor(torch.from_numpy(vatarget), dtype=torch.float32).to(DEVICE)

--Establishing Training Features/Targets--
trfeat: (10000, 188, 5) 
trtarget: (10000, 5)

 --Normalizing/Shaping Data--
trfeat reduced, reshaped: (10000, 940) (10000, 188, 5)
trtarget reduced, reshaped: (10000, 5) (10000, 5)

 --Splitting Data and Sending to GPU--
trfeat: (9000, 188, 5)
trtarget: (9000, 5)
vafeat: (1000, 188, 5)
vatarget: (1000, 5)


In [4]:
# object with data for later
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

# Create the dataset
trainers = MyDataset(trfeat, trtarget)

#### Build Model & Initialize

In [5]:
# ---- BUILDING THE MODEL -----------------------------
class ParticleClassifier(nn.Module):
    def __init__(self, num_layers, dim_model, num_heads, dim_feedforward, dropout=0.1):
        super(ParticleClassifier, self).__init__()

        self.transformer = nn.Transformer(d_model = dim_model, 
                                          nhead = num_heads, 
                                          num_encoder_layers = num_layers, 
                                          num_decoder_layers = num_layers, 
                                          dim_feedforward = dim_feedforward, 
                                          dropout = dropout)
        self.linear = nn.Linear(dim_model, 5)  # 5 for the number of final state particles

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        src = src.permute(1, 0, 2)  # Transformer expects src to be of shape (sequence length, batch size, features)
        out = self.transformer(src=src, src_mask=src_mask, src_key_padding_mask=src_key_padding_mask)
        out = out.permute(1, 0, 2)  # Convert back to (batch size, sequence length, features)
        out = self.linear(out[:, -1])  # Use the last output only

        return out


# ---- INITIALIZING MODEL -----------------------------
classifier = ParticleClassifier(
    num_layers = 2, 
    dim_model = trfeat.shape[1], # embedded dimension must be divisible by num_heads
    num_heads = fd.highest_divisor(trfeat.shape[1]), 
    dim_feedforward = 512 
    ).to(DEVICE)



#### Train Model

<b>Hyperparameters</b>

In [6]:
loss_fn = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(classifier.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)
# betas are hyperparameters that control the exponential moving averages
# eps is a small constant added to improve numerical stability by preventing division by zero

epochs = 20
batch_size = 32

<b>Begin Training</b>

In [7]:
trlosses = []
valosses = []

trDataLoader = DataLoader(trainers, batch_size=batch_size)

from timeit import default_timer as timer
train = fd.trainer

for epoch in range(1, epochs+1):
    
    classifier.train()
    total_loss = 0

    start_time = timer()
    for batch in trDataLoader:
        
        inputs, targets = batch
        optimizer.zero_grad()

        # forward pass
        output = classifier(inputs)

        loss = loss_fn(output, targets)

        # backward pass and optimization
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    #---------------------------#
    end_time = timer()
    
    avg_loss = total_loss / len(trDataLoader)
    trlosses.extend(avg_loss)


    val_loss = train.evaluate(classifier)
    valosses.extend(val_loss)

    print(f"Epoch: {epoch}, Loss: {avg_loss}, Time: {(end_time - start_time):.3f}s")

In [12]:
# runner.simpleload(np.array(trfeat.cpu()), np.array(trtarget.cpu()))
# runner = NetRunner(withCuda=torch.cuda.is_available())
# runner.fit(classifier, lr=0.0001, epochs=epochs, optimizer='adam', lossFunc='cross_entropy')
# runner.train(batch=False)

<b>Visualizing</b>

In [None]:
# Plot the loss
plt.figure(figsize = (12, 7))

plt.subplot(2, 1, 1)
plt.plot(runner.losses, linewidth = 3)
plt.ylabel("training loss")
plt.xlabel("iterations")
sns.despine()

plt.subplot(2, 1, 2)
plt.plot(runner.accuracies, linewidth = 3, color = 'gold')
plt.ylabel("validation accuracy")
sns.despine()

#evaluate model performance against loaded test data
tspred, testacc, idxs = runner.eval()
print(testacc)