In [None]:
import os
import sys
# Display the current working directory; relative file paths in this notebook resolve against it.
os.getcwd()

In [None]:
# importing libraries & making torch.device object for GPU

# neural network packages
import torch
import torch.nn as nn
from torch.nn import Transformer
from torch import Tensor
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# data packages
import numpy as np
import math
import pandas as pd
import sklearn.preprocessing as prep
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import fndict as fd

# visual packages
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import tqdm
import warnings

# Select where PyTorch stores tensors: the CUDA GPU when one is available, otherwise the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# ---- STEP 1: establishing training features (x) and training targets (y) data -----------------------
print("--Establishing Training Features/Targets--")
# while training, the model takes in the training features (x) and is evaluated against the training targets (y)

# michael's data extraction: 10,000 final state particles per file and scaled with standard scaler

# Build the paths with os.path.join so the notebook is not tied to Windows path separators.
data_dir = os.path.join('..', '..', 'PHYS417_Project')

# Open each archive once and close it again as soon as both arrays have been read.
# x: event data/properties represented by a N x 5 matrix (N = variable number of constituents)
# y: jet type identifier of a single 5 x 1 binary matrix (0 = no, 1 = yes)
with np.load(os.path.join(data_dir, 'data_1.npz')) as archive:
    x1, y1 = archive['x'], archive['y']
with np.load(os.path.join(data_dir, 'data_2.npz')) as archive:
    x2, y2 = archive['x'], archive['y']

# Stack the two files along the event axis.
x = np.concatenate((x1, x2), axis=0)
y = np.concatenate((y1, y2), axis=0)

print("features (x):", x.shape, "\ntargets (y):", y.shape)



# ---- STEP 2: splitting data into training, validation, and testing sets -----------------------------
print("\n --Splitting Data--")

# shuffle features and targets together; the fixed seed (0) keeps the selection reproducible
x, y = shuffle(x, y, random_state=0)

# first split: hold out 20% of the data as the test set (used after training to
# judge predictions on unseen data); the remaining 80% stays available for training
trfeat, tefeat, trtarget, tetarget = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# second split: take a validation set (used to monitor/tune during training) out of
# the training portion -- 0.25 x 0.8 = 0.2 of the full data
trfeat, vafeat, trtarget, vatarget = train_test_split(
    trfeat,
    trtarget,
    test_size=0.25,
    random_state=0,
)

# report the shapes of the three resulting sets
print("Training set:", trfeat.shape, trtarget.shape)
print("Validation set:", vafeat.shape, vatarget.shape)
print("Testing set:", tefeat.shape, tetarget.shape)


''' DATA PRE-SCALED, NO NEED TO NORMALIZE '''



# ---- STEP 3: sending to GPU  --------------------------------------------------------------
print("\n --Sending to GPU--")

# torch.as_tensor converts each split to a float32 tensor on DEVICE in a single step.
# The previous torch.tensor(torch.from_numpy(...)) pattern converted twice and emitted a
# UserWarning that had to be silenced with a blanket warnings filter; as_tensor needs no
# filter and also accepts tensors, so re-running this cell no longer raises.
trfeat = torch.as_tensor(trfeat, dtype=torch.float32, device=DEVICE)
trtarget = torch.as_tensor(trtarget, dtype=torch.float32, device=DEVICE)

vafeat = torch.as_tensor(vafeat, dtype=torch.float32, device=DEVICE)
vatarget = torch.as_tensor(vatarget, dtype=torch.float32, device=DEVICE)

tefeat = torch.as_tensor(tefeat, dtype=torch.float32, device=DEVICE)
tetarget = torch.as_tensor(tetarget, dtype=torch.float32, device=DEVICE)

In [None]:
# DataLoader collate function for variable-length sequences
def collate_fn(batch):
    """Pad a batch of (sequence, label) pairs to a common length.

    Args:
        batch: sequence of ``(sequence, label)`` pairs, where ``sequence`` is a
            tensor whose first dimension is its length and ``label`` is a tensor.

    Returns:
        tuple ``(padded_sequences, labels, lengths)``:
            padded_sequences: batch-first tensor of the sequences, zero-padded
                to the longest one and ordered longest first.
            labels: the labels stacked in that same order.
            lengths: list with the unpadded length of each sequence.
    """
    # sorted() builds a new list, so the caller's batch is left untouched
    # (list.sort() reordered it in place and failed on non-list batches).
    ordered = sorted(batch, key=lambda pair: pair[0].shape[0], reverse=True)
    sequences, labels = zip(*ordered)
    lengths = [seq.shape[0] for seq in sequences]
    padded_sequences = nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    labels = torch.stack(labels)
    return padded_sequences, labels, lengths

# Pair every feature tensor with its target so each dataset item is a (sequence, label) tuple.
train_data = list(zip(trfeat, trtarget))
val_data = list(zip(vafeat, vatarget))
test_data = list(zip(tefeat, tetarget))

# Only the training loader reshuffles: shuffle=True draws a new sample order every epoch so the
# optimiser does not see the same fixed batches each time. Validation and test order stay fixed.
# Call torch.manual_seed(...) beforehand if the shuffling has to be reproducible.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=32, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, collate_fn=collate_fn)

# Define the model
class ParticleClassifier(nn.Module):
    """Conv1d -> LSTM -> attention pooling -> linear classifier for padded sequences.

    Args:
        input_dim: number of features per sequence element.
        hidden_dim: channel count of the convolution and hidden size of the LSTM.
        output_dim: number of output logits.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ParticleClassifier, self).__init__()
        self.cnn = nn.Conv1d(in_channels=input_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.attention = nn.Linear(hidden_dim, 1)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, lengths):
        """Compute one row of logits per sequence.

        Args:
            x: padded batch of shape (batch_size, sequence_length, input_dim).
            lengths: unpadded length of every sequence in ``x`` (list or tensor).

        Returns:
            Tensor of shape (batch_size, output_dim) holding raw logits.
        """
        x = x.permute(0, 2, 1)  # Convert to (batch_size, input_dim, sequence_length) for CNN
        x = torch.relu(self.cnn(x))
        x = x.permute(0, 2, 1)  # Convert back to (batch_size, sequence_length, hidden_dim) for LSTM

        # pack_padded_sequence expects the lengths on the CPU.
        lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()
        packed_input = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.lstm(packed_input)
        output, _ = pad_packed_sequence(packed_output, batch_first=True)

        # True at every timestep that is padding for its sequence.
        steps = torch.arange(output.size(1), device=output.device)
        is_padding = steps.unsqueeze(0) >= lengths.to(output.device).unsqueeze(1)

        # Attention mechanism. Padded steps are set to -inf before the softmax so they
        # receive zero weight; otherwise they take probability mass away from the real
        # steps and a sample's output depends on how much padding its batch happens to have.
        scores = torch.tanh(self.attention(output)).squeeze(-1)
        scores = scores.masked_fill(is_padding, float('-inf'))
        attn_weights = torch.softmax(scores, dim=1).unsqueeze(-1)
        attn_output = torch.sum(attn_weights * output, dim=1)

        out = self.fc(attn_output)
        return out

# Build the classifier (5 input features, 64 hidden units, 5 output logits), move it to
# DEVICE, and create the loss and optimiser that train it.
model = ParticleClassifier(input_dim=5, hidden_dim=64, output_dim=5)
model = model.to(DEVICE)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Training the model
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=25):
    """Train ``model`` and print the training and validation loss after every epoch.

    Args:
        model: module called as ``model(sequences, lengths)``.
        train_loader: loader yielding ``(sequences, labels, lengths)`` training batches.
        val_loader: loader yielding ``(sequences, labels, lengths)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser that updates the parameters of ``model``.
        num_epochs: number of passes over ``train_loader``.

    Both losses are sample-weighted averages over ``len(loader.dataset)``.
    """
    for epoch in range(num_epochs):
        # The validation pass below switches to eval mode, so training mode has to be
        # re-enabled at the start of every epoch, not just once before the loop.
        model.train()
        running_loss = 0.0
        for sequences, labels, lengths in train_loader:
            optimizer.zero_grad()
            outputs = model(sequences.float(), lengths)
            # reshape to the output shape instead of squeeze(): squeeze() also removes a
            # batch dimension of size 1, which breaks the loss on a single-sample batch.
            labels = labels.float().reshape(outputs.shape)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * sequences.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)

        # Validation loss
        val_loss = 0.0
        model.eval()
        with torch.no_grad():
            for sequences, labels, lengths in val_loader:
                outputs = model(sequences.float(), lengths)
                labels = labels.float().reshape(outputs.shape)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * sequences.size(0)
        val_loss /= len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Validation Loss: {val_loss:.4f}")

# Evaluate the model
def evaluate_model(model, test_loader):
    """Print and return the classification accuracy of ``model`` on ``test_loader``.

    Args:
        model: module called as ``model(sequences, lengths)`` that returns one row of
            class logits per sample.
        test_loader: loader yielding ``(sequences, labels, lengths)`` batches whose
            labels hold one score per class (e.g. one-hot rows).

    Returns:
        Fraction of samples whose highest logit matches the highest label entry.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels, lengths in test_loader:
            outputs = model(sequences.float(), lengths)
            predicted = torch.argmax(outputs, dim=1)
            # Align the labels with the logits before argmax. The earlier
            # argmax(...).squeeze() produced a 0-dim tensor for a single-sample
            # batch, and labels.size(0) then raised.
            labels = torch.argmax(labels.reshape(outputs.shape), dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

# Fit the classifier for 25 epochs, then report its accuracy on the test loader.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=25,
)
evaluate_model(model=model, test_loader=test_loader)

In [None]:
# Hyperparameters: run length and batch size, then the optimiser settings.
epochs, batch_size = 70, 15
learning_rate = 0.01 - 9e-6  # alternative value tried: 0.002
betas, eps = (0.9, 0.98), 1e-8

In [None]:
from timeit import default_timer as timer

# NOTE(review): `classifier` is not defined in any cell visible above this one; it has to be
# created before this cell runs. The old `print(trmask.shape, vamask.shape)` debug line was
# removed because `trmask`/`vamask` only appear in commented-out code, so it raised NameError.

# convert the targets to 64-bit integers for CrossEntropyLoss
trtarget = trtarget.long()
vatarget = vatarget.long()

loss_fn = torch.nn.CrossEntropyLoss()
# NOTE: this rebinds the notebook-level `optimizer` name to an optimiser for `classifier`.
optimizer = torch.optim.Adam(classifier.parameters(), lr=learning_rate, betas=betas, eps=eps)

# Training Loop ---------------------------------------------------------------------------------------

trlosses = []  # training loss per epoch
valosses = []  # validation ACCURACY per epoch (name kept because the plotting cell reads it)

print(f'Learning Rate: {learning_rate} \nBatch Size: {batch_size} \nEpochs: {epochs} \n')
with warnings.catch_warnings(): # booo warnings
    warnings.simplefilter("ignore")

    start_time = timer()
    for epoch in tqdm.trange(epochs):

        # One full-batch optimisation step; train mode is restored every epoch because
        # the validation pass below switches the module to eval mode.
        classifier.train()
        optimizer.zero_grad()

        train_outputs = classifier(trfeat)
        loss = loss_fn(train_outputs, trtarget)
        trlosses.append(loss.item())

        loss.backward()
        optimizer.step()

        # Compute Validation Accuracy ----------------------------------------------------------------------

        classifier.eval()  # evaluate with train-only layer behaviour (e.g. dropout) switched off
        with torch.no_grad(): # Telling PyTorch we aren't passing inputs to network for training purpose

            validation_outputs = classifier(vafeat)

            correct = (torch.argmax(validation_outputs, dim=1) == vatarget).float()

            # store a plain Python float rather than a tensor
            valosses.append(correct.mean().item())
    end_time = timer()
    time = end_time - start_time
    print(f"Total Train Time: {'{:.0f}m {:.1f}s'.format(*divmod(time, 60))}")

In [None]:
# Score the test set. Inference runs in eval mode and under no_grad so that no autograd
# graph is built for the whole test set and train-only layer behaviour is switched off.
# NOTE(review): assumes `tetarget` has the same shape as `predicted` -- confirm.
classifier.eval()
with torch.no_grad():
    test_outputs = classifier(tefeat)
predicted = torch.argmax(test_outputs, dim=1)
correct = (predicted == tetarget).float()
accuracy = correct.mean().item()

# Plot the loss
plt.figure(figsize = (12, 7))

# top panel: training loss per epoch
plt.subplot(2, 1, 1)
plt.plot(trlosses, linewidth = 3)
plt.ylabel("Losses in Training")
plt.annotate(f'Learning Rate: {learning_rate} \nBatch Size: {batch_size} \nLowest Loss: {min(trlosses):.3f}', 
             xy=(0.95, 0.85), xycoords='axes fraction', va='top', ha='right')
sns.despine()

# bottom panel: validation accuracy per epoch, annotated with the test accuracy
plt.subplot(2, 1, 2)
plt.plot(valosses, linewidth = 3, color = 'gold')
plt.ylabel("Training Accuracy (Validation)")
plt.annotate(f'Accuracy: {accuracy*100:.3f}%', xy=(0.95, 0.20), xycoords='axes fraction', va='top', ha='right')
sns.despine()