In [1]:
import torch
from torch_geometric.data import Data
import uproot
import awkward as ak
import numpy as np
import matplotlib.pyplot as plt
import concurrent.futures
import torch
from torch.utils.data import Dataset, ConcatDataset, random_split
from torch_geometric.loader import DataLoader

In [2]:
fileset = {}

# Directory holding the graph files for the 'sample' entry. This assignment
# used to be commented out, so the cell only worked when `sig_dir` happened to
# survive from an earlier kernel session.
sig_dir = '/ceph/cms/store/user/aaportel/B-Parking/rechits_v2/BToKPhi_MuonLLPDecayGenFilter_PhiToPi0Pi0_mPhi0p3_ctau300/'

# Number of consecutively numbered graph files to list; lower it (e.g. to 2)
# for a quick smoke test.
n_sig_files = 328

# File indices are zero-padded to seven digits in the file names.
fileset['sample'] = [
    f'{sig_dir}BToKPhi_MuonLLPDecayGenFilter_PhiToPi0Pi0_mPhi0p3_ctau300_{i:07d}_graphs.pt'
    for i in range(n_sig_files)
]

# Background sample, currently disabled:
# bkg_dir = '/ceph/cms/store/user/aaportel/B-Parking/rechits_v2/ParkingBPH1_2018A/'
# fileset['background'] = [bkg_dir + f'ParkingBPH1_2018A_{str(i).zfill(7)}.root' for i in range(380)]

In [3]:
batch_size = 64
shuffle_dataset = True
random_seed = 42
split_ratio = 0.8  # 80% of the data for training, 20% for testing

# Set random seed for reproducibility (random_split and the shuffling loader
# both draw from torch's global RNG).
torch.manual_seed(random_seed)

# NOTE(review): torch.load unpickles arbitrary Python objects -- only point
# this at files from a trusted source.
datasets = [torch.load(fp) for fp in fileset['sample']]
dataset = ConcatDataset(datasets)

dataset_size = len(dataset)
train_size = int(split_ratio * dataset_size)
test_size = dataset_size - train_size  # remainder, so the two sizes always sum to dataset_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle_dataset)
# Accuracy does not depend on batch order, so the evaluation loader is not
# shuffled; this also keeps evaluation from consuming global RNG state.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [4]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Graph-level classifier: one GCNConv layer, mean-pool readout, linear head.

    Args:
        num_node_features: Size of each input node feature vector.
        hidden_channels: Output width of the graph convolution.
        num_classes: Number of output logits per graph.
    """

    def __init__(self, num_node_features, hidden_channels, num_classes):
        super().__init__()
        self.conv = GCNConv(num_node_features, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Return the un-normalised class scores for each graph in the batch."""
        # Node embeddings from the single convolution (no activation is applied).
        node_emb = self.conv(x, edge_index)

        # Readout: average node embeddings per graph -> [batch_size, hidden_channels]
        graph_emb = global_mean_pool(node_emb, batch)

        # Dropout is only active while the module is in training mode.
        graph_emb = F.dropout(graph_emb, p=0.5, training=self.training)
        return self.lin(graph_emb)

# Build the classifier: 17 features per node in, 64 hidden channels, 2 output classes.
gcn_config = dict(num_node_features=17, hidden_channels=64, num_classes=2)
model = GCN(**gcn_config)
print(model)

GCN(
  (conv): GCNConv(17, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [5]:
# Cross-entropy is applied directly to the model's output scores.
criterion = torch.nn.CrossEntropyLoss()
# Adam over every model parameter with a learning rate of 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``criterion``.

    Returns:
        float: Mean loss per graph over the pass, or 0.0 if the loader
        yielded no batches. Callers that ignore the return value are
        unaffected.
    """
    model.train()

    loss_sum = 0.0
    graphs_seen = 0
    for data in train_loader:  # Iterate in batches over the training dataset.
        # Clear gradients *before* the forward/backward pass: if a previous run
        # was interrupted between backward() and zero_grad(), the stale
        # gradients would otherwise be added into this batch's update.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.

        # `out` has one row per graph, so its first dimension is the number of
        # graphs in this batch; weight the batch-mean loss accordingly.
        n_graphs = out.size(0)
        loss_sum += float(loss) * n_graphs
        graphs_seen += n_graphs

    return loss_sum / graphs_seen if graphs_seen else 0.0

def test(loader):
    """Compute the classification accuracy of the notebook-level ``model``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; must also expose ``loader.dataset`` with a length.

    Returns:
        float: Fraction of graphs in ``loader.dataset`` whose arg-max
        prediction equals the label, or 0.0 when the dataset is empty.
    """
    model.eval()

    correct = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every batch.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with the highest score.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.

    total = len(loader.dataset)
    return correct / total if total else 0.0  # Derive ratio of correct predictions.


# Train for 170 epochs, reporting accuracy on both splits after each one.
n_epochs = 170
for epoch in range(1, n_epochs + 1):
    train()
    # Evaluated left to right: training split first, then the held-out split.
    train_acc, test_acc = test(train_loader), test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.4874, Test Acc: 0.4919
Epoch: 002, Train Acc: 0.5126, Test Acc: 0.5081
Epoch: 003, Train Acc: 0.4874, Test Acc: 0.4919
Epoch: 004, Train Acc: 0.5126, Test Acc: 0.5081
Epoch: 005, Train Acc: 0.5126, Test Acc: 0.5081
Epoch: 006, Train Acc: 0.5126, Test Acc: 0.5081
Epoch: 007, Train Acc: 0.5126, Test Acc: 0.5081
Epoch: 008, Train Acc: 0.4874, Test Acc: 0.4919
Epoch: 009, Train Acc: 0.4874, Test Acc: 0.4919
Epoch: 010, Train Acc: 0.5126, Test Acc: 0.5081


KeyboardInterrupt: 