In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import Linear
from torch import Tensor
from torch_geometric.nn import GCNConv
from torch_geometric.data   import Data
from torch_geometric.loader import DataLoader
from torch_geometric.data import InMemoryDataset

In [2]:
# signal data
df = pd.read_csv('Dijet_bb_pt10_15_dw.csv')

# background data
df2 = pd.read_csv('Dijet_qq_pt10_15_dw.csv')

In [3]:
# view column names
list(df.columns)

['Unnamed: 0',
 'Jet0_ENDVERTEX_X',
 'Jet0_ENDVERTEX_Y',
 'Jet0_ENDVERTEX_Z',
 'Jet0_ENDVERTEX_XERR',
 'Jet0_ENDVERTEX_YERR',
 'Jet0_ENDVERTEX_ZERR',
 'Jet0_ENDVERTEX_CHI2',
 'Jet0_ENDVERTEX_NDOF',
 'Jet0_OWNPV_X',
 'Jet0_OWNPV_Y',
 'Jet0_OWNPV_Z',
 'Jet0_OWNPV_XERR',
 'Jet0_OWNPV_YERR',
 'Jet0_OWNPV_ZERR',
 'Jet0_OWNPV_CHI2',
 'Jet0_OWNPV_NDOF',
 'Jet0_IP_OWNPV',
 'Jet0_IPCHI2_OWNPV',
 'Jet0_FD_OWNPV',
 'Jet0_FDCHI2_OWNPV',
 'Jet0_DIRA_OWNPV',
 'Jet0_ORIVX_X',
 'Jet0_ORIVX_Y',
 'Jet0_ORIVX_Z',
 'Jet0_ORIVX_XERR',
 'Jet0_ORIVX_YERR',
 'Jet0_ORIVX_ZERR',
 'Jet0_ORIVX_CHI2',
 'Jet0_ORIVX_NDOF',
 'Jet0_FD_ORIVX',
 'Jet0_FDCHI2_ORIVX',
 'Jet0_DIRA_ORIVX',
 'Jet0_P',
 'Jet0_PT',
 'Jet0_PE',
 'Jet0_PX',
 'Jet0_PY',
 'Jet0_PZ',
 'Jet0_MM',
 'Jet0_MMERR',
 'Jet0_M',
 'Jet0_ID',
 'Jet0_Eta',
 'Jet0_Phi',
 'Jet0_MLoKi',
 'Jet0_MMLoKi',
 'Jet0_NTrk',
 'Jet0_N90',
 'Jet0_MTF',
 'Jet0_NSatCells',
 'Jet0_NHasPVInfo',
 'Jet0_JEC_Cor',
 'Jet0_JEC_PV',
 'Jet0_JEC_Error',
 'Jet0_w',
 'Jet0_CPF',
 'Jet0_

In [4]:
# separate signal and background of Jet 0 and 1
jet00 = df[df.columns[df.columns.str.contains("Jet0")]] # Jet0 sig
jet01 = df2[df2.columns[df2.columns.str.contains("Jet0")]] # Jet0 back

jet10 = df[df.columns[df.columns.str.contains("Jet1")]] # Jet 1 sig
jet11 = df2[df2.columns[df2.columns.str.contains("Jet1")]] # Jet 1 back

In [5]:
train_data = pd.concat([jet00, jet01]) # combine Jet 0 sig/ back data
test_data = pd.concat([jet10, jet11]) # combine Jet 1 sig/ back data

In [6]:
# define nodes & convert to numpy
x = [len(train_data), 0] # x = [nodes, features]
edge_index = [2, len(train_data.columns)] # edge_index = [2, edges] 

In [10]:
loader = DataLoader(train_data, batch_size=32, shuffle=True) # must load to iterate later

In [11]:
# GNN Model with 2 conv layers, relu activation fxns, and linear layer
# input_size = number of nodes
# ReLu (rectified linear layer) activation fxn

class GNN_Model(nn.Module):
    def __init__(self, input_size, hidden_layers, conv):
        super().__init__()
        self.conv1 = conv(input_size, hidden_layers)
        self.conv2 = conv(input_size, hidden_layers)
        self.lin = Linear(hidden_layers, input_size)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        # linear layers maps input nodes to output
        x = self.lin(x)
    
        return F.relu(self.conv2(x))

input_size = 32
hidden_layers = 64
model = GNN_Model(input_size, hidden_layers, GCNConv)
print(model)

GNN_Model(
  (conv1): GCNConv(32, 64)
  (conv2): GCNConv(32, 64)
  (lin): Linear(in_features=64, out_features=32, bias=True)
)


In [12]:
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
epochs = 1

def train():    
    model.train()

    for data in range (len(train_data)): # iterate in batches across dataset
        total_loss = 0
        optimizer.zero_grad()
        output = model(data.x, data.edge_index)
        loss = criterion(output, data.y) # compute loss
        loss.backward() # derive the gradients
        optimizer.step() # udpate parameters from gradients
        total_loss += loss.item()
        
        avg_loss = total_loss / len(loader.data)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')

In [13]:
for epoch in range(epochs):
    train()

AttributeError: 'int' object has no attribute 'x'