In [114]:
from torch_geometric.utils import to_networkx, erdos_renyi_graph, barabasi_albert_graph
from higgs_dataloader import HiggsDatasetPyG
import torch

from utils import create_graph


def generate_higgs_exp_graph_edge():
    """Build the edge index of a 28-node graph made of six dense feature groups.

    Every group is generated with ``erdos_renyi_graph(size, 1.0)`` (edge
    probability 1.0, i.e. intended to be fully connected) and shifted to its
    own node-id range: 0-4, 5-8, 9-12, 13-16, 17-20 and 21-27.  A final block
    pairs the first node of each later group (5, 9, 13, 17, 21; first row)
    with node 0 (second row).

    Returns:
        A tensor with two rows and one column per edge, obtained by
        concatenating all blocks along ``dim=1``.
    """
    # (first node id, number of nodes) for each group, in concatenation order.
    group_layout = [(0, 5), (5, 4), (9, 4), (13, 4), (17, 4), (21, 7)]
    dense_blocks = [erdos_renyi_graph(size, 1.0) + start for start, size in group_layout]

    # Links from every non-zero group start to node 0 (one direction only).
    hub_links = torch.tensor([[5, 9, 13, 17, 21], [0, 0, 0, 0, 0]])
    return torch.cat(dense_blocks + [hub_links], dim=1)


def generate_higgs_exp_graph_edge_v2():
    """Build an edge index from six Barabasi-Albert groups (random per call).

    Each group is created with ``barabasi_albert_graph(size, size - 1)`` and
    shifted to its own node-id range: 0-4, 5-8, 9-12, 13-16, 17-20 and 21-27.
    No edges between groups are added in this variant.

    Returns:
        A tensor with two rows and one column per edge, obtained by
        concatenating the six group blocks along ``dim=1``.
    """
    # (first node id, num_nodes, num_edges argument) in concatenation order.
    group_layout = [(0, 5, 4), (5, 4, 3), (9, 4, 3), (13, 4, 3), (17, 4, 3), (21, 7, 6)]
    group_blocks = [
        barabasi_albert_graph(size, attach) + start
        for start, size, attach in group_layout
    ]
    # Group-to-node-0 links are disabled in this variant:
    # connection = torch.tensor([[5, 9, 13, 17, 21], [0, 0, 0, 0, 0]])
    return torch.cat(group_blocks, dim=1)

def generate_higgs_exp_graph_edge_v3():
    """Build a deterministic star-shaped edge index over 28 nodes.

    For each of the six groups (node ids 0-4, 5-8, 9-12, 13-16, 17-20, 21-27)
    every node of the group, including the first one itself, is paired with the
    group's first node.  A final block pairs nodes 5, 9, 13, 17 and 21 (first
    row) with node 0 (second row).

    Returns:
        An int64 tensor of shape (2, 33): first row holds one endpoint of each
        edge, second row the hub it is attached to.
    """
    # (hub node id, number of nodes in the group) in concatenation order.
    group_layout = [(0, 5), (5, 4), (9, 4), (13, 4), (17, 4), (21, 7)]

    star_blocks = []
    for hub, size in group_layout:
        members = torch.arange(hub, hub + size)
        # Second row repeats the hub id once per member (self-pair included).
        hubs = torch.full_like(members, hub)
        star_blocks.append(torch.stack([members, hubs]))

    hub_links = torch.tensor([[5, 9, 13, 17, 21], [0, 0, 0, 0, 0]])
    return torch.cat(star_blocks + [hub_links], dim=1)


# Location of the gzipped HIGGS CSV, relative to the notebook's working directory.
csv_file = 'data/HIGGS.csv.gz'
# Use the deterministic v3 (star-shaped) edge layout for every sample.
edge_index_hg = generate_higgs_exp_graph_edge_v3()
# Training split with all features kept (drop_feats=False).
higgs_dataset_train = HiggsDatasetPyG(csv_file=csv_file, edge_index=edge_index_hg, split='train', drop_feats=False)

# Visual sanity check on the first sample; create_graph comes from the local
# utils module and presumably returns a figure object (it exposes .show()).
first_item = higgs_dataset_train[0]
fig = create_graph(first_item)
fig.show()


To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).



In [115]:
# Shape of the first sample's `x` tensor; the recorded output is [28, 1],
# matching the 28 node ids (0-27) used by the edge layout.
print(first_item.x.shape)

torch.Size([28, 1])


In [116]:
import pandas as pd
# Read only the first 80,000 rows; the file has no header row (header=None),
# so the 29 column names (label + 28 features) are assigned by hand below.
higgs_data = pd.read_csv('./data/HIGGS.csv.gz', compression='gzip', header=None, nrows=80000)
# Column order: class label, lepton kinematics, missing energy, four jets
# (pt, eta, phi, b-tag each), then seven derived mass features.
higgs_data.columns = ['class_label',
                      'lepton_pT', 'lepton_eta', 'lepton_phi',
                      'missing_energy_magnitude', 'missing_energy_phi',
                      'jet_1_pt', 'jet_1_eta', 'jet_1_phi', 'jet_1_b-tag',
                      'jet_2_pt', 'jet_2_eta', 'jet_2_phi', 'jet_2_b-tag',
                      'jet_3_pt', 'jet_3_eta', 'jet_3_phi', 'jet_3_b-tag',
                      'jet_4_pt', 'jet_4_eta', 'jet_4_phi', 'jet_4_b-tag',
                      'm_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']

In [117]:
# Keep the first five events as a small sample for the plotting cells below;
# the bare expression renders the frame as the cell output.
test_row = higgs_data.head(5)
test_row

Unnamed: 0,class_label,lepton_pT,lepton_eta,lepton_phi,missing_energy_magnitude,missing_energy_phi,jet_1_pt,jet_1_eta,jet_1_phi,jet_1_b-tag,...,jet_4_eta,jet_4_phi,jet_4_b-tag,m_jj,m_jjj,m_lv,m_jlv,m_bb,m_wbb,m_wwbb
0,1.0,0.869293,-0.635082,0.22569,0.32747,-0.689993,0.754202,-0.248573,-1.092064,0.0,...,-0.010455,-0.045767,3.101961,1.35376,0.979563,0.978076,0.920005,0.721657,0.988751,0.876678
1,1.0,0.907542,0.329147,0.359412,1.49797,-0.31301,1.095531,-0.557525,-1.58823,2.173076,...,-1.13893,-0.000819,0.0,0.30222,0.833048,0.9857,0.978098,0.779732,0.992356,0.798343
2,1.0,0.798835,1.470639,-1.635975,0.453773,0.425629,1.104875,1.282322,1.381664,0.0,...,1.128848,0.900461,0.0,0.909753,1.10833,0.985692,0.951331,0.803252,0.865924,0.780118
3,0.0,1.344385,-0.876626,0.935913,1.99205,0.882454,1.786066,-1.646778,-0.942383,0.0,...,-0.678379,-1.360356,0.0,0.946652,1.028704,0.998656,0.728281,0.8692,1.026736,0.957904
4,1.0,1.105009,0.321356,1.522401,0.882808,-1.205349,0.681466,-1.070464,-0.921871,0.0,...,-0.373566,0.113041,0.0,0.755856,1.361057,0.98661,0.838085,1.133295,0.872245,0.808487


In [118]:
import math
import numpy as np

def generate_3d_jet(row):
    """Turn one event row into six 3-D vectors plus a 3-element tag per vector.

    Args:
        row: Mapping indexed by column name (e.g. a pandas Series) providing
            the jet, lepton and missing-energy columns read below.

    Returns:
        A pair ``(vectors, features)`` of equal-length lists, ordered as
        jet 1-4, lepton, missing energy.  Each vector is the result of
        ``process_single_jet``; each feature is ``[b-tag, 0, 0]`` for a jet,
        ``[0, 1, 0]`` for the lepton and ``[0, 0, 1]`` for the missing energy.
    """
    # (pt, eta, phi, b-tag) column names for each of the four jets.
    jet_columns = [
        ('jet_1_pt', 'jet_1_eta', 'jet_1_phi', 'jet_1_b-tag'),
        ('jet_2_pt', 'jet_2_eta', 'jet_2_phi', 'jet_2_b-tag'),
        ('jet_3_pt', 'jet_3_eta', 'jet_3_phi', 'jet_3_b-tag'),
        ('jet_4_pt', 'jet_4_eta', 'jet_4_phi', 'jet_4_b-tag'),
    ]

    vectors = []
    features = []
    for pt_col, eta_col, phi_col, btag_col in jet_columns:
        vectors.append(process_single_jet(row[pt_col], row[eta_col], row[phi_col]))
        features.append([row[btag_col], 0, 0])

    vectors.append(process_single_jet(row['lepton_pT'], row['lepton_eta'], row['lepton_phi']))
    features.append([0, 1, 0])

    # The row carries no eta for the missing energy, so 0 is passed instead.
    vectors.append(process_single_jet(row['missing_energy_magnitude'], 0, row['missing_energy_phi']))
    features.append([0, 0, 1])

    return vectors, features

def process_single_jet(jet_pt, jet_eta, jet_phi):
    """Convert collider coordinates (pT, eta, phi) to a Cartesian [x, y, z] vector.

    ``jet_pt`` is the *transverse* momentum, i.e. the length of the vector's
    projection onto the x-y plane, so it must not be scaled by sin(theta)
    again.  The standard conversion is used:

        x = pT * cos(phi)
        y = pT * sin(phi)
        z = pT * sinh(eta)      (equal to pT / tan(theta), theta = 2*atan(exp(-eta)))

    Args:
        jet_pt: Transverse momentum (magnitude in the x-y plane).
        jet_eta: Pseudorapidity; 0 yields a vector lying in the x-y plane.
        jet_phi: Azimuthal angle in radians.

    Returns:
        A three-element list ``[x, y, z]``.
    """
    x = jet_pt * math.cos(jet_phi)
    y = jet_pt * math.sin(jet_phi)
    # sinh(eta) == cot(theta); using it directly avoids the detour through theta.
    z = jet_pt * math.sinh(jet_eta)
    return [x, y, z]

def calculate_theta(n):
    """Return the angle ``2 * atan(exp(-n))`` in radians.

    Args:
        n: Real number; ``n = 0`` maps to pi/2, larger values map to smaller
            angles.

    Returns:
        The angle as a float.
    """
    half_angle = math.atan(math.exp(-n))
    return 2 * half_angle

import plotly.graph_objects as go

def plot_3d_vectors(vectors, label=None):
    """Show each vector as a 3-D line segment from the origin in one figure.

    Args:
        vectors: Iterable of ``(x, y, z)`` triples; one line trace is added
            per triple.
        label: Value used as the figure title (``None`` leaves it unset).

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    fig = go.Figure()
    for x, y, z in vectors:
        fig.add_trace(go.Scatter3d(x=[0, x], y=[0, y], z=[0, z],
                                   mode='lines',
                                   line=dict(width=2)))
    # The title does not depend on the vectors: set it once, after the loop,
    # so it is also applied when `vectors` is empty.
    fig.update_layout(title=label)
    fig.show()

In [119]:
# Plot the first four sample events, one figure each, titled with the
# event's class label.
for sample_idx in range(4):
    sample_event = test_row.iloc[sample_idx]
    jets, feat = generate_3d_jet(sample_event)
    plot_3d_vectors(jets, sample_event['class_label'])