In [1]:
import os
import shutil
import sys
import os.path as osp
sys.path
sys.path.append('../../L1DeepMETv2/')
from graphmetnetwork import GraphMetNetwork

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.utils import to_undirected
from torch_cluster import radius_graph, knn_graph
from torch_geometric.datasets import MNISTSuperpixels
import torch_geometric.transforms as T
from torch_geometric.data import DataLoader
from tqdm import tqdm
import model.net as net
import model.data_loader as data_loader
import utils

  from .autonotebook import tqdm as notebook_tqdm


### Load Parameters

In [2]:
data_dir = '../../L1DeepMETv2/data_ttbar'

In [3]:
dataloaders = data_loader.fetch_dataloader(data_dir = data_dir, batch_size=6, validation_split=.2)
train_dl = dataloaders['train']
test_dl = dataloaders['test']
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Training dataloader: {}, Test dataloader: {}'.format(len(train_dl), len(test_dl)))
print(device)

Processing...
0it [00:00, ?it/s]
Done!


Split: 199708
length of train/val data:  798834 199708
Training dataloader: 133139, Test dataloader: 33285
cuda




In [4]:
test_data = None
for cnt, test_data in enumerate(test_dl):
    if cnt == 5:
        break

#### Load Tensor Parameters

In [5]:
n_features_cont = 6
x_cont_test = test_data.x[:,:n_features_cont] .to(device)  # include puppi
x_cat_test = test_data.x[:,n_features_cont:].long().to(device)
etaphi_test = torch.cat([test_data.x[:, 3][:, None], test_data.x[:, 4][:, None]], dim=1).to(device=device)
batch_test = test_data.batch.to(device)
edge_index_test = radius_graph(etaphi_test, r=0.4, batch=batch_test, loop=False, max_num_neighbors=255).to(device=device)
print(f'x_cont_test: {x_cont_test.shape}')
print(f'x_cat_test: {x_cat_test.shape}')
print(f'etaphi: {etaphi_test.shape}')
print(f'batch: {batch_test.shape}')
print(f'edge_index: {edge_index_test.shape}')

NVIDIA RTX A6000 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 compute_37.
If you want to use the NVIDIA RTX A6000 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



x_cont_test: torch.Size([267, 6])
x_cat_test: torch.Size([267, 2])
etaphi: torch.Size([267, 2])
batch: torch.Size([267])
edge_index: torch.Size([2, 1494])


#### Convert Tensor parameters to Numpy arrays

In [6]:
x_cont = np.ascontiguousarray(x_cont_test.squeeze(0).cpu().numpy())
x_cat = np.ascontiguousarray(x_cat_test.squeeze(0).cpu().numpy())
batch = np.ascontiguousarray(batch_test.squeeze(0).cpu().numpy())
etaphi = etaphi_test.squeeze(0).cpu().numpy()
edge_index = edge_index_test.squeeze(0).cpu().numpy().transpose()
num_nodes = x_cont.shape[0]
batch_size = batch.shape[0]
print(f'Number of nodes: {num_nodes}')
assert(num_nodes == batch_size)

Number of nodes: 267


### Load the Torch Model

In [7]:
prefix = '../../L1DeepMETv2/ckpts_April30_scale_sigmoid'
restore_ckpt = osp.join(prefix, 'last.pth.tar')
norm = torch.tensor([1., 1., 1., 1., 1., 1.]).to(device=device)
torch_model = net.Net(continuous_dim=6, categorical_dim=2 , norm=norm).to(device)
torch_model.eval()
print(torch_model)

Net(
  (graphnet): GraphMETNetwork(
    (embed_charge): Embedding(3, 8)
    (embed_pdgid): Embedding(7, 8)
    (embed_continuous): Sequential(
      (0): Linear(in_features=6, out_features=16, bias=True)
      (1): ELU(alpha=1.0)
    )
    (embed_categorical): Sequential(
      (0): Linear(in_features=16, out_features=16, bias=True)
      (1): ELU(alpha=1.0)
    )
    (encode_all): Sequential(
      (0): Linear(in_features=32, out_features=32, bias=True)
      (1): ELU(alpha=1.0)
    )
    (bn_all): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_continuous): ModuleList(
      (0): ModuleList(
        (0): EdgeConv(nn=Sequential(
          (0): Linear(in_features=64, out_features=32, bias=True)
        ))
        (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): ModuleList(
        (0): EdgeConv(nn=Sequential(
          (0): Linear(in_features=64, out_features=32, bias=True)
        ))
     



#### Get the weights

In [8]:
param_restored_new = utils.load_checkpoint(restore_ckpt, torch_model)
weights_dict = param_restored_new['state_dict']
print(weights_dict)

OrderedDict([('graphnet.embed_charge.weight', tensor([[ 0.9152, -1.0589, -0.9910,  0.0120, -1.2543,  0.2998, -0.3684, -0.0260],
        [ 1.1699, -1.1929, -0.4268, -0.7047, -0.3582,  0.5368,  1.0060, -0.7781],
        [-1.1897, -0.7680,  0.9429,  0.2915, -0.2274, -1.3632,  0.6982,  0.4960]],
       device='cuda:0')), ('graphnet.embed_pdgid.weight', tensor([[-2.0383, -0.3847, -0.2413,  0.9122, -0.4805,  0.2302, -0.4746,  0.9317],
        [-0.4117,  1.3556,  0.8550,  0.9977, -0.1622,  1.0582, -0.3220, -0.1858],
        [ 0.3344, -1.7237,  0.2500,  0.0249, -1.0838, -0.7614, -0.2512, -1.4239],
        [-0.0926,  0.4536, -0.2328,  1.5914, -0.3767, -1.6118,  0.4281,  0.3517],
        [-0.8126, -1.1399, -0.2381, -0.4232,  0.0113, -1.9791, -0.4646, -1.3267],
        [ 0.1623,  0.1171, -0.4400,  0.8133, -1.5196,  2.1547,  1.1111, -1.5128],
        [-2.0250,  0.8894,  1.1295, -1.7737, -1.0402, -1.3035, -0.5598, -0.0130]],
       device='cuda:0')), ('graphnet.embed_continuous.0.weight', tensor([[

In [9]:
output_dir = "weights_files/"

# Check if the directory exists
if os.path.exists(output_dir):
    # Iterate over all the files in the directory
    for filename in os.listdir(output_dir):
        file_path = os.path.join(output_dir, filename)
        try:
            # Check if it's a file and delete it
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            # If it's a directory, delete the directory and its contents
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")
else:
    print(f"Directory {output_dir} does not exist.")


# Function to save the weights as binary files
def save_weights_as_binary(weights_dict, output_dir):
    for key, tensor in weights_dict.items():
        # Convert the tensor to a NumPy array
        np_array = tensor.cpu().numpy()

        # Create a binary file name based on the tensor name
        file_name = output_dir + key.replace('.', '_') + '.bin'

        # Save the NumPy array as a binary file
        np_array.tofile(file_name)
        
# Save all weights in the OrderedDict to binary files
save_weights_as_binary(weights_dict, output_dir)

### Load the C++ Model

In [10]:
# Create an instance of the C++ GraphMetNetwork model
cmodel = GraphMetNetwork()

# Load the weights
cmodel.load_weights(output_dir)

### Test the weights

In [11]:
num_weights = 0
for key, tensor in weights_dict.items():
    # Convert the tensor to a NumPy array
    np_array = tensor.cpu().numpy()

    # Return cmodel function pointer to get the weight array
    cmodel_weight_func_name = 'get_' + key.replace('.', '_')
    # if (cmodel_weight_func_name == 'get_graphnet_bn_all_num_batches_tracked' or
    #     cmodel_weight_func_name == 'get_graphnet_conv_continuous_0_1_num_batches_tracked' or
    #     cmodel_weight_func_name == 'get_graphnet_conv_continuous_1_1_num_batches_tracked'):
    #     continue
    cmodel_weight_func = getattr(cmodel, cmodel_weight_func_name)
    cmodel_weight_array = cmodel_weight_func()
    
    # Compare Torch model weight with Cmodel weight
    assert(np.allclose(np_array, cmodel_weight_array, atol=1e-5)), f'cmodel.{cmodel_weight_func_name} returned the wrong weights'
    # print(f'cmodel.{cmodel_weight_func_name} matches')
    num_weights += 1

print(f'Number of weights checked: {num_weights}')

Number of weights checked: 31


### Run the Torch Model

In [12]:
results = torch_model(x_cont_test, x_cat_test, edge_index_test, batch_test)

### Run the C++ Model

In [13]:
cmodel.GraphMetNetworkLayers(x_cont, x_cat, batch, num_nodes)

#### Test Inputs

In [14]:
c_x_cont = cmodel.get_x_cont()
c_x_cat = cmodel.get_x_cat()
c_batch = cmodel.get_batch()
c_num_nodes = cmodel.get_num_nodes()
print(f'Shape of c_x_cont: {c_x_cont.shape}')
print(f'Shape of c_x_cat: {c_x_cat.shape}')
print(f'Shape of c_batch: {c_batch.shape}')
print(f'Value of c_num_nodes: {c_num_nodes}')

Shape of c_x_cont: (267, 6)
Shape of c_x_cat: (267, 2)
Shape of c_batch: (267,)
Value of c_num_nodes: 267


In [15]:
# print(x_cont)
assert(np.allclose(x_cont, c_x_cont, atol=1e-5))
assert(np.allclose(x_cat, c_x_cat, atol=1e-5))
assert(np.allclose(batch, c_batch, atol=1e-5))
assert(np.allclose(num_nodes, c_num_nodes, atol=1e-5))

#### Test Internal Variables

In [16]:
c_num_edges = cmodel.get_num_edges()
c_edge_index = cmodel.get_edge_index()
edge_index_np = edge_index_test.squeeze(0).cpu().numpy().transpose()
c_etaphi = cmodel.get_etaphi()
assert(np.allclose(etaphi, c_etaphi, atol=1e-5))
print(edge_index_np.shape)
print(c_edge_index.shape)
print(f'Number of C edges: {c_num_edges}')

(1494, 2)
(1494, 2)
Number of C edges: 1494


#### Test Intermediate Variables

In [17]:
# Compare intermediate values as before
np.testing.assert_allclose(torch_model.graphnet.emb_cont_.cpu().numpy(), cmodel.get_emb_cont(), rtol=1e-5)

AssertionError: 
Not equal to tolerance rtol=1e-05, atol=0

Mismatched elements: 6 / 4272 (0.14%)
Max absolute difference: 1.9073486e-06
Max relative difference: 0.000123
 x: array([[-1.      , -0.968301,  2.005721, ..., -0.997374, -1.      ,
        -0.956466],
       [-0.999936, -1.      , -0.999971, ..., -0.999973, -0.999997,...
 y: array([[-1.      , -0.968301,  2.005721, ..., -0.997374, -1.      ,
        -0.956466],
       [-0.999936, -1.      , -0.999971, ..., -0.999973, -0.999997,...

In [18]:
np.testing.assert_allclose(torch_model.graphnet.emb_chrg_.cpu().numpy(), cmodel.get_emb_chrg(), rtol=1e-5)

In [19]:
np.testing.assert_allclose(torch_model.graphnet._emb_pdg_.cpu().numpy(), cmodel.get_emb_pdg(), rtol=1e-5)

In [20]:
np.testing.assert_allclose(torch_model.graphnet.emb_cat_.cpu().numpy(), cmodel.get_emb_cat(), rtol=1e-5)

AssertionError: 
Not equal to tolerance rtol=1e-05, atol=0

Mismatched elements: 3 / 4272 (0.0702%)
Max absolute difference: 2.3841858e-07
Max relative difference: 2.137216e-05
 x: array([[-0.266257, -0.23016 , -0.468719, ..., -0.358534,  0.351129,
        -0.558314],
       [ 0.125356,  0.067314,  1.219448, ..., -0.520794, -0.241082,...
 y: array([[-0.266257, -0.23016 , -0.468719, ..., -0.358534,  0.351129,
        -0.558314],
       [ 0.125356,  0.067314,  1.219449, ..., -0.520794, -0.241082,...

In [21]:
np.testing.assert_allclose(torch_model.graphnet.encode_all_.cpu().numpy(), cmodel.get_encode_all(), rtol=1e-5)

AssertionError: 
Not equal to tolerance rtol=1e-05, atol=0

Mismatched elements: 34 / 8544 (0.398%)
Max absolute difference: 1.4305115e-06
Max relative difference: 0.00036407
 x: array([[-0.266241, -0.231575,  0.549079, ..., -0.165303, -0.911824,
        -0.921602],
       [ 1.704721, -0.755376,  0.741442, ...,  0.621892, -0.904935,...
 y: array([[-0.266241, -0.231575,  0.549079, ..., -0.165303, -0.911824,
        -0.921602],
       [ 1.704721, -0.755376,  0.741442, ...,  0.621892, -0.904935,...

In [22]:
np.testing.assert_allclose(torch_model.graphnet.emb_.cpu().numpy(), cmodel.get_emb(), rtol=1e-5)

AssertionError: 
Not equal to tolerance rtol=1e-05, atol=0

Mismatched elements: 42 / 8544 (0.492%)
Max absolute difference: 3.0517578e-05
Max relative difference: 0.0010149
 x: array([[-1.496144, -2.618178,  1.397834, ..., -3.41971 , -5.633175,
        -3.583886],
       [ 5.387336, -9.104922,  2.471953, ...,  1.018879, -5.595757,...
 y: array([[-1.496144, -2.618177,  1.397833, ..., -3.41971 , -5.633175,
        -3.583886],
       [ 5.387336, -9.104922,  2.471954, ...,  1.018879, -5.595757,...

In [23]:
np.testing.assert_allclose(torch_model.graphnet.emb1_.cpu().numpy(), cmodel.get_emb1(), rtol=1e-2)

AssertionError: 
Not equal to tolerance rtol=0.01, atol=0

x and y -inf location mismatch:
 x: array([[ 50.80014 , -14.90132 , -14.460999, ..., -40.38353 , -26.757717,
        -10.349055],
       [ 21.010948, -15.40152 ,   9.094251, ..., -13.145414, -10.811636,...
 y: array([[ 50.80014 , -14.901326, -14.460999, ..., -40.383537, -26.75771 ,
        -10.349054],
       [ 21.010944, -15.40152 ,   9.094253, ..., -13.145413, -10.811638,...

In [25]:
torch_emb1 = torch_model.graphnet.emb1_.cpu().numpy()
c_emb1 = cmodel.get_emb1()
print(torch_emb1.shape)
print(c_emb1.shape)

(267, 32)
(267, 32)


In [26]:
print("c_emb1 has inf:", np.isinf(c_emb1).any())

c_emb1 has inf: True


In [28]:
print(torch_emb1)

[[ 50.80014    -14.90132    -14.4609995  ... -40.38353    -26.757717
  -10.349055  ]
 [ 21.010948   -15.40152      9.094251   ... -13.145414   -10.811636
   -0.5771712 ]
 [ 20.654512   -11.478975     7.195159   ... -11.484139   -10.536681
   -0.13062656]
 ...
 [  9.999883     0.4486767   -0.19843754 ...  -5.4074097   -7.0787883
   -1.1682463 ]
 [ 10.504782     1.1126866    2.0058398  ...  -5.272686   -10.679102
    2.0757022 ]
 [ 13.856306    47.9183       0.22565247 ...  12.416369   -10.4405775
   14.771691  ]]


In [27]:
print(c_emb1)

[[ 50.80014    -14.901326   -14.4609995  ... -40.383537   -26.75771
  -10.349054  ]
 [ 21.010944   -15.40152      9.094253   ... -13.145413   -10.811638
   -0.57717085]
 [ 20.65451    -11.478975     7.195159   ... -11.484137   -10.536682
   -0.1306274 ]
 ...
 [  9.999885     0.44867814  -0.19843808 ...  -5.4074097   -7.0787888
   -1.1682469 ]
 [ 10.504782     1.1126882    2.0058398  ...  -5.2726865  -10.679101
    2.075703  ]
 [ 13.856305    47.918297     0.22565307 ...  12.416371   -10.44058
   14.771691  ]]


In [30]:
ar_inf = np.where(np.isinf(c_emb1))
print(ar_inf)

(array([ 24,  24,  24, ..., 262, 262, 262]), array([ 0,  1,  2, ..., 29, 30, 31]))


In [31]:
c_emb1[24][0]

-inf

In [None]:
np.testing.assert_allclose(torch_model.graphnet.emb2_.cpu().numpy(), cmodel.get_emb2(), atol=1e-5)

In [None]:
# Calculate Euclidean distance
arr1 = torch_model.graphnet.emb2_.cpu().numpy()
arr2 = cmodel.get_emb2()
print(arr1.shape)
print(arr2.shape)
# euclidean_distance = np.linalg.norm(torch_model.graphnet.emb2_.cpu().numpy() - cmodel.get_emb2())
# print("Euclidean distance:", euclidean_distance)

mse = np.mean((arr1 - arr2) ** 2)
print("Mean Squared Error:", mse)

In [None]:
# Check if either array contains NaN or inf
print("Array1 has NaN:", np.isnan(arr1).any())
print("Array1 has inf:", np.isinf(arr1).any())
print("Array2 has NaN:", np.isnan(arr2).any())
print("Array2 has inf:", np.isinf(arr2).any())


#### Test Final Result

In [None]:
np_results = results.detach().cpu().numpy()
print(np_results)

In [None]:
np.testing.assert_allclose(np_results, cmodel.get_output(), rtol=1e-5)

### Other Tests

In [None]:
import c_radius_graph

# Example points in 2D space and their corresponding batch indices
points = [[0.0, 0.0], [1.0, 1.0], [2.0, 2.0],  # Batch 0
          [3.0, 3.0], [4.0, 4.0],              # Batch 1
          [5.0, 5.0], [6.0, 6.0]]              # Batch 2
batch_indices = [0, 0, 0, 1, 1, 2, 2]  # Batch assignments
radius = 1.5

In [None]:
# Call the C++ function
neighbors = c_radius_graph.find_neighbors_by_batch(points, batch_indices, radius)

# Print neighbors
for pair in neighbors:
    print(f"Point {pair[0]} is within radius of point {pair[1]}")

neighbors

In [None]:
points_tensor = torch.tensor(points)
batch_tensor = torch.tensor(batch_indices)

edge_index_pts = radius_graph(points_tensor, r=radius, batch=batch_tensor, loop=False, max_num_neighbors=255)

In [None]:
print(edge_index_pts)

In [None]:
# Example usage in Python
import graphmetnetwork_bindings as gmn

# Create an instance of the model
model = gmn.GraphMetNetwork()

# Load the weights
model.load_weights("path_to_weights_file.txt")

# Now, you can run the model with input data
for i, (x_cont, x_cat, edge_index, batch) in enumerate(dataloader):
    num_nodes = x_cont.shape[0]

    # Run the PyTorch model
    with torch.no_grad():
        output = torch_model(x_cont.squeeze(0), x_cat.squeeze(0), edge_index.squeeze(0), batch.squeeze(0))

    # Run the C++ model
    model.GraphMetNetworkLayer(x_cont.squeeze(0).numpy(), x_cat.squeeze(0).numpy(), num_nodes, batch.squeeze(0).numpy())

    # Compare intermediate values as before
    np.testing.assert_allclose(torch_model._emb_cont.numpy(), model.get_emb_cont(), rtol=1e-5)
    np.testing.assert_allclose(torch_model._emb_chrg.numpy(), model.get_emb_chrg(), rtol=1e-5)
    np.testing.assert_allclose(torch_model._emb_pdg.numpy(), model.get_emb_pdg(), rtol=1e-5)
    np.testing.assert_allclose(torch_model._emb_cat.numpy(), model.get_emb_cat(), rtol=1e-5)
    np.testing.assert_allclose(torch_model._emb.numpy(), model.get_emb(), rtol=1e-5)
    np.testing.assert_allclose(torch_model._emb1.numpy(), model.get_emb1(), rtol=1e-5)
    np.testing.assert_allclose(torch_model._emb2.numpy(), model.get_emb2(), rtol=1e-5)
    np.testing.assert_allclose(output.numpy(), model.get_output(), rtol=1e-5)
