In [1]:
import os
import sys

# Add the parent directory to the module search path.
# NOTE(review): presumably this is what lets the later `src.*` imports
# resolve -- confirm against the repository layout.
sys.path.append("../")

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and reload modules before each cell runs, so
# edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from os.path import join

import energyflow as ef
import h5py
import hydra
import numpy as np
import pytorch_lightning as pl
import torch
from omegaconf import OmegaConf
from sklearn.neighbors import KernelDensity

2023-09-08 22:03:17.198392: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# plots and metrics
import matplotlib.pyplot as plt

from src.data.components import (
    calculate_all_wasserstein_metrics,
    inverse_normalize_tensor,
    normalize_tensor,
)
from src.utils.data_generation import generate_data
from src.utils.plotting import apply_mpl_styles, plot_data, prepare_data_for_plotting

# Apply the matplotlib styles provided by src.utils.plotting (what exactly they
# configure is not visible from this notebook).
apply_mpl_styles()

In [4]:
from jetnet.evaluation import w1efp, w1m, w1p
from src.utils.jet_substructure import dump_hlvs
from src.data.components.metrics import wasserstein_distance_batched
from src.utils.plotting import (
    plot_substructure,
    plot_full_substructure,
)

In [5]:
# Populate the process environment from a .env file, then verify that DATA_DIR
# is actually defined before any later cell builds paths from it.
from dotenv import load_dotenv

load_dotenv()
if os.environ.get("DATA_DIR") is None:
    # Writing None into os.environ raises an opaque
    # "TypeError: str expected, not NoneType"; fail early with an actionable
    # message instead.
    raise RuntimeError(
        "DATA_DIR is not set: define it in the environment or in a .env file "
        "before running this notebook."
    )
# set env variable DATA_DIR again because of hydra
os.environ["DATA_DIR"] = os.environ["DATA_DIR"]

In [6]:
# Root directory of the datasets, read from the DATA_DIR environment variable
# (None when the variable is not defined).
data_folder = os.getenv("DATA_DIR")

## load data

In [7]:
# Load vinicius data: read the jet features, particle features and mjj arrays
# fully into memory from the FPCD_LHCO_SR.h5 file.
path_v = f"{data_folder}/lhco/generated/FPCD_LHCO_SR.h5"
with h5py.File(path_v, "r") as f:
    # Show which datasets the file provides.
    print(f.keys())
    jet_features_v, particle_data_v, mjj_v = (
        f[key][:] for key in ("jet_features", "particle_features", "mjj")
    )
# One shape per line, in the order: jet features, particle features, mjj.
print(jet_features_v.shape, particle_data_v.shape, mjj_v.shape, sep="\n")

<KeysViewHDF5 ['jet_features', 'mjj', 'particle_features']>
(200000, 2, 5)
(200000, 2, 279, 3)
(200000,)


In [8]:
# Load idealized data: read the jet features, particle features and mjj arrays
# fully into memory from the idealized_LHCO.h5 file.
path_id = f"{data_folder}/lhco/generated/idealized_LHCO.h5"
with h5py.File(path_id, "r") as f:
    # Show which datasets the file provides.
    print(f.keys())
    jet_features_id, particle_data_id, mjj_id = (
        f[key][:] for key in ("jet_features", "particle_features", "mjj")
    )
# One shape per line, in the order: jet features, particle features, mjj.
print(jet_features_id.shape, particle_data_id.shape, mjj_id.shape, sep="\n")

<KeysViewHDF5 ['jet_features', 'mjj', 'particle_features']>
(121351, 2, 5)
(121351, 2, 279, 3)
(121351,)


In [9]:
# Load ced data
# Other generated files that have been used for `path_ced` (swap in as needed):
#   f"{data_folder}/lhco/generated/FM_LHCO_SR.h5"
#   f"{data_folder}/lhco/generated/lhco_both_jets-midpoint-250.h5"
#   f"{data_folder}/lhco/generated/latent64-midpoint-200.h5"
#   f"{data_folder}/lhco/generated/lhco-xy-midpoint-300.h5"
#   f"{data_folder}/lhco/generated/FPCD_LHCO_SR_2.h5"
path_ced = f"{data_folder}/lhco/generated/lhco-xy-256-logpt_sr-midpoint-500.h5"

with h5py.File(path_ced, "r") as f:
    # Show which datasets the file provides.
    print(f.keys())
    # Read every dataset used below fully into memory while the file is open.
    jet_features_ced, particle_data_ced, mjj_ced, raw_ced = (
        f[key][:]
        for key in ("jet_features", "particle_features", "mjj", "data_raw")
    )
# Only the jet and particle feature shapes are printed (the mjj_ced shape
# print is disabled).
print(jet_features_ced.shape, particle_data_ced.shape, sep="\n")

<KeysViewHDF5 ['data_raw', 'jet_features', 'mjj', 'particle_features', 'particle_features_nonrel']>
(121351, 2, 5)
(121351, 2, 279, 3)


In [10]:
# Cut each "v" array down to the length of its idealized counterpart so the
# samples being compared have the same number of entries.
particle_data_v, jet_features_v, mjj_v = (
    particle_data_v[: len(particle_data_id)],
    jet_features_v[: len(jet_features_id)],
    mjj_v[: len(mjj_id)],
)

In [11]:
# Reorder the last axis of every particle array from (0, 1, 2) to (1, 2, 0).
# NOTE(review): each array is overwritten with its permuted version, so
# running this cell a second time permutes the columns again -- run it exactly
# once per kernel session. What the three columns represent is not visible
# here; confirm against the data producer.
particle_data_ced = np.take(particle_data_ced, [1, 2, 0], axis=-1)
particle_data_id = np.take(particle_data_id, [1, 2, 0], axis=-1)
particle_data_v = np.take(particle_data_v, [1, 2, 0], axis=-1)

## calculate substructure

In [12]:
# Sanity check: the three particle arrays should have matching shapes.
# Printed one per line in the order ced, idealized, v.
print(
    particle_data_ced.shape,
    particle_data_id.shape,
    particle_data_v.shape,
    sep="\n",
)

(121351, 2, 279, 3)
(121351, 2, 279, 3)
(121351, 2, 279, 3)


In [13]:
# Shape after collapsing all leading axes into one while keeping the last two.
print(particle_data_v.reshape(-1, *particle_data_v.shape[-2:]).shape)

(242702, 279, 3)


In [14]:
# Run the substructure dump on the "v" sample, passing the particle array with
# all leading axes collapsed into one (last two axes kept as they are).
# NOTE(review): the output location is a hard-coded, user-specific absolute
# path rather than something derived from DATA_DIR; adjust before running on
# another machine. The captured progress bar suggests this is a long-running
# cell.
dump_hlvs(
    particle_data_v.reshape(-1, *particle_data_v.shape[-2:]),
    "/beegfs/desy/user/ewencedr/data/lhco/substructure/full_v",
    plot=True,
)

Computing substructure variables:   2%|▏         | 5615/242702 [01:11<49:59, 79.05it/s]  


KeyboardInterrupt: 