In [None]:
import os
import sys

sys.path.append("../")

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import hydra
import numpy as np
import pytorch_lightning as pl
import torch
from omegaconf import OmegaConf

In [None]:
# Load variables from a local .env file, then set DATA_DIR again because of hydra.
from dotenv import load_dotenv

load_dotenv()
# Fail early with a readable message: assigning None to os.environ would raise
# an opaque "TypeError: str expected, not NoneType" when DATA_DIR is missing.
if "DATA_DIR" not in os.environ:
    raise RuntimeError(
        "DATA_DIR is not set; export it or define it in a .env file before running this notebook."
    )
os.environ["DATA_DIR"] = os.environ["DATA_DIR"]

In [None]:
# plots and metrics
import matplotlib.pyplot as plt

from src.data.components import calculate_all_wasserstein_metrics
from src.utils.data_generation import generate_data
from src.utils.plotting import apply_mpl_styles, create_and_plot_data, plot_single_jets

# Call the project's style helper from src.utils.plotting before any figure is
# created (presumably it sets global matplotlib settings — confirm in the helper).
apply_mpl_styles()

In [None]:
# Experiment config names; each is passed to hydra as an `experiment=<name>` override.
experiment1, experiment2 = "plot1.yaml", "plot2.yaml"

In [None]:
# Compose train.yaml with the first experiment applied as an override.
with hydra.initialize(version_base=None, config_path="../configs/"):
    cfg1 = hydra.compose(
        config_name="train.yaml",
        overrides=["experiment=" + experiment1],
    )
    # Uncomment to inspect the resolved config:
    # print(OmegaConf.to_yaml(cfg1))

In [None]:
# Compose train.yaml with the second experiment applied as an override.
with hydra.initialize(version_base=None, config_path="../configs/"):
    cfg2 = hydra.compose(
        config_name="train.yaml",
        overrides=["experiment=" + experiment2],
    )
    # Uncomment to inspect the resolved config:
    # print(OmegaConf.to_yaml(cfg2))

In [None]:
# Build both datamodules first, then both models, each from its own config
# (same instantiation order as before: data 1, data 2, model 1, model 2).
datamodule1, datamodule2 = (hydra.utils.instantiate(cfg.data) for cfg in (cfg1, cfg2))
model1, model2 = (hydra.utils.instantiate(cfg.model) for cfg in (cfg1, cfg2))

In [None]:
# Name tag, presumably meant for saved outputs; no other cell visible in this
# notebook reads it.
model_name_for_saving = "nb_fm_tops30"

In [None]:
# Run setup() on both datamodules; the tensor_*/mask_*/means/stds attributes
# used by the following cells are only read after these calls.
datamodule1.setup()
datamodule2.setup()

In [None]:
# Convert every split of the first datamodule (data, mask, conditioning) and its
# means/stds to NumPy arrays.
test_data1, test_mask1, test_cond1 = (
    np.array(getattr(datamodule1, attr))
    for attr in ("tensor_test", "mask_test", "tensor_conditioning_test")
)
val_data1, val_mask1, val_cond1 = (
    np.array(getattr(datamodule1, attr))
    for attr in ("tensor_val", "mask_val", "tensor_conditioning_val")
)
train_data1, train_mask1, train_cond1 = (
    np.array(getattr(datamodule1, attr))
    for attr in ("tensor_train", "mask_train", "tensor_conditioning_train")
)
means1, stds1 = (np.array(getattr(datamodule1, attr)) for attr in ("means", "stds"))

In [None]:
# Sanity check: one output line per item — shapes of the test, val and train
# arrays (data, mask, conditioning), followed by means1 and stds1.
print(
    test_data1.shape,
    test_mask1.shape,
    test_cond1.shape,
    val_data1.shape,
    val_mask1.shape,
    val_cond1.shape,
    train_data1.shape,
    train_mask1.shape,
    train_cond1.shape,
    means1,
    stds1,
    sep="\n",
)

In [None]:
# Convert every split of the second datamodule (data, mask, conditioning) and
# its means/stds to NumPy arrays.
test_data2, test_mask2, test_cond2 = (
    np.array(getattr(datamodule2, attr))
    for attr in ("tensor_test", "mask_test", "tensor_conditioning_test")
)
val_data2, val_mask2, val_cond2 = (
    np.array(getattr(datamodule2, attr))
    for attr in ("tensor_val", "mask_val", "tensor_conditioning_val")
)
train_data2, train_mask2, train_cond2 = (
    np.array(getattr(datamodule2, attr))
    for attr in ("tensor_train", "mask_train", "tensor_conditioning_train")
)
means2, stds2 = (np.array(getattr(datamodule2, attr)) for attr in ("means", "stds"))

In [None]:
# Checkpoint files of the two runs (absolute paths; file names end in -EMA.ckpt).
ckpt1 = "/beegfs/desy/user/ewencedr/deep-learning/logs/150 alljets nocond onlymetrics/runs/2023-06-14_18-51-51/checkpoints/epoch_6983_w1m_0.00038674-EMA.ckpt"
ckpt2 = "/beegfs/desy/user/ewencedr/deep-learning/logs/150 alljets condmasspt onlymetrics/runs/2023-06-15_16-28-35/checkpoints/epoch_4986_w1m_0.00014095-EMA.ckpt"
# load_from_checkpoint is a classmethod that returns a new model, so call it on
# the model class instead of on the instantiated object: recent Lightning
# releases raise an error when it is invoked on an instance.
model1 = type(model1).load_from_checkpoint(ckpt1)
model2 = type(model2).load_from_checkpoint(ckpt2)

In [None]:
# Jet type to select; the selection cells compare it against datamodule.jet_types.
jet_type = "t"

In [None]:
# Work on the test split of the first datamodule.
# NOTE: data1 is rebound later in this notebook by the np.load cell.
mask1, data1, cond1 = test_mask1, test_data1, test_cond1

In [None]:
# Select only data, mask and cond for the specified jet type.
# The training split is filtered too because it is compared to test data later.

# Column of the conditioning array that flags `jet_type`; exactly one entry of
# datamodule1.jet_types must match, otherwise the selection below is meaningless.
index_jettype1 = np.flatnonzero(np.array(datamodule1.jet_types) == jet_type)
if index_jettype1.size != 1:
    raise ValueError(
        f"expected exactly one entry {jet_type!r} in datamodule1.jet_types, "
        f"found {index_jettype1.size}"
    )
index_jettype1 = index_jettype1[0]

# np.flatnonzero always returns a 1-D index array, so a split containing a
# single matching jet keeps its leading jet axis (np.squeeze(np.argwhere(...))
# collapses to a 0-d index in that case and drops the axis).
# Assumes the conditioning arrays are 2-D (jets x conditioning columns) — TODO confirm.
indice_jettype1 = np.flatnonzero(cond1[:, index_jettype1] == 1)
indice_jettype_train1 = np.flatnonzero(train_cond1[:, index_jettype1] == 1)

mask_jettype1 = mask1[indice_jettype1]
data_jettype1 = data1[indice_jettype1]
cond_jettype1 = cond1[indice_jettype1]
train_mask_jettype1 = train_mask1[indice_jettype_train1]
train_data_jettype1 = train_data1[indice_jettype_train1]
train_cond_jettype1 = train_cond1[indice_jettype_train1]

print(mask_jettype1.shape)
print(data_jettype1.shape)
print(cond_jettype1.shape)
print(train_mask_jettype1.shape)
print(train_data_jettype1.shape)
print(train_cond_jettype1.shape)

In [None]:
# Work on the test split of the second datamodule.
# NOTE: data2 is rebound later in this notebook by the np.load cell.
mask2, data2, cond2 = test_mask2, test_data2, test_cond2

In [None]:
# Select only data, mask and cond for the specified jet type (second datamodule).
# The training split is filtered too because it is compared to test data later.

# Column of the conditioning array that flags `jet_type`; exactly one entry of
# datamodule2.jet_types must match, otherwise the selection below is meaningless.
index_jettype2 = np.flatnonzero(np.array(datamodule2.jet_types) == jet_type)
if index_jettype2.size != 1:
    raise ValueError(
        f"expected exactly one entry {jet_type!r} in datamodule2.jet_types, "
        f"found {index_jettype2.size}"
    )
index_jettype2 = index_jettype2[0]

# np.flatnonzero always returns a 1-D index array, so a split containing a
# single matching jet keeps its leading jet axis (np.squeeze(np.argwhere(...))
# collapses to a 0-d index in that case and drops the axis).
# Assumes the conditioning arrays are 2-D (jets x conditioning columns) — TODO confirm.
indice_jettype2 = np.flatnonzero(cond2[:, index_jettype2] == 1)
indice_jettype_train2 = np.flatnonzero(train_cond2[:, index_jettype2] == 1)

mask_jettype2 = mask2[indice_jettype2]
data_jettype2 = data2[indice_jettype2]
cond_jettype2 = cond2[indice_jettype2]
# Fixed copy-paste slip: the training selection previously re-assigned the
# *1 variables from datamodule1 and never used indice_jettype_train2.
train_mask_jettype2 = train_mask2[indice_jettype_train2]
train_data_jettype2 = train_data2[indice_jettype_train2]
train_cond_jettype2 = train_cond2[indice_jettype_train2]

# Report the shapes of the datamodule2 selections made in this cell.
print(mask_jettype2.shape)
print(data_jettype2.shape)
print(cond_jettype2.shape)
print(train_mask_jettype2.shape)
print(train_data_jettype2.shape)
print(train_cond_jettype2.shape)

In [None]:
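# NOTE: disabled call, kept for reference only. It uses names without the 1/2
# suffix (data_jettype, cond_jettype, mask_jettype, means, stds) that no visible
# cell defines, so it must be adapted before being re-enabled.
# fig, data, generation_times = create_and_plot_data(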
#    np.array(data_jettype),
#    [model1, model2],
#    cond=[torch.tensor(cond_jettype), torch.tensor(cond_jettype)],
#    save_name="fm_tops_nb",
#    labels=["FM", "2"],
#    mask=mask_jettype,
#    num_jet_samples=len(data_jettype),
#    batch_size=1000,
#    variable_set_sizes=True,
#    normalized_data=[True, True],
#    means=means,
#    stds=stds,
#    save_folder="./logs/nb_plots/",
#    plottype="sim_data",
#    plot_jet_features=True,
#    plot_w_dists=False,
#    plot_selected_multiplicities=False,
#    selected_multiplicities=[1, 3, 5, 10, 20, 30],
#    ode_solver="midpoint",
#    ode_steps=100,
#    bins=100,
#    mass_linear=False,
# )

In [None]:
# Load the stored gen_data_t.npy array of each run (presumably the samples
# generated by the corresponding model — confirm).
# NOTE: this rebinds data1/data2, which pointed at the test split until here.
data1 = np.load(file="/beegfs/desy/user/ewencedr/deep-learning/logs/150 alljets nocond onlymetrics/runs/2023-06-14_18-51-51/gen_data_t.npy")
data2 = np.load(file="/beegfs/desy/user/ewencedr/deep-learning/logs/150 alljets condmasspt onlymetrics/runs/2023-06-15_16-28-35/gen_data_t.npy")

In [None]:
# Shapes of the two loaded arrays, one per line.
print(data1.shape, data2.shape, sep="\n")

In [None]:
from src.utils.plotting import plot_data, prepare_data_for_plotting

In [None]:
# Run the plotting pre-processing on both loaded sets at once, combined into a
# single array with a new leading axis.
jet_data, efps_values, pt_selected_particles, pt_selected_multiplicities = (
    prepare_data_for_plotting(np.array([data1, data2]))
)

In [None]:
# Same pre-processing for the reference jets: a one-element list holding the
# jet-type selection of the first datamodule's test split.
jet_data_sim, efps_sim, pt_selected_particles_sim, pt_selected_multiplicities_sim = (
    prepare_data_for_plotting([data_jettype1])
)

In [None]:
# Compare the shapes of the reference and generated selected-particle arrays.
print(pt_selected_particles_sim.shape, pt_selected_particles.shape, sep="\n")

In [None]:
# Shape of the combined array that is passed to plot_data as particle_data.
print(np.array([data1, data2]).shape)

In [None]:
print(jet_data.shape, data_jettype1.shape, mask_jettype1.shape, sep="\n")
# Append the mask to the particle features along the last axis.
# NOTE(review): the shapes printed above come from the datamodule1 selection,
# while sim_data is built from the datamodule2 selection — confirm this mix is intended.
sim_data = np.concatenate((data_jettype2, mask_jettype2), axis=-1)
print(sim_data.shape)

In [None]:
# Plot the two loaded sets (labelled "all" and "all cond") against the
# reference jets (labelled "JetNet"); the figure is neither saved nor closed.
# NOTE(review): jet_data_sim / efps_sim / pt_selected_particles_sim were derived
# from data_jettype1, while sim_data was built from data_jettype2 — confirm both
# selections describe the same jets.
fig = plot_data(
    particle_data=np.array([data1, data2]),
    sim_data=sim_data,
    jet_data_sim=jet_data_sim[0],
    jet_data=jet_data,
    efps_sim=efps_sim[0],
    efps_values=efps_values,
    labels=["all", "all cond"],
    sim_data_label="JetNet",
    plot_jet_features=True,
    plot_w_dists=False,
    plot_efps=False,
    plot_selected_multiplicities=False,
    selected_multiplicities=[20, 30, 40],
    selected_particles=[1, 3, 10],
    pt_selected_particles=pt_selected_particles,
    pt_selected_multiplicities=pt_selected_multiplicities,
    pt_selected_particles_sim=pt_selected_particles_sim[0],
    # NOTE(review): passed without [0], unlike the other *_sim arguments — confirm.
    pt_selected_multiplicities_sim=pt_selected_multiplicities_sim,
    plottype="sim_data",
    save_fig=False,
    variable_jet_sizes_plotting=True,
    bins=100,
    close_fig=False,
)