In [1]:
import torch
import uproot
import random
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
import lightning as L

In [2]:
# basic random seed
def seed_basic(seed=42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)


# torch random seed
def seed_torch(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# basic + torch + lightning
def seed_everything(seed=42):
    seed_basic(seed)
    seed_torch(seed)
    L.seed_everything(seed)


seed_everything()

Seed set to 42


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [4]:
file_sm = uproot.open("../data/SM.root")
file = uproot.open("../data/DM.root")

df_DM = file['LHE'].arrays(file['LHE'].keys(), library="pd")
df_SM = file_sm['LHE'].arrays(file_sm['LHE'].keys(), library="pd")
df_SM['M_phi'] = np.sqrt(df_SM['E_phi']**2 - (df_SM['p_phi_x']**2 + df_SM['p_phi_y']**2 + df_SM['p_phi_z']**2))
df_DM['M_phi'] = np.sqrt(df_DM['E_phi']**2 - (df_DM['p_phi_x']**2 + df_DM['p_phi_y']**2 + df_DM['p_phi_z']**2))

train_raw_SM, test_raw_SM = train_test_split(df_SM, test_size=0.2)
train_raw_SM, val_raw_SM = train_test_split(train_raw_SM, test_size=0.2)

train_raw_DM, test_raw_DM = train_test_split(df_DM, test_size=0.2)
train_raw_DM, val_raw_DM = train_test_split(train_raw_DM, test_size=0.2)

train_raw = pd.concat([train_raw_SM, train_raw_DM], axis=0) # We use the 1/1 mix of SM and DM datasets for training
val_raw = pd.concat([val_raw_SM, val_raw_DM], axis=0)
test_raw = pd.concat([test_raw_SM, test_raw_DM], axis=0)

In [5]:
y_vars = ['p_nu_x', 'p_nu_y', 'p_nu_z', 'p_phi_x', 'p_phi_y', 'p_phi_z']
X_vars = ['p_l_x', 'p_l_y', 'p_l_z', 'p_b_x', 'p_b_y', 'p_b_z', 'p_q_x', 'p_q_y',
          'p_q_z', 'MET_x', 'MET_y', 'MET_z', 'Pt_Lep', 'Eta_Lep', 'Pt_J1', 'Eta_J1',
          'Pt_J2', 'Eta_J2', 'Pt_W', 'Eta_W', 'Pt_top', 'Eta_top', 'mW_inv', 'mtop_inv',
          'MtW', 'MtT', 'SP_LepB', 'SP_LepQ', 'SP_QB', 'SP_LepNu', 'SP_BNu', 'SP_QNu',
          'Ht', 'S_hat', 'Dphi_LepQ', 'Dphi_LepNu', 'Dphi_Wb', 'DR_LepQ', 'DR_LepNu', 'DR_Wb']

In [6]:
scaler_y = StandardScaler()
scaler_y.fit(np.array(train_raw[y_vars]))
scaler_X = StandardScaler()
scaler_X.fit(np.array(train_raw[X_vars]))

X_train_df = pd.DataFrame(scaler_X.transform(np.array(train_raw[X_vars])), columns=X_vars)
X_val_df = pd.DataFrame(scaler_X.transform(np.array(val_raw[X_vars])), columns=X_vars)
X_test_df = pd.DataFrame(scaler_X.transform(np.array(test_raw[X_vars])), columns=X_vars)

y_train_df = pd.DataFrame(scaler_y.transform(np.array(train_raw[y_vars])), columns=y_vars)
y_val_df = pd.DataFrame(scaler_y.transform(np.array(val_raw[y_vars])), columns=y_vars)
y_test_df = pd.DataFrame(scaler_y.transform(np.array(test_raw[y_vars])), columns=y_vars)

In [7]:
from torch.utils.data import Dataset, DataLoader


class CustomDataset(Dataset):
    def __init__(
        self,
        input_values,
        labels,
        transforms=None,
    ):
        self.input_values = torch.from_numpy(input_values).type(torch.FloatTensor)
        self.labels = torch.from_numpy(labels).type(torch.FloatTensor)
        self.transforms = transforms

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        if self.transforms:
            input_value = self.transforms(self.input_values[idx])
        else:
            input_value = self.input_values[idx]
        label = self.labels[idx]
        return input_value, label

In [8]:
train_set = CustomDataset(X_train_df.values, y_train_df.values)
val_set = CustomDataset(X_val_df.values, y_val_df.values)
test_set = CustomDataset(X_test_df.values, y_test_df.values)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=1024, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    val_set, batch_size=1024, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=1024, shuffle=False
)

In [9]:
import sys
sys.path.append('../src/')
import torch.nn as nn
import torch.nn.functional as F
import sc_models
from torchmetrics.regression import MeanSquaredError, MeanAbsoluteError

In [10]:
class LModel_NF(L.LightningModule):
    def __init__(
            self, model, lr=1e-3,
        ):
        super().__init__()
        self.model = model
        self.criterion = nn.L1Loss()
        self.train_acc = MeanAbsoluteError()
        self.valid_acc = MeanAbsoluteError()
        self.test_acc = MeanAbsoluteError()
        self.lr = lr

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr = self.lr)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.1, min_lr = 1e-5)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "epoch",
                "monitor": "loss/val", 
            },
        }
    def training_step(self, batch, batch_idx):
        x, y = batch
        out = -self.model.log_prob(inputs=y, context=x)
        loss = -self.model.log_prob(inputs=y, context=x).mean()
        self.log("loss/train", loss.detach().item(), prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        out = -self.model.log_prob(inputs=y, context=x)
        loss = -self.model.log_prob(inputs=y, context=x).mean()
        self.log("loss/val", loss.detach().item())

In [11]:
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
import warnings
warnings.filterwarnings("ignore")

def create_trainer(name, max_epochs = 10):
    torch.set_float32_matmul_precision('medium')
    trainer = L.Trainer(
        max_epochs=max_epochs,
        num_sanity_val_steps=1,
        log_every_n_steps=10,
        enable_checkpointing = False,
        enable_model_summary = False,
        enable_progress_bar = False,
        callbacks=[EarlyStopping(monitor="loss/val", mode="min", patience = 2)],
    )
    return trainer

In [12]:
import sc_misc
Cos_lep_light_true, _ = sc_misc.calculate_cosine_theory(val_raw_DM[X_vars], val_raw_DM[y_vars])

In [19]:
import optuna
from torchmetrics.regression import MeanAbsoluteError

X_val_DM = pd.DataFrame(scaler_X.transform(np.array(val_raw_DM[X_vars])), columns=X_vars)
mean_absolute_error = MeanAbsoluteError()
def objective(trial):
    _context_size = trial.suggest_int('context_size', 8, 64)
    _num_layers = trial.suggest_int('num_layers', 3, 7)
    _spline_layer = trial.suggest_int('spline_layer', 32, 256)
    _spline_num_layers = trial.suggest_int('spline_num_layers', 2, 5)
    
    nu_flow = sc_models.Nu_flow(encoder = sc_models.DeepSet(len(X_vars), _context_size), target_size = len(y_vars), 
                            masking_order = [1, 1, -1, 1, 1, -1],  # masking is done for (x,y) <-> z for neutrino and mediator
                            num_layers = _num_layers, context_size = _context_size, spline_conf = (_spline_layer, _spline_num_layers, 0.1))
    
    pl_model_nu_flow = LModel_NF(nu_flow)
    trainer = create_trainer("nu_flow", max_epochs = 3)
    trainer.fit(
        model=pl_model_nu_flow,
        train_dataloaders=train_loader,
        val_dataloaders=val_loader
    )
    
    with torch.no_grad():
        pred_nuflows, _= torch.median(nu_flow.sample(num_samples = 5, context=torch.tensor(X_val_DM.values, dtype = torch.float32)), 1)
        pred_nuflows = scaler_y.inverse_transform(pred_nuflows)
    Cos_lep_light_mixed_DM, _ = sc_misc.calculate_cosine(pred_nuflows, val_raw_DM[X_vars])
                                    
    return mean_absolute_error(torch.tensor(Cos_lep_light_mixed_DM), torch.tensor(Cos_lep_light_true))

study = optuna.create_study()
study.optimize(objective, n_trials=100)

`Trainer.fit` stopped: `max_epochs=3` reached.
[I 2024-11-12 19:30:20,593] Trial 89 finished with value: 0.2829580307006836 and parameters: {'context_size': 12, 'num_layers': 7, 'spline_layer': 70, 'spline_num_layers': 3}. Best is trial 62 with value: 0.2757568359375.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
`Trainer.fit` stopped: `max_epochs=3` reached.
[I 2024-11-12 19:37:41,595] Trial 90 finished with value: 0.28401580452919006 and parameters: {'context_size': 10, 'num_layers': 7, 'spline_layer': 109, 'spline_num_layers': 3}. Best is trial 62 with value: 0.2757568359375.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
`Trainer.fit` stopped: `max_epochs=3` reached.
[I 2024-11-12 19:45:14,304] Trial 91 finished with value: 0.285244882106781 and parameters: {'con

In [20]:
study.best_params

{'context_size': 19,
 'num_layers': 7,
 'spline_layer': 108,
 'spline_num_layers': 3}

In [21]:
trials_df = study.trials_dataframe()
trials_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_context_size,params_num_layers,params_spline_layer,params_spline_num_layers,state
0,0,0.284137,2024-11-12 09:15:42.045324,2024-11-12 09:22:10.953224,0 days 00:06:28.907900,42,6,32,4,COMPLETE
1,1,0.282535,2024-11-12 09:22:10.954358,2024-11-12 09:25:48.941999,0 days 00:03:37.987641,30,3,48,3,COMPLETE
2,2,0.282604,2024-11-12 09:25:48.943091,2024-11-12 09:30:45.301639,0 days 00:04:56.358548,35,4,202,3,COMPLETE
3,3,0.285604,2024-11-12 09:30:45.302917,2024-11-12 09:34:55.472924,0 days 00:04:10.170007,58,3,214,4,COMPLETE
4,4,0.282965,2024-11-12 09:34:55.473988,2024-11-12 09:40:45.921750,0 days 00:05:50.447762,24,5,150,3,COMPLETE
...,...,...,...,...,...,...,...,...,...,...
95,95,0.279136,2024-11-12 20:07:09.862095,2024-11-12 20:15:25.162229,0 days 00:08:15.300134,16,7,154,5,COMPLETE
96,96,0.283611,2024-11-12 20:15:25.163501,2024-11-12 20:22:47.486641,0 days 00:07:22.323140,15,7,100,3,COMPLETE
97,97,0.278059,2024-11-12 20:22:47.487733,2024-11-12 20:30:57.213981,0 days 00:08:09.726248,20,7,155,5,COMPLETE
98,98,0.283399,2024-11-12 20:30:57.214937,2024-11-12 20:39:11.831944,0 days 00:08:14.617007,28,7,153,5,COMPLETE


In [22]:
trials_df.to_csv("tuning.csv")