In [1]:
import torch
from torch import nn
import numpy as np
import sys
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import importlib

sys.path.append("..")
from model import ParticleEventTransformer
from data import get_database_path, get_h5_files, read_h5_file, select_events
from utils import load_toml_config

import matplotlib.pyplot as plt


# Pick the best accelerator that is actually usable on this machine.
# Being on macOS is not enough to select "mps": the backend must report itself
# as available, otherwise tensors moved to that device fail at runtime.
_mps_backend = getattr(torch.backends, "mps", None)  # absent on old torch builds
if torch.cuda.is_available():
    device = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
else:
    device = "cpu"

# Seed both torch and the legacy NumPy global RNG so reruns are repeatable.
random_seed = 114514
torch.manual_seed(random_seed)
np.random.seed(random_seed)

In [2]:
# --- Settings read from the project's TOML configuration -------------------
EMD_config = load_toml_config("EMD")
particle_type_scale = EMD_config['particle_type_scale']

model_hyper_parameters = load_toml_config("Transformer")
output_dim = model_hyper_parameters["output_dim"]

# --- Embedded points --------------------------------------------------------
# The HDF5 handle is deliberately left open: later cells read its datasets.
import h5py
embedding_points = h5py.File("../embedding_points.h5", "r")

# Every dataset stored in the file is handed to Normalizer as one positional
# argument, in the order the file yields them.
from analysis import Normalizer
normalizer = Normalizer(*embedding_points.values())

# Show the bounds held by the normalizer, max first and min second.
print(normalizer.max, normalizer.min, sep="\n")

[369.1842041  370.31808472 327.48770142 326.72790527]
[-377.18408203 -356.34307861 -320.93734741 -340.06167603]


In [5]:
# Show which groups the embedding file contains, then the shape of each one.
print(embedding_points.keys())
for key, group_points in embedding_points.items():
    print(key, group_points.shape)

<KeysViewHDF5 ['SM', 'charged_Higgs', 'leptoquark', 'neutral_Higgs', 'neutral_boson']>
SM (13451915, 4)
charged_Higgs (760272, 4)
leptoquark (340544, 4)
neutral_Higgs (691283, 4)
neutral_boson (55969, 4)


# Dataset

In [4]:
def one_hot(idx, num_classes):
    """Turn an array of integer class indices into one-hot rows.

    Args:
        idx: NumPy integer array of class indices; it is flattened first.
        num_classes: Length of each one-hot vector.

    Returns:
        Float array with one row of the ``num_classes`` identity matrix per
        index, with every length-1 axis removed by ``np.squeeze`` (so a single
        index yields a 1-D vector rather than a 1-row matrix).
    """
    flat_indices = idx.reshape(-1)
    identity = np.eye(num_classes)
    encoded_rows = identity[flat_indices]
    return np.squeeze(encoded_rows)

In [8]:
class SignalDataset(Dataset):
    """Map-style dataset pairing each embedding point with a one-hot class label.

    Each entry of ``embedding_points`` is treated as one class, and the class
    index is the position of its key in ``embedding_points.keys()``.

    Args:
        embedding_points: Mapping from class name to an array-like of points
            (for example an open ``h5py.File``). All entries are loaded into
            memory and concatenated along the first axis.
        normalizer: Callable applied to the selected point(s) in ``__getitem__``.
        label: Unused; kept only so existing call sites keep working.
    """

    # Width of the one-hot label vectors.
    NUM_CLASSES = 5

    def __init__(self, embedding_points, normalizer, label):
        groups = [np.asarray(embedding_points[key]) for key in embedding_points.keys()]
        self.points = np.concatenate(groups)
        self.normalizer = normalizer

        # Build the class indices as integer arrays. Multiplying a Python list
        # would allocate one Python int per point, and an empty class would
        # yield an empty list that promotes the concatenated indices to
        # float64, which cannot be used to index the identity matrix.
        label_idx = np.concatenate(
            [np.full(len(group), class_id, dtype=np.int64)
             for class_id, group in enumerate(groups)]
        )

        # Index the identity matrix directly so the result is always
        # (n_points, NUM_CLASSES). Squeezing it would drop the leading axis
        # for a single-point dataset and trip the length check below.
        self.label = np.eye(self.NUM_CLASSES)[label_idx]
        assert len(self.points) == len(self.label)

    def __len__(self):
        """Return the total number of points across all classes."""
        return len(self.points)

    def __getitem__(self, idx):
        """Return ``(normalizer(points[idx]), one-hot label at idx)``."""
        return self.normalizer(self.points[idx]), self.label[idx]


In [10]:
class MLP(nn.Module):
    """Fully connected classifier that outputs softmax class probabilities.

    The network is a stack of ``Linear -> LeakyReLU`` blocks followed by a
    final ``Linear`` layer and a softmax over the class axis.

    Args:
        input_size: Number of features in the last axis of the input.
        hidden_sizes: Sequence of hidden-layer widths. If empty, the model is a
            single linear layer followed by softmax.
        num_classes: Number of output classes. Defaults to 5.
    """

    def __init__(self, input_size, hidden_sizes, num_classes=5):
        super().__init__()

        def make_layer(in_size, out_size):
            """Build one ``Linear -> LeakyReLU`` block with Kaiming-normal weights."""
            layer = nn.Sequential(
                nn.Linear(in_size, out_size),
                nn.LeakyReLU(),
            )
            nn.init.kaiming_normal_(layer[0].weight, nonlinearity='leaky_relu')
            return layer

        # Consecutive widths give the (in, out) size of each hidden block.
        # Blocks are created in the same order as before, so seeded
        # initialisation and state_dict keys stay the same.
        widths = [input_size, *hidden_sizes]
        hidden_blocks = [
            make_layer(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])
        ]
        self.layers = nn.Sequential(
            *hidden_blocks,
            nn.Linear(widths[-1], num_classes),
            # Normalise over the class axis explicitly. Without `dim`, torch
            # warns and guesses the axis from the input rank, choosing axis 0
            # (across samples) for 3-D inputs.
            nn.Softmax(dim=-1),
        )

    def forward(self, x):
        """Return the class probabilities flattened into a 1-D tensor.

        NOTE(review): flattening interleaves the per-sample probability
        vectors (``batch * num_classes`` values). This is kept because callers
        may rely on the shape - confirm it is what the loss expects.
        """
        return self.layers(x).reshape(-1)