Setup

In [1]:
from itertools import product 
from pathlib import Path

import matplotlib.pyplot as plt
import torch

from helpers import models, data, plot

In [2]:
# Dataset splits that are generated and written to disk.
splits = ["train", "val"]
# Levels looped over by every cell below; the plot titles spell them out as
# "Generator", "Detector" and "Detector and Bkg.".
levels = ["gen", "det", "det_bkg"]
# Forwarded unchanged to data.make_images as its q^2 veto argument.
q_sq_veto = "resonances"

# Dataset name handed to every data.save_dset_file / data.Dataset call.
dset_name = "images"

def make_model_name(level, num_signal):
    """Build the name under which the CNN for one configuration is saved and loaded.

    Parameters
    ----------
    level
        Level identifier (one of the entries of ``levels``).
    num_signal
        Number of signal events per set.

    Returns
    -------
    str
        ``"cnn_<level>_<num_signal>"``, e.g. ``"cnn_gen_8000"``.
    """
    model_name = f"cnn_{level}_{num_signal}"
    return model_name

# Label value (delta C9) used as the only label of the sensitivity datasets.
dc9_new_phys = -0.82

# Bins per dimension, forwarded to data.make_images.
bins_per_dim = 50
# Signal-events-per-set configurations that are studied.
num_signal_per_set = [8_000, 16_000, 32_000]
# Sets generated per label for each configuration. Every entry amounts to the
# same 3.2M signal events per label (8_000 * 400 = 16_000 * 200 = 32_000 * 100).
num_sets_per_label = {8_000: 400, 16_000: 200, 32_000: 100}
# Number of sets generated for each sensitivity dataset.
num_sets_sensitivity = 2_000
# Background settings forwarded to data.make_images as keyword arguments.
# NOTE(review): their exact meaning is defined by the helper — confirm there.
bkg_signal_ratio = 0.96
charge_bkg_fraction = 0.33

# Training setup; every value below is passed positionally to models.train.
device = models.select_device()
loss_fn = torch.nn.MSELoss()

# Learning rate and its reduction settings.
# NOTE(review): presumably these drive a reduce-on-plateau scheduler inside models.train — confirm.
lr = 1e-3
lr_reduce_factor = 0.2
lr_reduce_patience = 5

# Batch size per events-per-set configuration (smaller batches for larger sets).
batch_sizes = {8_000: 128, 16_000: 64, 32_000: 32}

epochs = 50
epochs_checkpoint = 1
epochs_checkpoint = 1

Device:  cuda


Save standard scaling constants

In [None]:
# The scaling constants are derived from the training split only.
split = "train"

for level, num_signal in product(levels, num_signal_per_set):
    images_features, images_labels = data.make_images(
        level,
        split,
        q_sq_veto,
        num_signal,
        num_sets_per_label[num_signal],
        bins_per_dim,
        bkg_signal_ratio=bkg_signal_ratio,
        charge_bkg_fraction=charge_bkg_fraction,
    )

    # A single mean / std over all elements of the feature tensor (no `dim` given).
    std_scale_mean = torch.mean(images_features)
    std_scale_std = torch.std(images_features)

    # Store both constants next to the dataset, "mean" first and "std" second.
    for stat_kind, stat_value in (("mean", std_scale_mean), ("std", std_scale_std)):
        data.save_dset_file(
            stat_value, dset_name, level, split, stat_kind, num_signal_per_set=num_signal
        )

    print(f"finished: {level}, {num_signal}")

Dataset creation

In [None]:
# Build and store the scaled image datasets for every (level, events/set, split) combination.
for level, num_signal, split in product(levels, num_signal_per_set, splits):
    images_features, images_labels = data.make_images(
        level,
        split,
        q_sq_veto,
        num_signal,
        num_sets_per_label[num_signal],
        bins_per_dim,
        bkg_signal_ratio=bkg_signal_ratio,
        charge_bkg_fraction=charge_bkg_fraction,
    )

    # NOTE(review): no split is passed here, so the helper presumably loads the
    # constants saved for the training split — confirm in data.apply_std_scale.
    images_features = data.apply_std_scale(
        images_features, dset_name, level, num_signal_per_set=num_signal
    )

    # Features are written first, then labels.
    for file_kind, tensor in (("features", images_features), ("labels", images_labels)):
        data.save_dset_file(
            tensor, dset_name, level, split, file_kind, num_signal_per_set=num_signal
        )

    print(f"finished: {level}, {num_signal}, {split}")

Sensitivity datasets

In [None]:
# Sensitivity datasets are built from the validation split only.
split = "val"

for level, num_signal in product(levels, num_signal_per_set):
    # Restrict generation to the single new-physics label and use the larger
    # sensitivity set count instead of num_sets_per_label.
    images_features, images_labels = data.make_images(
        level,
        split,
        q_sq_veto,
        num_signal,
        num_sets_sensitivity,
        bins_per_dim,
        label_subset=[dc9_new_phys],
        bkg_signal_ratio=bkg_signal_ratio,
        charge_bkg_fraction=charge_bkg_fraction,
    )

    images_features = data.apply_std_scale(
        images_features, dset_name, level, num_signal_per_set=num_signal
    )

    # Features are written first, then labels.
    for file_kind, tensor in (("sens_features", images_features), ("sens_labels", images_labels)):
        data.save_dset_file(
            tensor, dset_name, level, split, file_kind, num_signal_per_set=num_signal
        )

    print(f"finished: {level}, {num_signal}")


Train models

In [None]:
# Train one fresh CNN per (level, events/set) configuration.
for level, num_signal in product(levels, num_signal_per_set):
    model = models.CNN_Model()
    model_name = make_model_name(level, num_signal)

    dataset_train = data.Dataset(dset_name, level, "train", num_signal_per_set=num_signal)
    dataset_val = data.Dataset(dset_name, level, "val", num_signal_per_set=num_signal)

    # The same batch size is passed twice.
    # NOTE(review): presumably once for training and once for validation — confirm against models.train.
    batch_size = batch_sizes[num_signal]

    models.train(
        model,
        model_name,
        loss_fn,
        dataset_train,
        dataset_val,
        device,
        lr,
        lr_reduce_factor,
        lr_reduce_patience,
        batch_size,
        batch_size,
        epochs,
        epochs_checkpoint,
    )

    print(f"finished: {level}, {num_signal}")


Evaluate models

Linearity and error

In [None]:
# Run the linearity and error tests on the validation set of every trained model.
for level, num_signal in product(levels, num_signal_per_set):
    model_name = make_model_name(level, num_signal)

    # Rebuild the network and restore its saved weights in inference mode.
    model = models.CNN_Model()
    state_dict = models.open_model_state_dict(model_name)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    dataset_val = data.Dataset(dset_name, level, "val", num_signal_per_set=num_signal)
    preds = models.predict_values_set_model(model, dataset_val.features, device)

    # Both tests are computed before anything is written to disk.
    results_lin = models.run_linearity_test(preds, dataset_val.labels)
    results_err = models.run_error_test(preds, dataset_val.labels)

    for test_kind, test_result in (("lin", results_lin), ("err", results_err)):
        models.save_test_result(test_result, test_kind, num_signal, model_name)

    print(f"finished: {level}, {num_signal}")


finished: gen, 8000
finished: gen, 16000
finished: gen, 32000
finished: det, 8000
finished: det, 16000
finished: det, 32000
finished: det_bkg, 8000
finished: det_bkg, 16000
finished: det_bkg, 32000


Sensitivity

In [None]:
# Run the sensitivity test on the dedicated sensitivity dataset of every trained model.
for level, num_signal in product(levels, num_signal_per_set):

    model_name = make_model_name(level, num_signal)
    model = models.CNN_Model()
    model.load_state_dict(models.open_model_state_dict(model_name))
    # Mirror the linearity/error evaluation: put the model on the selected
    # device and switch it to inference mode, so both evaluations run the
    # network in the same mode.
    model.to(device)
    model.eval()

    dataset_val_sens = data.Dataset(dset_name, level, "val", num_signal_per_set=num_signal, sensitivity=True)

    preds = models.predict_values_set_model(model, dataset_val_sens.features, device)

    results_sens = models.run_sensitivity_test(preds, dataset_val_sens.labels)

    # model_name from the top of the loop is reused; it does not need recomputing.
    models.save_test_result(results_sens, "sens", num_signal, model_name)

    print(f"finished: {level}, {num_signal}")

Plot results

Linearity

In [6]:
# Spot check: display the stored linearity result of the "gen"-level model
# trained with 8_000 signal events per set.
models.open_test_result("lin", 8_000, make_model_name("gen", 8_000))

(array([-2.  , -1.93, -1.85, -1.78, -1.7 , -1.63, -1.56, -1.48, -1.41,
        -1.34, -1.26, -1.19, -1.11, -1.04, -0.97, -0.89, -0.82, -0.75,
        -0.67, -0.6 , -0.52, -0.45, -0.38, -0.3 , -0.23, -0.15, -0.08,
        -0.01,  0.  ,  0.07,  0.14,  0.21,  0.29,  0.36,  0.44,  0.51,
         0.58,  0.66,  0.73,  0.8 ,  0.88,  0.95,  1.03,  1.1 ],
       dtype=float32),
 array([-1.933898  , -1.8918524 , -1.8122401 , -1.7772907 , -1.6918339 ,
        -1.6244377 , -1.5495359 , -1.4659733 , -1.3822739 , -1.3117911 ,
        -1.2458181 , -1.1511545 , -1.0558013 , -1.0210567 , -0.9179155 ,
        -0.83064497, -0.70667595, -0.6824466 , -0.5812459 , -0.5737249 ,
        -0.42535818, -0.3168572 , -0.2827535 , -0.1702128 , -0.14336853,
        -0.14706984,  0.06285138,  0.12070738,  0.06831265,  0.1452591 ,
         0.16342606,  0.2523075 ,  0.2313979 ,  0.26716474,  0.3497586 ,
         0.4083904 ,  0.4396983 ,  0.48184916,  0.48248875,  0.5568276 ,
         0.56465137,  0.5491888 ,  0.6366568

In [4]:
# One panel per (level, events/set) configuration. The grid shape is derived
# from the configuration lists so no panel is silently dropped by zip() if
# either list changes; rows follow `levels`, columns follow `num_signal_per_set`.
fig, axs = plt.subplots(
    len(levels),
    len(num_signal_per_set),
    sharex=True,
    sharey=True,
    layout="compressed",
    squeeze=False,  # keep axs 2-D even for a single row or column
)

# Human-readable level names used in the panel titles.
fancy_level_names = {
    "gen": "Generator",
    "det": "Detector",
    "det_bkg": "Detector and Bkg.",
}

for (level, num_signal), ax in zip(product(levels, num_signal_per_set), axs.flat):

    model_name = make_model_name(level, num_signal)

    result = models.open_test_result("lin", num_signal, model_name)

    plot.plot_linearity(result, ax=ax)

    ax.set_title(
        f"Level: {fancy_level_names[level]}"
        f"\nEvents/set: {num_signal}"
        "\n" + r"Sets/$\delta C_9$: " + f"{num_sets_per_label[num_signal]}",
        loc="left"
    )

# A single legend on the first panel is enough for the whole grid.
axs.flat[0].legend()
fig.suptitle("CNN\n", x=0.02, horizontalalignment="left")
fig.supxlabel(r"Actual $\delta C_9$", fontsize=11, x=0.56, y=-0.06)
fig.supylabel(r"Predicted $\delta C_9$", fontsize=11, y=0.45)

# Create the output directory first: savefig raises if it does not exist.
plots_dir = Path("plots")
plots_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(plots_dir.joinpath("cnn_grid_lin.png"), bbox_inches="tight")
# Close this specific figure to free it without rendering it inline.
plt.close(fig)