Training a neural network to predict $\delta C_9$

Setup

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt

import torch
from torch import nn

from library.nn_training import select_device, train_and_eval
from library.datasets import Signal_Unbinned_Dataset, Bootstrapped_Signal_Unbinned_Dataset
from library.plotting import plot_loss_curves, plot_prediction_linearity

# Choose the torch device that the datasets and models below are placed on.
# The call also reports its choice (the recorded cell output reads "Device:  cuda").
device = select_device()

Device:  cuda


Load / Generate Datasets

In [2]:
# --- configuration -----------------------------------------------------------
# True: rebuild the processed datasets from the raw signal files before loading.
# False: skip generation and only call load() on what is already in save_dir.
regenerate = True

level = "gen"
save_dir = "../../state/new_physics/data/processed"
raw_signal_dir = "../../state/new_physics/data/raw/signal"
# Disjoint raw-trial indices per split: 0-4 feed training, 5-9 feed evaluation.
raw_trials = {"train": range(5), "eval": range(5, 10)}
q_squared_veto = False
std_scale = True

# --- set datasets (currently disabled, kept for reference) --------------------
# num_events_per_set = 42_000
# num_sets_per_label = {"train": 5, "eval": 5}

# set_datasets = {
#     "train": Bootstrapped_Signal_Unbinned_Dataset(level=level, split="train", save_dir=save_dir, feature_names=["q_squared"]), 
#     "eval": Bootstrapped_Signal_Unbinned_Dataset(level=level, split="eval", save_dir=save_dir, feature_names=["q_squared"])
# }

# if regenerate:
#     set_datasets["train"].generate(raw_trials["train"], raw_signal_dir, num_events_per_set, num_sets_per_label["train"], q_squared_veto=q_squared_veto, std_scale=std_scale)
#     set_datasets["eval"].generate(raw_trials["eval"], raw_signal_dir, num_events_per_set, num_sets_per_label["eval"], q_squared_veto=q_squared_veto, std_scale=std_scale)

# set_datasets["train"].load()
# set_datasets["eval"].load()

# --- event datasets -----------------------------------------------------------
# One dataset object per split, each exposing the single feature "q_squared".
event_datasets = {
    name: Signal_Unbinned_Dataset(
        level=level,
        split=name,
        save_dir=save_dir,
        feature_names=["q_squared"],
    )
    for name in ("train", "eval")
}

# The three stages stay separate (construct all, generate all, load all) and
# always run train before eval, in case a later stage relies on files written
# by an earlier one.
if regenerate:
    for split_name in ("train", "eval"):
        event_datasets[split_name].generate(
            raw_trials[split_name],
            raw_signal_dir,
            q_squared_veto=q_squared_veto,
            std_scale=std_scale,
        )

for split_name in ("train", "eval"):
    event_datasets[split_name].load(device)

opened raw file: [1/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.01_1_re.pkl
opened raw file: [2/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.01_2_re.pkl
opened raw file: [3/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.01_3_re.pkl
opened raw file: [4/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.01_4_re.pkl
opened raw file: [5/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.08_1_re.pkl
opened raw file: [6/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.08_2_re.pkl
opened raw file: [7/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.08_3_re.pkl
opened raw file: [8/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.08_4_re.pkl
opened raw file: [9/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.15_1_re.pkl
opened raw file: [10/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.15_2_re.pkl
opened raw file: [11/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.15_3_re.pkl
opened raw file: [12/175] ..\..\state\new_physics\data\raw\signal\dc9_-0.1

In [4]:
# Standardize the regression targets. Both statistics come from the TRAIN
# split only and are then applied unchanged to the eval split.
#
# NOTE: the labels are overwritten in place. Re-running this cell without first
# reloading the datasets recomputes mu / sigma from already-standardized labels,
# so the original statistics are no longer available afterwards.
mu = event_datasets["train"].labels.mean()
sigma = event_datasets["train"].labels.std()

for split_name in ("train", "eval"):
    dataset = event_datasets[split_name]
    dataset.labels = (dataset.labels - mu) / sigma

Train / Load Model

Evaluate Model

In [None]:
class Set_Trained_Regressor_NN(nn.Module):
    """
    Small fully connected regressor mapping one input feature to one output.

    The stack is Linear(1, 32) -> ReLU -> Linear(32, 32) -> ReLU ->
    Linear(32, 1). The final layer has no activation, so the output is an
    unbounded real value.

    Parameters
    ----------
    save_dir : str or Path
        Directory in which the weights file is expected to live.
    nickname : str
        Identifier of the model; the weights path is
        ``<save_dir>/<nickname>.pt`` and is stored as ``self.save_path``.
    """
    def __init__(self, save_dir, nickname):
        super().__init__()

        self.nickname = nickname
        self.save_path = Path(save_dir) / f"{nickname}.pt"

        hidden_width = 32
        layers = [
            nn.Linear(1, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, 1),
        ]
        # The attribute name "lin" and the layer order determine the
        # state_dict keys ("lin.0.weight", ...), so saved checkpoints depend
        # on both staying as they are.
        self.lin = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` (last dimension of size 1) through the layer stack."""
        return self.lin(x)

# Train the regressor from scratch (retrain=True) or restore previously saved
# weights from model.save_path (retrain=False).
retrain = True
save_dir = "../../state/new_physics/models"
nickname = "test1"
model = Set_Trained_Regressor_NN(save_dir, nickname)
# Put the model on the compute device in BOTH branches, and before the
# optimizer is built. Previously only the load branch did this explicitly;
# the call is a no-op if the model is already there.
model.to(device)

if retrain:
    
    learning_rate = 1e-3
    epochs = 20
    train_batch_size = 24_000
    eval_batch_size = 24_000
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    loss_table = train_and_eval(
        model, 
        event_datasets["train"], event_datasets["eval"], 
        loss_fn, 
        optimizer, 
        epochs, 
        train_batch_size, eval_batch_size, 
        device, 
        move_data=True)
    
    # torch.save does not create missing directories; make sure the target
    # folder exists so a finished training run is not lost at save time.
    model.save_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), model.save_path)

    _, ax = plt.subplots()
    # Leave the first epochs out of the loss plot.
    skip_first = 2
    plot_loss_curves(loss_table["epoch"][skip_first:], loss_table["train_loss"][skip_first:], loss_table["eval_loss"][skip_first:], ax)
    plt.show()

else:
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine still loads on a CPU-only one.
    model.load_state_dict(torch.load(model.save_path, map_location=device, weights_only=True))

In [5]:
split = "eval"

# Inputs and targets of the chosen split, moved to `device`.
feat = event_datasets[split].features.to(device)
labels = event_datasets[split].labels.to(device)

# Pure inference: evaluation mode and no autograd bookkeeping.
model.eval()
with torch.no_grad():
    yhat = model(feat)

# Bucket the predictions by distinct label value, then summarize each bucket
# by its mean and standard deviation.
unique_labels = labels.unique()
separated_yhats = [yhat[labels == value] for value in unique_labels]
avgs = [bucket.mean() for bucket in separated_yhats]
stds = [bucket.std() for bucket in separated_yhats]

# Bring every summary value back to host memory as a NumPy array for plotting.
unique_labels = [t.cpu().numpy() for t in unique_labels]
avgs = [t.cpu().numpy() for t in avgs]
stds = [t.cpu().numpy() for t in stds]

In [None]:
# Mean prediction and spread for each distinct label value.
# NOTE(review): the labels were standardized in an earlier cell, so the values
# drawn here are presumably in standardized units rather than raw delta C_9 --
# confirm before reading numbers off the axes.
_, ax = plt.subplots()

plot_prediction_linearity(
    ax,
    unique_labels,
    avgs,
    stds,
    ref_line_buffer=0.05,
    # Fixed axis ranges, currently disabled:
    # xlim=(-2.25, 1.35),
    # ylim=(-2.25, 1.35),
    xlabel=r"Actual $\delta C_9$",
    ylabel=r"Predicted $\delta C_9$",
)


In [None]:
# Visual sanity check: true labels and model predictions against the input
# feature. Only every 100th event is drawn, with strong transparency.
thinning = slice(None, None, 100)
feat_thinned = feat[thinning].cpu()
plt.scatter(feat_thinned, labels[thinning].cpu(), alpha=0.01)
plt.scatter(feat_thinned, yhat[thinning].cpu(), alpha=0.01)

In [None]:
from library.datasets import Test_Linear_Dataset

# Dataset used below as both the training and the evaluation set for the
# single-layer test model.
# NOTE(review): judging by the name it presumably holds a linear
# feature -> label relation -- confirm in library.datasets.
test_linear_dataset = Test_Linear_Dataset()

class Test_Regressor_NN(nn.Module):
    """
    Minimal regressor: a single Linear(1, 1) layer, i.e. ``y = w * x + b``.

    Parameters
    ----------
    save_dir : str or Path
        Directory in which the weights file is expected to live.
    nickname : str
        Identifier of the model; the weights path is
        ``<save_dir>/<nickname>.pt`` and is stored as ``self.save_path``.
    """
    def __init__(self, save_dir, nickname):
        super().__init__()

        self.nickname = nickname
        self.save_path = Path(save_dir) / f"{nickname}.pt"

        # Kept inside nn.Sequential so the state_dict keys stay
        # "lin.0.weight" / "lin.0.bias".
        affine = nn.Linear(1, 1)
        self.lin = nn.Sequential(affine)

    def forward(self, x):
        """Apply the affine map to ``x`` (last dimension of size 1)."""
        return self.lin(x)

# Train the single-layer test model from scratch (retrain=True) or restore
# previously saved weights from model.save_path (retrain=False).
retrain = True
save_dir = "../../state/new_physics/models"
nickname = "test_testlin_1"
model = Test_Regressor_NN(save_dir, nickname)
# Put the model on the compute device in BOTH branches, and before the
# optimizer is built. Previously only the load branch did this explicitly;
# the call is a no-op if the model is already there.
model.to(device)

if retrain:
    
    learning_rate = 3e-2
    epochs = 75
    train_batch_size = 16
    eval_batch_size = 16
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # The same dataset serves as train and eval set here.
    loss_table = train_and_eval(
        model, 
        test_linear_dataset, test_linear_dataset, 
        loss_fn, 
        optimizer, 
        epochs, 
        train_batch_size, eval_batch_size, 
        device, 
        move_data=True)
    
    # torch.save does not create missing directories; make sure the target
    # folder exists so a finished training run is not lost at save time.
    model.save_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), model.save_path)

    _, ax = plt.subplots()
    # Leave the first epochs out of the loss plot.
    skip_first = 2
    plot_loss_curves(loss_table["epoch"][skip_first:], loss_table["train_loss"][skip_first:], loss_table["eval_loss"][skip_first:], ax)
    plt.show()

else:
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine still loads on a CPU-only one.
    model.load_state_dict(torch.load(model.save_path, map_location=device, weights_only=True))


# Pure inference on the full test dataset: evaluation mode, no autograd.
model.eval()
with torch.no_grad():
    yhat = model(test_linear_dataset.features.to(device))

# Reduce dimension 1 of the predictions to one central value per row.
avgs = yhat.mean(dim=1)
# torch.std applies Bessel's correction by default, so reducing a dimension of
# length 1 (which is what Linear(1, 1) emits for (N, 1) input) yields NaN and a
# warning. A single value has no spread, so report zero in that case; rows with
# several values keep the original computation.
if yhat.shape[1] > 1:
    stds = yhat.std(dim=1)
else:
    stds = torch.zeros_like(avgs)

_, ax = plt.subplots()

plot_prediction_linearity(
    ax,
    test_linear_dataset.labels.squeeze().cpu().numpy(), avgs.cpu().numpy(), stds.cpu().numpy(),
    ref_line_buffer=0.05,
    # Fixed axis ranges, currently disabled:
    # xlim=(-2.25, 1.35),
    # ylim=(-2.25, 1.35),
    xlabel=r"Actual $\delta C_9$", 
    ylabel=r"Predicted $\delta C_9$"
)