In [1]:
import numpy as np
import matplotlib.pyplot as plt


import uproot
import awkward as ak

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import train_test_split

In [2]:
# Function to calculate weights analytically

def weight_fn(xx1, xx2, xx3, phi, costh):
    """Evaluate the analytic angular weight for one event or an array of events.

    The numerator is
        1 + xx1 * costh^2
          + 2 * xx2 * costh * sqrt(1 - costh^2) * cos(phi)
          + 0.5 * xx3 * (1 - costh^2) * cos(2 * phi)
    and the result is that numerator divided by ``1 + costh^2``.

    Args:
        xx1, xx2, xx3: The three coefficients multiplying the angular terms
            (scalars or arrays broadcastable against ``phi``/``costh``).
        phi: Azimuthal angle; it is passed directly to ``np.cos``.
        costh: Cosine of the polar angle.

    Returns:
        The weight, as a NumPy scalar or array following NumPy broadcasting.
    """
    cos_sq = costh * costh
    sin_sq = 1. - cos_sq
    sin_th = np.sqrt(sin_sq)

    # Terms are kept in the same left-to-right order as the closed-form expression
    polar_term = xx1 * costh * costh
    mixed_term = 2. * xx2 * costh * sin_th * np.cos(phi)
    azimuthal_term = 0.5 * xx3 * sin_sq * np.cos(2. * phi)

    numerator = 1. + polar_term + mixed_term + azimuthal_term
    return numerator / (1. + cos_sq)

In [3]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Extracting Drell-Yan Angular Coefficients using Neural Network-based Classifiers with E906 LH2 Data

In this notebook, we use deep neural network-based classifiers to extract the DY angular coefficients. As the first step, let's define our classifier.

In [4]:
class BMFClassifier(nn.Module):
    """Fully connected binary classifier with batch normalisation.

    Three hidden ``Linear -> BatchNorm1d -> ReLU`` stages are followed by an
    output ``Linear -> BatchNorm1d -> Sigmoid`` stage, so every output lies
    in the open interval (0, 1).

    Args:
        input_dim: Number of input features per event.
        output_dim: Number of outputs per event.
        hidden_dim: Width of each hidden layer.
    """

    def __init__(self, input_dim: int = 8, output_dim: int = 1, hidden_dim: int = 32):
        super().__init__()

        # Hidden stage 1
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        # Hidden stage 2
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        # Hidden stage 3
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.bn3 = nn.BatchNorm1d(hidden_dim)
        # Output stage
        self.fc4 = nn.Linear(hidden_dim, output_dim)
        self.bn4 = nn.BatchNorm1d(output_dim)

        # Activations
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, output_dim)`` scores."""
        hidden_stages = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
        )
        for linear, norm in hidden_stages:
            x = self.relu(norm(linear(x)))
        return self.sigmoid(self.bn4(self.fc4(x)))

In [5]:
# Module used to add parameter for fitting
class AddParams2Input(nn.Module):
    """Append a trainable parameter vector to every row of an input batch.

    Args:
        params: Initial parameter values; they are wrapped in a trainable
            ``nn.Parameter`` stored as ``self.params``.
    """

    def __init__(self, params):
        super().__init__()
        initial_values = torch.Tensor(params)
        self.params = nn.Parameter(initial_values, requires_grad=True)

    def forward(self, inputs):
        """Return ``inputs`` with the parameter values concatenated as extra columns."""
        target_device = inputs.device
        n_rows = inputs.size(0)

        # A (n_rows, 1) column of ones times the parameter vector broadcasts
        # to one copy of the parameters per row.
        ones_column = torch.ones((n_rows, 1), device=target_device)
        tiled_params = ones_column * self.params.to(device=target_device)

        return torch.cat([inputs, tiled_params], dim=-1)

In [6]:
# Custom loss function
class BMFLoss(nn.Module):
    """Binary cross-entropy with a per-element weight, averaged over all elements."""

    def __init__(self):
        super().__init__()

    def forward(self, outputs, targets, weights):
        """Return ``mean(BCE(outputs, targets) * weights)`` as a scalar tensor.

        Args:
            outputs: Predicted probabilities.
            targets: Target labels, same shape as ``outputs``.
            weights: Multiplicative weights applied to the unreduced loss.
        """
        # Unreduced loss so that each element can be scaled by its own weight
        elementwise_bce = nn.BCELoss(reduction="none")(outputs, targets)
        return (elementwise_bce * weights).mean()

We extract the angular coefficients in two steps:

## Step 1: Parameterize the neural network with angular coefficients

In this step, we parameterize the neural network with $\lambda$, $\mu$, $\nu$ values. This is done during the training step. The input features to the neural network are `phi`, `costh`, `lambda`, `mu`, and `nu` (the classifier is built with `input_dim=5`); `mass`, `pT`, and `xF` are read from the file but are not used as inputs.

In [7]:
# Train step
def _snapshot_state_dict(model):
    """Return a copy of ``model.state_dict()`` that later training cannot modify.

    ``state_dict()`` hands back a fresh container whose values are the live
    parameter/buffer tensors, so each value is replaced by a detached clone.
    """
    state = model.state_dict()
    for key, tensor in list(state.items()):
        state[key] = tensor.detach().clone()
    return state


def train_model(model, train_loader, test_loader, criterion, optimizer, device, epochs, early_stopping_patience):
    """Train ``model`` and return the weights from the epoch with the lowest test loss.

    Args:
        model: Module to train; it is called as ``model(batch_inputs)``.
        train_loader: Iterable of ``(inputs, labels, weights)`` training batches.
        test_loader: Iterable of ``(inputs, labels, weights)`` batches used to
            compute the loss that drives early stopping.
        criterion: Callable ``criterion(outputs, labels, weights)`` returning a scalar loss.
        optimizer: Optimizer stepping the model parameters.
        device: Device each batch is moved to before the forward pass.
        epochs: Maximum number of epochs.
        early_stopping_patience: Number of consecutive epochs without a lower
            test loss after which training stops.

    Returns:
        A state dict holding a copy of the weights at the best epoch, or
        ``None`` if no epoch produced a finite improvement (for example when
        ``epochs`` is 0). The model is left in eval mode after each epoch.
    """
    best_loss = float('inf')
    best_model_weights = None
    patience_counter = 0

    for epoch in range(epochs):
        # ---- Training pass ----
        model.train()
        running_loss = 0.0
        for batch_inputs, batch_labels, batch_weights in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            batch_weights = batch_weights.to(device)

            optimizer.zero_grad()
            outputs = model(batch_inputs)
            loss = criterion(outputs, batch_labels, batch_weights)

            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size so the epoch average is per sample
            running_loss += loss.item() * batch_inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)

        # ---- Evaluation pass ----
        model.eval()
        running_loss = 0.0
        with torch.no_grad():
            for batch_inputs, batch_labels, batch_weights in test_loader:
                batch_inputs = batch_inputs.to(device)
                batch_labels = batch_labels.to(device)
                batch_weights = batch_weights.to(device)

                outputs = model(batch_inputs)
                loss = criterion(outputs, batch_labels, batch_weights)

                running_loss += loss.item() * batch_inputs.size(0)

        validation_loss = running_loss / len(test_loader.dataset)

        print("Epoch {}: Train Loss = {:.4f}, Test Loss = {:.4f}".format(epoch + 1, epoch_loss, validation_loss))

        # ---- Early stopping bookkeeping ----
        if validation_loss < best_loss:
            best_loss = validation_loss
            # Store a copy: state_dict() values alias the live parameters, so
            # keeping them directly would let later epochs overwrite the "best" weights.
            best_model_weights = _snapshot_state_dict(model)
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= early_stopping_patience:
            # Report the 1-based epoch number, matching the progress line above
            print("Early stopping at epoch {}".format(epoch + 1))
            break

    return best_model_weights

In [8]:
def reweight_fn(model, X_val):
    """Convert classifier scores into weights ``p / (1 - p)``.

    The model is moved to the CPU and put in eval mode before it is evaluated.

    Args:
        model: Module returning one score per row of ``X_val``.
        X_val: Input features, anything accepted by ``torch.Tensor(...)``.

    Returns:
        A flattened NumPy array with one weight per score.
    """
    cpu_model = model.to(torch.device("cpu"))
    cpu_model.eval()

    features = torch.Tensor(X_val)
    with torch.no_grad():
        scores = cpu_model(features).detach().numpy().ravel()

    return scores / (1.0 - scores)

Let's train the neural network with messy MC data.

## Step 2: Extract the parameters using the gradient descent algorithm

Since we have parameterized the neural network in step 1, we can fix the trained weights in the neural network and extract the angular coefficients by minimizing the loss with the gradient descent algorithm.

In [9]:
# Fit the model
def fit_fn(epochs, add_params_layer, fit_model, data_loader, device, optimizer, loss_fn):
    """Fit the parameters held by ``add_params_layer`` through a fixed classifier.

    Each batch is extended with the current parameter values by
    ``add_params_layer``, passed through ``fit_model``, and the loss is
    back-propagated so that ``optimizer`` updates the parameters.

    Args:
        epochs: Number of passes over ``data_loader``.
        add_params_layer: Module exposing a ``params`` tensor with at least
            three entries, reported as lambda, mu and nu.
        fit_model: Classifier used as a fixed function. It is switched to eval
            mode here; freezing its weights (``requires_grad = False``) is the
            caller's responsibility.
        data_loader: Iterable of ``(inputs, labels, weights)`` batches.
        device: Device each batch is moved to.
        optimizer: Optimizer over ``add_params_layer``'s parameters.
        loss_fn: Callable ``loss_fn(output, labels, weights)`` returning a scalar loss.

    Returns:
        ``(losses, fit_vals)``: the per-epoch mean loss, and a dict with keys
        ``"lambda"``, ``"mu"``, ``"nu"`` holding the parameter values after each epoch.
    """
    losses = []
    fit_vals = {
        "lambda": [],
        "mu": [],
        "nu": []
    }

    # The classifier must behave as a fixed function during the fit. In train
    # mode its BatchNorm layers would normalise with batch statistics and keep
    # updating their running statistics even though the weights are frozen.
    fit_model.eval()

    for epoch in range(epochs):
        add_params_layer.train()
        running_loss = 0.0
        for batch_inputs, batch_labels, batch_weights in data_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            batch_weights = batch_weights.to(device)

            # Forward pass
            optimizer.zero_grad()
            param_input = add_params_layer(batch_inputs)
            output = fit_model(param_input)

            # Compute the loss
            loss = loss_fn(output, batch_labels, batch_weights)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch average is per sample
            running_loss += loss.item() * batch_inputs.size(0)

        epoch_loss = running_loss / len(data_loader.dataset)

        # Read the current parameter values once and reuse them for logging and history
        lambda_val = add_params_layer.params[0].item()
        mu_val = add_params_layer.params[1].item()
        nu_val = add_params_layer.params[2].item()

        print("epoch : {}, loss = {:.4f}, lambda = {:.4f}, mu = {:.4f}, nu = {:.4f}".format(epoch + 1, epoch_loss,
                                                                                            lambda_val,
                                                                                            mu_val,
                                                                                            nu_val))
        losses.append(epoch_loss)
        fit_vals["lambda"].append(lambda_val)
        fit_vals["mu"].append(mu_val)
        fit_vals["nu"].append(nu_val)

    return losses, fit_vals

In [23]:
# Maximum number of epochs passed to the classifier training
epochs: int = 5

# Consecutive epochs without a lower test loss before training stops early.
# NOTE: with a patience larger than `epochs`, early stopping never triggers.
early_stopping_patience: int = 20

# Number of independent train-then-fit repetitions
iterations: int = 5

In [25]:
# Fitted coefficients collected over all iterations
lambda_fit, mu_fit, nu_fit = [], [], []

# ---------------------------------------------------------------------------
# Settings shared by every iteration
# ---------------------------------------------------------------------------
batch_size = 1024
n_MC_events = 10**6

# Epochs for the parameter fit (step 2). Kept under its own name: assigning to
# `epochs` inside the loop would replace the classifier-training setting for
# every iteration after the first.
fit_epochs = 5

# Coefficients used to weight the label-0 sample
lambda0, mu0, nu0 = 1.0, 0.0, 0.0

# ---------------------------------------------------------------------------
# Read the input files once; they do not change between iterations
# ---------------------------------------------------------------------------
# Column order matters below: 3 = phi, 4 = costh, 5 = true_phi, 6 = true_costh
mc_branches = ["mass", "pT", "xF", "phi", "costh", "true_phi", "true_costh"]
with uproot.open("BMFData.root:save") as tree:
    mc_arrays = tree.arrays(mc_branches, library="np")
data_array = np.column_stack([mc_arrays[name][:2 * n_MC_events] for name in mc_branches])

# The sample is split into two equal halves, so use at most half of what the file provides
n_MC_events = min(n_MC_events, len(data_array) // 2)
data_array = data_array[:2 * n_MC_events]

# Real data
real_branches = ["mass", "pT", "xF", "phi", "costh", "weight"]
with uproot.open("LH2Data.root:tree") as tree1:
    data1_1 = tree1.arrays(real_branches, library="np")

# Number of E906 events
n_E906_events = data1_1["mass"].shape[0]
assert 0 < n_E906_events < n_MC_events, "need fewer real events than MC events per half"

X1_val = np.column_stack((data1_1["phi"], data1_1["costh"]))
weight1_val = data1_1["weight"]

# Compile the train function once; it is reused by every iteration
opt_train = torch.compile(train_model, mode="max-autotune")

for i in range(iterations):
    print("*** Iteration {} ***".format(i+1))

    # -----------------------------------------------------------------------
    # Step 1: train the parameterized classifier on MC
    # -----------------------------------------------------------------------
    # Sample lambda, mu, nu values in the range (0.5, 1.5), (-0.5, 0.5), (-0.5, 0.5)
    lambda_vals = np.random.uniform(0.5, 1.5, n_MC_events)
    mu_vals = np.random.uniform(-0.5, 0.5, n_MC_events)
    nu_vals = np.random.uniform(-0.5, 0.5, n_MC_events)

    events0, events1 = train_test_split(data_array, test_size=0.5, shuffle=True)

    # Features: (phi, costh, lambda, mu, nu); both halves share the sampled coefficients
    X0 = np.column_stack((events0[:, 3], events0[:, 4], lambda_vals, mu_vals, nu_vals))
    X1 = np.column_stack((events1[:, 3], events1[:, 4], lambda_vals, mu_vals, nu_vals))

    # Weights are evaluated on the true angles: fixed coefficients for label 0,
    # the per-event sampled coefficients for label 1
    weight0 = weight_fn(lambda0, mu0, nu0, events0[:, 5], events0[:, 6])
    weight1 = weight_fn(lambda_vals, mu_vals, nu_vals, events1[:, 5], events1[:, 6])

    Y0 = np.zeros(n_MC_events)
    Y1 = np.ones(n_MC_events)

    X = np.concatenate((X0, X1))
    Y = np.concatenate((Y0, Y1)).reshape(-1, 1)
    weight = np.concatenate((weight0, weight1)).reshape(-1, 1)

    # Convert to pytorch tensor
    X_tensor = torch.from_numpy(X).float()
    Y_tensor = torch.from_numpy(Y).float()
    W_tensor = torch.from_numpy(weight).float()

    # Train test split
    X_train, X_test, Y_train, Y_test, W_train, W_test = train_test_split(X_tensor, Y_tensor, W_tensor, test_size=0.4, shuffle=True)

    # Create dataset and data loader
    train_dataset = TensorDataset(X_train, Y_train, W_train)
    test_dataset = TensorDataset(X_test, Y_test, W_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    fit_model = BMFClassifier(input_dim=5, hidden_dim=32)

    # Model summary
    print("using device : {}".format(device))
    fit_trainable_params = sum(p.numel() for p in fit_model.parameters() if p.requires_grad)
    print(fit_model)
    print("total trainable params in fit model: {}".format(fit_trainable_params))

    # Define the loss function and optimizer
    criterion = BMFLoss()
    optimizer = optim.Adam(fit_model.parameters(), lr=0.001)

    # Move the model to GPU if available
    fit_model = fit_model.to(device=device)

    best_model_weights = opt_train(fit_model, train_loader, test_loader, criterion, optimizer, device, epochs, early_stopping_patience)

    # Load the best model weights and use the classifier as a fixed function from here on
    fit_model.load_state_dict(best_model_weights)
    fit_model.eval()

    # -----------------------------------------------------------------------
    # Step 2: fit (lambda, mu, nu) on real data with the classifier frozen
    # -----------------------------------------------------------------------
    # Random starting point for (lambda, mu, nu)
    params_init = [np.random.uniform(0.5, 1.5), np.random.uniform(-0.5, 0.5), np.random.uniform(-0.5, 0.5)]

    # Create the AddParams2Input layer
    add_params_layer = AddParams2Input(params_init)

    # Set all weights in fit model to non-trainable
    for param in fit_model.parameters():
        param.requires_grad = False

    # Draw exactly n_E906_events MC events. An integer test_size avoids the
    # rounding of a float fraction, which could return a different row count
    # than the label and weight arrays built below.
    data0_1, data0_2 = train_test_split(events0, test_size=n_E906_events, shuffle=True)

    X0_val = data0_2[:, 3:5]

    Y0_val = np.zeros(n_E906_events)
    Y1_val = np.ones(n_E906_events)

    weight0_val = weight_fn(lambda0, mu0, nu0, data0_2[:, 5], data0_2[:, 6])

    X_fit = np.concatenate((X0_val, X1_val))
    Y_fit = np.concatenate((Y0_val, Y1_val)).reshape(-1, 1)
    weights_fit = np.concatenate((weight0_val, weight1_val)).reshape(-1, 1)

    # Create PyTorch datasets and dataloaders
    dataset = TensorDataset(torch.from_numpy(X_fit).float(),
                            torch.from_numpy(Y_fit).float(),
                            torch.from_numpy(weights_fit).float())
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Define the loss function and optimizer
    loss_fn = BMFLoss()
    fit_optimizer = torch.optim.Adam(add_params_layer.parameters(), lr=0.001)

    # Transfer models to GPU
    add_params_layer = add_params_layer.to(device)
    fit_model = fit_model.to(device)

    # Model summary
    print("using device : {}".format(device))
    fit_trainable_params = sum(p.numel() for p in fit_model.parameters() if p.requires_grad)
    print(fit_model)
    print("total trainable params in fit model: {}".format(fit_trainable_params))

    total_trainable_params = sum(p.numel() for p in add_params_layer.parameters() if p.requires_grad)
    print(add_params_layer)
    print("total trainable params in add-params layer: {}".format(total_trainable_params))

    losses, fit_vals = fit_fn(fit_epochs, add_params_layer, fit_model, data_loader, device, fit_optimizer, loss_fn)

    # Keep the values reached after the last fit epoch
    lambda_fit.append(fit_vals["lambda"][-1])
    mu_fit.append(fit_vals["mu"][-1])
    nu_fit.append(fit_vals["nu"][-1])

*** Iteration 1 ***
using device : cpu
BMFClassifier(
  (fc1): Linear(in_features=5, out_features=32, bias=True)
  (bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=32, out_features=32, bias=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=32, bias=True)
  (bn3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc4): Linear(in_features=32, out_features=1, bias=True)
  (bn4): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)
total trainable params in fit model: 2531
Epoch 1: Train Loss = 0.7223, Test Loss = 0.6953
Epoch 2: Train Loss = 0.6927, Test Loss = 0.6925
Epoch 3: Train Loss = 0.6924, Test Loss = 0.6925
Epoch 4: Train Loss = 0.6923, Test Loss = 0.6924
Epoch 5: Train Loss = 0.6923, Test Loss = 0.6924
using device : cpu
BMFCl

In [26]:
# Report the mean and spread of each fitted coefficient over all iterations
for summary_template, fitted_values in (
    ("lambda fit = {:.4f} +/- {:.4f}", lambda_fit),
    ("mu fit = {:.4f} +/- {:.4f}", mu_fit),
    ("nu fit = {:.4f} +/- {:.4f}", nu_fit),
):
    print(summary_template.format(np.mean(fitted_values), np.std(fitted_values)))

lambda fit = 0.9083 +/- 0.2882
mu fit = 0.0309 +/- 0.2403
nu fit = -0.0318 +/- 0.1391
