In [1]:
import numpy as np
import pandas as pd
import awkward as ak
import hist
from hist import Hist
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch import optim
import matplotlib.pyplot as plt
import topcoffea.modules.utils as utils
import pickle
import gzip
import logging
import time

In [2]:
class WeightedDataset(Dataset):
    """Map-style dataset that pairs every sample with a weight and a target.

    The three containers are indexed in lockstep; the dataset length is
    taken from ``data`` alone.
    """

    def __init__(self, data, weights, targets):
        """Keep references to the three parallel containers (nothing is copied)."""
        self.data, self.weights, self.targets = data, weights, targets

    def __len__(self):
        """Return the number of samples, i.e. ``len(data)``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, weight, target)`` triple stored at ``idx``."""
        return self.data[idx], self.weights[idx], self.targets[idx]

In [3]:
class NeuralNetwork(nn.Module):
    """Small fully connected binary classifier with a sigmoid output.

    Architecture: ``input_dim -> hidden_dim -> hidden_dim -> 1`` with
    LeakyReLU activations between the linear layers and a final Sigmoid, so
    the output is a value in (0, 1) per input row.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers. Defaults to 64, the width
            that was previously hard-coded.
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        # Layer order (and therefore the state-dict keys main_module.0/2/4)
        # is kept fixed so previously saved checkpoints still load.
        self.main_module = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run ``x`` of shape ``(batch, input_dim)`` through the network.

        Returns:
            Tensor of shape ``(batch, 1)`` holding the sigmoid outputs.
        """
        return self.main_module(x)

In [4]:
# Gzip-compressed pickles holding the SMEFT and Powheg input tables.
fSMEFT = "/afs/crc.nd.edu/user/h/hnelson2/dctr/analysis/dctr_SMEFT.pkl.gz"
fpowheg = "/afs/crc.nd.edu/user/h/hnelson2/dctr/analysis/dctr_powheg_skimmed.pkl.gz"

# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# Only load files that you (or a trusted job) produced.
# The files are opened in `with` blocks so the gzip handles are closed
# deterministically instead of being left for the garbage collector.
with gzip.open(fSMEFT) as smeft_file:
    # The SMEFT pickle stores a wrapper object; `.get()` is called on it to
    # obtain the table used below (presumably an accumulator - TODO confirm).
    inputs_smeft = pickle.load(smeft_file).get()
with gzip.open(fpowheg) as powheg_file:
    # The Powheg pickle is used as loaded, with no unwrapping step.
    inputs_powheg = pickle.load(powheg_file)

# Both samples must have identical shape before they are split and stacked.
assert inputs_smeft.shape == inputs_powheg.shape, f"SMEFT and Powheg inputs are not the same shape.\n SMEFT shape: {inputs_smeft.shape} \n Powheg shape:{inputs_powheg.shape}"

In [5]:
# Seed shared by both draws so the split is reproducible.
rando = 1234

# Draw 70% of each sample for training ...
smeft_train = inputs_smeft.sample(frac=0.7, random_state=rando)
powheg_train = inputs_powheg.sample(frac=0.7, random_state=rando)

# ... and keep the rows whose index labels were not drawn as the test set.
smeft_test = inputs_smeft.drop(smeft_train.index)
powheg_test = inputs_powheg.drop(powheg_train.index)

# Classification targets: 1 for SMEFT events, 0 for Powheg events, with the
# same length as each training sample's 'weights' column.
truth_smeft, truth_powheg = (
    np.ones_like(smeft_train['weights']),
    np.zeros_like(powheg_train['weights']),
)

# Per-event training weights are all set to one here; the values stored in
# the 'weights' column are only used to size the arrays.
weights_smeft, weights_powheg = (
    np.ones_like(smeft_train['weights']),
    np.ones_like(powheg_train['weights']),
)

In [6]:
z = torch.from_numpy(np.concatenate([smeft_train, powheg_train], axis=0).astype(np.float32))
w = torch.from_numpy(np.concatenate([weights_smeft, weights_powheg], axis=0).astype(np.float32))
y = torch.from_numpy(np.concatenate([truth_smeft, truth_powheg], axis=0).astype(np.float32))

In [7]:
test_z = torch.from_numpy(np.concatenate([smeft_test, powheg_test], axis=0).astype(np.float32))
test_y = torch.from_numpy(np.concatenate([np.ones_like(smeft_test['weights']), np.zeros_like(powheg_test['weights'])], axis=0).astype(np.float32))

In [8]:
# Bundle the training tensors so each item is a (sample, weight, target) triple,
# then serve them in reshuffled mini-batches of 128.
train_dataset = WeightedDataset(data=z, weights=w, targets=y)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=128)

In [9]:
# Seed torch's global RNG with the same seed used for the train/test split so
# that weight initialisation and the DataLoader's shuffling are reproducible
# under Restart & Run All.
torch.manual_seed(rando)

# One network input per column of the training feature matrix.
input_dim = z.shape[1]
model = NeuralNetwork(input_dim)

# Binary cross-entropy on the sigmoid outputs, averaged over the batch.
loss_fn = nn.BCELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Loss histories; the training cell re-creates these before it starts.
trainLoss = []
testLoss = []

In [None]:
nepochs = 10

# Per-epoch loss histories, stored as plain Python floats.
trainLoss = []       # mean of the mini-batch losses seen during the epoch
trainLossEnd = []    # loss on the full training set after the epoch's updates
testLoss = []        # loss on the held-out set after the epoch's updates

for epoch in range(nepochs):
    epoch_loss = 0.0
    model.train()   # training mode; matters for layers that behave differently in train vs eval
    for batch_samples, batch_weights, batch_targets in train_dataloader:
        optimizer.zero_grad()                       # clear the gradients from the previous batch
        # forward pass
        outputs = model(batch_samples).squeeze(1)   # (batch, 1) -> (batch,) to match the targets
        # NOTE(review): batch_weights is yielded by the dataset but not applied;
        # loss_fn is an unweighted BCE. Confirm whether weighting is intended.
        loss = loss_fn(outputs, batch_targets)
        # backward pass
        loss.backward()                             # gradients of the loss w.r.t. the model parameters
        optimizer.step()                            # parameter update

        epoch_loss += loss.item()

    trainLoss_epoch = epoch_loss / len(train_dataloader)

    # Evaluate in eval mode and without autograd: these losses are only
    # monitored, so building a graph over the full datasets wastes memory, and
    # storing the resulting tensors would keep those graphs alive every epoch.
    # `.item()` turns the 0-d tensors into floats for plotting/serialising.
    model.eval()
    with torch.no_grad():
        trainLossEnd_epoch = loss_fn(model(z).squeeze(1), y).item()
        testLoss_epoch = loss_fn(model(test_z).squeeze(1), test_y).item()

    trainLossEnd.append(trainLossEnd_epoch)
    trainLoss.append(trainLoss_epoch)
    testLoss.append(testLoss_epoch)

    print(f"Epoch {epoch+1}/{nepochs}")
    print(f"    TrainLoss: {trainLoss_epoch}")
    print(f"    TrainLossEnd: {trainLossEnd_epoch}")
    print(f"    TestLoss: {testLoss_epoch}")
    print("--------------------")

Epoch 1/10
    TrainLoss: 0.0008219547967386761
    TrainLossEnd: 0.00018521766469348222
    TestLoss: 9.620402124710381e-05
--------------------
Epoch 2/10
    TrainLoss: 0.00010819627537211628
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
Epoch 3/10
    TrainLoss: 0.0
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
Epoch 4/10
    TrainLoss: 0.0
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
Epoch 5/10
    TrainLoss: 0.0
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
Epoch 6/10
    TrainLoss: 0.0
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
Epoch 7/10
    TrainLoss: 0.0
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
Epoch 8/10
    TrainLoss: 0.0
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
Epoch 9/10
    TrainLoss: 0.0
    TrainLossEnd: 0.0
    TestLoss: 0.0
--------------------
