In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
import torch 
import torch.nn as nn

In [2]:
data = pd.read_csv('dataframe.csv')
data.columns
training_features = ['etmiss', 'mtw', 'leadleptPt', 'leadleptEta',
       'leadleptE', 'leadleptPhi', 'n_TopLRjets',
       'leadTopLRjet_pt', 'leadTopLRjet_eta', 'leadTopLRjet_phi',
       'leadTopLRjet_m', 'leadTopLRjet_Tau32', 'n_jets', 'leadjet_pt',
       'leadjet_eta', 'n_bjets', 'leadbjet_pt', 'leadbjet_eta', 'ttbarMLR']
label = "label"

In [3]:
scaler = StandardScaler()
data[training_features] = scaler.fit_transform(data[training_features])

X = scaler.transform(data[training_features])
y = data[label]
w = data['scaleweight']

x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(X, y, w, test_size=0.2, random_state=42)

In [4]:
x_train_tensor = torch.tensor(x_train, dtype=torch.float32).to(torch.device("mps"))
x_test_tensor = torch.tensor(x_test, dtype=torch.float32).to(torch.device("mps"))
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(torch.device("mps"))
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32).view(-1, 1).to(torch.device("mps"))
w_train_tensor = torch.tensor(w_train, dtype=torch.float32).to(torch.device("mps"))
w_test_tensor = torch.tensor(w_test.to_numpy(), dtype=torch.float32).to(torch.device("mps"))

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

mps_device = torch.device("mps:0")
torch.set_default_device(mps_device)

# Print the shape of the training data
print(x_train.shape)

# Define the neural network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim):
        super(NeuralNetwork, self).__init__()
        self.hidden1 = nn.Linear(input_dim, 64)
        self.hidden2 = nn.Linear(64, 128)
        self.hidden3 = nn.Linear(128, 256)
        self.hidden4 = nn.Linear(256, 128)
        self.output = nn.Linear(128, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.relu(self.hidden1(x))
        x = self.relu(self.hidden2(x))
        x = self.relu(self.hidden3(x))
        x = self.relu(self.hidden4(x))
        x = self.sigmoid(self.output(x))
        return x

# Create the model instance
input_dim = x_train.shape[1]
model = NeuralNetwork(input_dim).to(mps_device)

# Define the loss function and optimizer
criterion = nn.BCELoss(reduction='none')  # to handle sample weights manually
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Create DataLoader for training and validation data
train_dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32), 
                              torch.tensor(y_train, dtype=torch.float32), 
                              torch.tensor(w_train, dtype=torch.float32))
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=False)

val_dataset = TensorDataset(torch.tensor(x_test, dtype=torch.float32), 
                            torch.tensor(y_test.to_numpy(), dtype=torch.float32), 
                            torch.tensor(w_test.to_numpy(), dtype=torch.float32))
val_loader = DataLoader(val_dataset, batch_size=1024, shuffle=False)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    for x_batch, y_batch, w_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(x_batch).squeeze()
        loss = criterion(outputs, y_batch)
        weighted_loss = (loss * w_batch).mean()  # apply sample weights
        weighted_loss.backward()
        optimizer.step()

    # Validation loop
    model.eval()
    with torch.no_grad():
        val_loss = 0
        correct = 0
        total = 0
        for x_batch, y_batch, w_batch in val_loader:
            outputs = model(x_batch).squeeze()
            loss = criterion(outputs, y_batch)
            weighted_loss = (loss * w_batch).mean()
            val_loss += weighted_loss.item()

            predicted = (outputs > 0.5).float()
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')


(201676, 19)
Epoch 1/100, Validation Loss: 0.0078, Validation Accuracy: 0.9679
Epoch 2/100, Validation Loss: 0.0075, Validation Accuracy: 0.9679
Epoch 3/100, Validation Loss: 0.0075, Validation Accuracy: 0.9679
Epoch 4/100, Validation Loss: 0.0074, Validation Accuracy: 0.9679
Epoch 5/100, Validation Loss: 0.0074, Validation Accuracy: 0.9679
Epoch 6/100, Validation Loss: 0.0074, Validation Accuracy: 0.9679
Epoch 7/100, Validation Loss: 0.0074, Validation Accuracy: 0.9679
Epoch 8/100, Validation Loss: 0.0074, Validation Accuracy: 0.9679
Epoch 9/100, Validation Loss: 0.0073, Validation Accuracy: 0.9679


KeyboardInterrupt: 