## MLP Smoothing / Merging Baseline

Train a baseline MLP model for smoothing using graph-level features.

Features:
- Number of nodes
- Energy
- Barycenters

In [22]:
import torch
import torch.nn as nn
from torch_geometric.loader import DataLoader
from torch.optim.lr_scheduler import StepLR

from reco.data_utils import BaseTracksterPairs

In [12]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [36]:
# Only the photon sample is loaded: pions do not have negative samples for now.
ds = BaseTracksterPairs("data", kind="photon")

# Column-wise scaling: each feature column is divided by its infinity norm
# (largest absolute value along dim 0).
ds.data.x = nn.functional.normalize(ds.data.x, dim=0, p=float("inf"))

In [37]:
# Peek at the first row of the normalized feature matrix.
ds.data.x[0]

tensor([0.0645, 0.0121, 0.1608, 0.0933, 0.1663, 0.2418])

In [38]:
# balance the dataset: split by label, then size both splits from the smaller class
pos, neg = ds[ds.data.y == 1], ds[ds.data.y == 0]
len_pos, len_neg = len(pos), len(neg)
print(f"Positive: {len_pos}, Negative {len_neg}")

# The smaller class caps how many samples each class may contribute.
shorter = min(len_pos, len_neg)
# A tenth of that per-class budget is held out for testing.
test_n = shorter // 10

Positive: 7060, Negative 10784


In [39]:
# Both classes contribute the same number of samples to each split:
# the first n_train items of a class go to training, the next test_n to testing.
n_train = shorter - test_n
train_set = pos[:n_train] + neg[:n_train]
test_set = pos[n_train:shorter] + neg[n_train:shorter]
print(f"Train samples: {len(train_set)}, Test samples: {len(test_set)}")

# Both loaders share the same batching configuration.
loader_kwargs = dict(batch_size=16, shuffle=True)
train_dl = DataLoader(train_set, **loader_kwargs)
test_dl = DataLoader(test_set, **loader_kwargs)

Train samples: 12708, Test samples: 1412


In [40]:
class BaselineMerger(torch.nn.Module):
    """Single-hidden-layer perceptron producing one sigmoid score per input row.

    Args:
        num_inputs: Width of the feature vectors read from ``data.x``.
        num_hidden: Number of units in the hidden layer.
    """

    def __init__(self, num_inputs: int, num_hidden: int = 10) -> None:
        super().__init__()

        # Hidden layer and its sigmoid non-linearity.
        self.W1 = nn.Linear(num_inputs, num_hidden)
        self.activation = nn.Sigmoid()

        # Output layer: a single unit squashed into (0, 1).
        self.W2 = nn.Linear(num_hidden, 1)
        self.output = nn.Sigmoid()

    def forward(self, data):
        """Score ``data.x``; the returned tensor has a trailing dimension of size 1."""
        hidden = self.activation(self.W1(data.x))
        return self.output(self.W2(hidden))


In [41]:
# Binary cross-entropy computed on probabilities (not logits); shared by train().
loss_obj = torch.nn.BCELoss()

def train(model, opt, loader):
    """Run one optimisation pass over ``loader`` and return the summed batch loss.

    Args:
        model: Module called as ``model(batch)``; its output is flattened to 1-D
            and compared against ``batch.y`` with the module-level ``loss_obj``.
        opt: Optimizer that updates ``model``'s parameters.
        loader: Iterable of batches exposing ``.to(device)`` and a ``.y`` label
            tensor.

    Returns:
        The sum of the per-batch loss values as a Python float (``0.0`` when
        ``loader`` yields nothing).
    """
    # The mode does not change between batches, so switch it once up front.
    model.train()

    epoch_loss = 0.0
    for batch in loader:
        batch = batch.to(device)
        opt.zero_grad()
        z = model(batch).reshape(-1)
        loss = loss_obj(z, batch.y.type(torch.float))
        loss.backward()
        opt.step()
        # Accumulate the plain number: adding the loss tensor itself would chain
        # every batch's autograd history onto the accumulator for the whole epoch.
        epoch_loss += loss.item()
    return epoch_loss

@torch.no_grad()
def test(model, data):
    """Return the fraction of samples in ``data`` that ``model`` classifies correctly.

    A sample counts as positive when the model output exceeds 0.5.

    Args:
        model: Module called as ``model(batch)``; its output is flattened to 1-D.
        data: Iterable of batches exposing ``.to(device)`` and a ``.y`` label
            tensor.

    Returns:
        Accuracy as a Python float in ``[0, 1]``; ``0.0`` when ``data`` yields no
        samples (instead of dividing by zero).
    """
    # The mode does not change between batches, so switch it once up front.
    model.eval()

    total = 0
    correct = 0
    for batch in data:
        batch = batch.to(device)
        z = model(batch).reshape(-1)
        prediction = (z > 0.5).type(torch.int)
        total += len(prediction)
        # Reduce inside torch and pull out one scalar; the builtin sum() would
        # walk the comparison tensor element by element in Python.
        correct += int((prediction == batch.y).sum())
    return correct / total if total else 0.0

In [None]:
# Input width is the number of feature columns; the hidden layer has 128 units.
model = BaselineMerger(ds.data.x.size(1), num_hidden=128).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# The learning rate is halved every 50 epochs.
scheduler = StepLR(optimizer, gamma=0.5, step_size=50)

# Accuracy of the untrained model, as a reference point.
test_acc = test(model, test_dl)
print(f"Initial acc: {test_acc:.4f}")

for epoch in range(1, 201):
    loss = train(model, optimizer, train_dl)
    scheduler.step()

    # Accuracies are only evaluated and reported on every tenth epoch.
    if epoch % 10 != 0:
        continue
    train_acc = test(model, train_dl)
    test_acc = test(model, test_dl)
    print(f'Epoch: {epoch}, loss: {loss:.4f}, train acc: {train_acc:.4f}, test acc: {test_acc:.4f}')

In [28]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Collect hard 0/1 predictions and ground-truth labels over the whole test set.
pred = []
lab = []
model.eval()
with torch.no_grad():  # inference only, so skip autograd bookkeeping
    for b in test_dl:
        # The model was moved to `device`; its inputs have to be there as well.
        b = b.to(device)
        # Flatten the model output so predictions form a flat list, like the labels.
        pred += (model(b).reshape(-1) > 0.5).type(torch.int).tolist()
        lab += b.y.tolist()

# Pin the label order so the matrix stays 2x2 (and the 4-way unpack valid)
# even if one class happens to be absent from lab/pred.
tn, fp, fn, tp = confusion_matrix(lab, pred, labels=[0, 1]).ravel()
print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
print(f'Accuracy: {accuracy_score(lab, pred):.4f}')
print(f'Precision: {precision_score(lab, pred):.4f}')
print(f'Recall: {recall_score(lab, pred):.4f}')

TP: 628, TN: 554, FP: 152, FN: 78
Accuracy: 0.8371
Precision: 0.8051
Recall: 0.8895
