# Medic Network Notebook

In this notebook we use the MEDIC network defined in the utility module `medica.py` to train and test on our generated dataset. The dataset is created by the `Data_create.ipynb` notebook.

In [None]:
# MEDIC NETWORK_v2
# Neural network for collider data: track, tower, missinget -> 4-class probability distribution

import os
import shutil # To rename directories

import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from medica import *

## Loading the data for training/testing

In [None]:
# Run configuration: input window, data location and training hyperparameters
window = 10
json_path = f"Data/training_data_w_{window}.json"
batch_size = 64
lr = 1e-3
epochs = 300
patience = 20

# Read the generated events from JSON and wrap them in a torch Dataset
# (no splitting happens in this cell)
data = read_json_to_awkward(json_path)
dataset = ColliderDataset(data)

print("Total events in dataset:", len(dataset))

# Peek at the first example to read the per-object feature counts from axis 2.
# assumes each of track/tower/met has at least three axes — TODO confirm against ColliderDataset
track, tower, met, y = dataset[0]

track_features = track.shape[2]
print("Track features:", track_features)

tower_features = tower.shape[2]
# NOTE(review): "fetures" is a typo in the printed label; kept as-is so the output is unchanged
print("Tower fetures:", tower_features)

met_features = met.shape[2]
print("Missing ET features:", met_features)

In [None]:
# Split the dataset into a train/validation portion (80%) and a held-out test portion (20%).
# The split is drawn from a dedicated, seeded generator so the same events land in the
# test portion on every run; with the default global generator the held-out set would
# change after each kernel restart, making test metrics non-reproducible.
split_seed = 42
n_total = len(dataset)
n_train_val = int(0.8 * n_total)
n_test = n_total - n_train_val  # remainder, so the two lengths always sum to n_total
train_val_set, test_set = random_split(
    dataset,
    [n_train_val, n_test],
    generator=torch.Generator().manual_seed(split_seed),
)
train_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size)

In [None]:
# Shared settings for training and evaluation
k_fold = 5
embed_dim = 128
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
criterion = KLDPerSample()  # custom per-sample KL-divergence loss

# k-fold cross-validation training.
# NOTE(review): `dataset` receives the DataLoader `train_loader`, not the underlying
# `train_val_set`; since the function is also given `k` and `batch_size`, confirm that
# cross_validate_model really expects a loader here.
cv_settings = dict(
    dataset=train_loader,
    k=k_fold,
    batch_size=batch_size,
    model_class=MEDIC,
    model_kwargs={
        "d_track": track_features,
        "d_tower": tower_features,
        "d_met": met_features,
        "embed_dim": embed_dim,
    },
    optimizer_class=torch.optim.Adam,
    optimizer_kwargs={"lr": lr},
    criterion=criterion,
    device=device,
    epochs=epochs,
    patience=patience,
)
cross_validate_model(**cv_settings)



# Evaluate on the test loader, building the model with the same constructor
# arguments that were used for training.
test_model_kwargs = {
    "d_track": track_features,
    "d_tower": tower_features,
    "d_met": met_features,
    "embed_dim": embed_dim,
}
metrics_summary = test_model(
    model_class=MEDIC,
    model_kwargs=test_model_kwargs,
    test_loader=test_loader,
    device=device,
    k=k_fold,  # same number of folds used in training
)

# Wrap the summary in a single-element list to print it as a table
# (presumably a dict of metric name -> value, giving one row — TODO confirm)
metrics_table = pd.DataFrame([metrics_summary])
print(metrics_table)

# Archive this run's "Analytics" directory under a window-specific name to save the results.
# shutil.move() places the source *inside* the destination when the destination is an
# existing directory, and raises shutil.Error when that nested path already exists.
# Reusing an existing name would therefore nest the results on a second run and crash
# (after the whole training) on a third, so an unused name is chosen instead.
base_results_dir = f"Analytics_w_{window}"
results_dir = base_results_dir
run_index = 2
while os.path.exists(results_dir):
    # First run keeps the plain name; later runs get _run2, _run3, ...
    results_dir = f"{base_results_dir}_run{run_index}"
    run_index += 1
shutil.move("Analytics", results_dir)
print(f"\nAll evaluation plots and metrics are now moved under {results_dir}.\n")