In [None]:
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import SAGEConv, knn_graph, global_mean_pool,radius_graph
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

# Step 1: Load the data
data = np.load("./Dataset_Filted/Simulation/gamma/npz", allow_pickle=True)
hitsE = data["hitsE"]
hitsM = data["hitsM"]
labels = data["labels"]

# Assume hitsE and hitsM are lists of arrays, each [n_hits, 3] for x, y, pe
# labels is array of 0/1 for background/signal per event

# Step 2: Build graph list
graph_list = []
for hE, hM, label in zip(hitsE, hitsM, labels):
    # Combine hits from electromagnetic and muon detectors
    if len(hE) > 0 and len(hM) > 0:
        nodes = np.vstack([hE, hM])
    elif len(hE) > 0:
        nodes = hE
    elif len(hM) > 0:
        nodes = hM
    else:
        continue  # Skip empty events

    # Separate position and features
    pos = torch.tensor(nodes[:, :2], dtype=torch.float32)  # [n_nodes, 2] for x, y
    x = torch.tensor(nodes[:, 2:], dtype=torch.float32)    # [n_nodes, 1] for pe

    # Build edges using k-NN based on positions (k=5 neighbors)
    edge_index = knn_graph(pos, k=5, loop=False)  # Undirected graph
    edge_index = radius_graph(pos, r=100.0, loop=False)  # r 是距离阈值，单位与 x, y 相同
    # Create Data object for graph classification
    data_graph = Data(x=x, pos=pos, edge_index=edge_index, y=torch.tensor([label], dtype=torch.long))
    graph_list.append(data_graph)

# Step 3: Split into train and validation sets (80/20)
train_graphs, val_graphs = train_test_split(graph_list, test_size=0.2, random_state=42)

# Step 4: Create data loaders
train_loader = DataLoader(train_graphs, batch_size=32, shuffle=True)
val_loader = DataLoader(val_graphs, batch_size=32, shuffle=False)

# Step 5: Define GNN model for graph classification
class GNNClassifier(torch.nn.Module):
    def __init__(self, in_channels=1, hidden_channels=64, num_classes=2):
        super(GNNClassifier, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.fc = torch.nn.Linear(hidden_channels, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = global_mean_pool(x, batch)  # Aggregate node features to graph-level
        x = self.fc(x)
        return x

# Instantiate model, optimizer, and loss
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GNNClassifier().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# Step 6: Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    # Validation
    model.eval()
    val_preds = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            out = model(batch)
            preds = torch.argmax(out, dim=1).cpu().numpy()
            val_preds.extend(preds)
            val_labels.extend(batch.y.cpu().numpy())
    
    acc = accuracy_score(val_labels, val_preds)
    auc = roc_auc_score(val_labels, val_preds) if len(set(val_labels)) > 1 else 0.0
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}, Val Acc: {acc:.4f}, Val AUC: {auc:.4f}")

# Step 7: Final evaluation on validation set (after training)
print("Training complete. Final validation metrics:")
print(f"Accuracy: {acc:.4f}, AUC: {auc:.4f}")

In [1]:
# 扩展原始代码以实现节点类型标签、边权重（距离倒数）、多头注意力（GAT）和分层建图
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GATConv, global_mean_pool
from torch_geometric.nn import knn_graph,radius_graph
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
import ana
# Step 1: Load the data
# data = np.load("./Dataset_Filted/Simulation/gamma/1e3_1e4/train_dataset_gamma_1e3_1e4_run000.npz", allow_pickle=True)
# hitsE = data["hitsE"]
# hitsM = data["hitsM"]
# parID = data["labels"]

NpzFileList=["./Dataset_Filted/Simulation/gamma/1e4_1e5/train_dataset_gamma_1e4_1e5_run000.npz",
             "./Dataset_Filted/Simulation/proton/1e4_1e5/train_dataset_proton_1e4_1e5_run000.npz",
             "./Dataset_Filted/Simulation/monopole/E1e9/train_dataset_monopole_E1e9.npz",
             ]
sample_num=(20000,20000,-1)

hitsE, hitsM, parID = ana.merge_npzdataset(NpzFileList,sample_num=sample_num)
labels = np.copy(parID)
labels[labels == 1] = 0
labels[labels == 14] = 0
labels[labels == 43] = 1


Using 20000 events from file ./Dataset_Filted/Simulation/gamma/1e4_1e5/train_dataset_gamma_1e4_1e5_run000.npz
Using 20000 events from file ./Dataset_Filted/Simulation/proton/1e4_1e5/train_dataset_proton_1e4_1e5_run000.npz
Using all events (size=21370) from file ./Dataset_Filted/Simulation/monopole/E1e9/train_dataset_monopole_E1e9.npz
Merged 61370 events from 3 files.


In [2]:

# Step 2: Build graph list with node types, edge weights, and hierarchical edges
graph_list = []
for hE, hM, label in zip(hitsE, hitsM, labels):
    # Combine hits from electromagnetic and muon detectors
    if len(hE) > 0 and len(hM) > 0:
        nodes = np.vstack([hE, hM])
        types = np.concatenate([np.zeros(len(hE)), np.ones(len(hM))])  # 0=electromagnetic, 1=muon
    elif len(hE) > 0:
        nodes = hE
        types = np.zeros(len(hE))  # Only electromagnetic
    elif len(hM) > 0:
        nodes = hM
        types = np.ones(len(hM))  # Only muon
    else:
        continue  # Skip empty events

    # Node features: [pe, type]
    x = torch.tensor(np.column_stack([nodes[:, 2], types]), dtype=torch.float32)  # [n_nodes, 2]
    pos = torch.tensor(nodes[:, :2], dtype=torch.float32)  # [n_nodes, 2] for x, y

    # Hierarchical graph construction
    # (1) Intra-type edges: k-NN within electromagnetic and muon hits
    n_e = len(hE)
    n_m = len(hM)
    edge_index_e = knn_graph(pos[:n_e], k=5, loop=False) if n_e > 0 else torch.empty((2, 0), dtype=torch.long)
    edge_index_m = knn_graph(pos[n_e:], k=5, loop=False) if n_m > 0 else torch.empty((2, 0), dtype=torch.long)
    if n_m > 0:
        edge_index_m = edge_index_m + n_e  # Offset muon node indices

    # (2) Inter-type edges: Connect each electromagnetic hit to nearest muon hit
    edge_index_inter = []
    if n_e > 0 and n_m > 0:
        for i in range(n_e):
            # Compute distances from electromagnetic hit i to all muon hits
            dists = torch.norm(pos[i:i+1] - pos[n_e:], dim=1)
            if dists.numel() > 0:
                j = torch.argmin(dists).item()  # Nearest muon hit
                edge_index_inter.append([i, n_e + j])
                edge_index_inter.append([n_e + j, i])  # Undirected
        edge_index_inter = torch.tensor(edge_index_inter, dtype=torch.long).T

    # Combine edges
    edge_index = torch.cat([edge_index_e, edge_index_m, edge_index_inter], dim=1) if edge_index_inter.numel() > 0 else torch.cat([edge_index_e, edge_index_m], dim=1)

    # Edge weights: Inverse distance
    edge_weights = []
    for i, j in edge_index.T:
        dist = torch.norm(pos[i] - pos[j]).item()
        weight = 1.0 / (dist + 1e-6)  # Avoid division by zero
        edge_weights.append(weight)
    edge_weights = torch.tensor(edge_weights, dtype=torch.float32)

    # Create Data object
    data_graph = Data(x=x, pos=pos, edge_index=edge_index, edge_attr=edge_weights, y=torch.tensor([label], dtype=torch.long))
    graph_list.append(data_graph)

# Step 3: Split into train and validation sets
train_graphs, val_graphs = train_test_split(graph_list, test_size=0.2, random_state=42)

# Step 4: Create data loaders
train_loader = DataLoader(train_graphs, batch_size=32, shuffle=True)
val_loader = DataLoader(val_graphs, batch_size=32, shuffle=False)

# Step 5: Define GNN model with GAT (multi-head attention)
class GATClassifier(torch.nn.Module):
    def __init__(self, in_channels=2, hidden_channels=64, num_classes=2, heads=4):
        super(GATClassifier, self).__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATConv(hidden_channels * heads, hidden_channels, heads=heads, concat=True)
        self.fc = torch.nn.Linear(hidden_channels * heads, num_classes)

    def forward(self, data):
        x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch
        x = F.relu(self.conv1(x, edge_index, edge_attr))
        x = F.relu(self.conv2(x, edge_index, edge_attr))
        x = global_mean_pool(x, batch)  # Graph-level representation
        x = self.fc(x)
        return x

# Instantiate model, optimizer, and loss
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GATClassifier().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
print("开始训练：")
# Step 6: Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    # Validation
    model.eval()
    val_preds = []
    val_labels = []
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            out = model(batch)
            preds = torch.argmax(out, dim=1).cpu().numpy()
            val_preds.extend(preds)
            val_labels.extend(batch.y.cpu().numpy())
    
    acc = accuracy_score(val_labels, val_preds)
    auc = roc_auc_score(val_labels, val_preds) if len(set(val_labels)) > 1 else 0.0
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}, Val Acc: {acc:.4f}, Val AUC: {auc:.4f}")

# Step 7: Final evaluation
print("Training complete. Final validation metrics:")
print(f"Accuracy: {acc:.4f}, AUC: {auc:.4f}")

开始训练：
Epoch 1/50, Loss: 0.6203, Val Acc: 0.7644, Val AUC: 0.6851
Epoch 2/50, Loss: 0.4454, Val Acc: 0.7699, Val AUC: 0.7949
Epoch 3/50, Loss: 0.4319, Val Acc: 0.7686, Val AUC: 0.8006
Epoch 4/50, Loss: 0.4181, Val Acc: 0.8139, Val AUC: 0.7904
Epoch 5/50, Loss: 0.4261, Val Acc: 0.7743, Val AUC: 0.7173
Epoch 6/50, Loss: 0.4425, Val Acc: 0.7592, Val AUC: 0.7840
Epoch 7/50, Loss: 0.4201, Val Acc: 0.7995, Val AUC: 0.7536
Epoch 8/50, Loss: 0.4212, Val Acc: 0.7984, Val AUC: 0.7603
Epoch 9/50, Loss: 0.4310, Val Acc: 0.7989, Val AUC: 0.7612
Epoch 10/50, Loss: 0.4271, Val Acc: 0.7959, Val AUC: 0.7956
Epoch 11/50, Loss: 0.4175, Val Acc: 0.8073, Val AUC: 0.7663
Epoch 12/50, Loss: 0.4114, Val Acc: 0.8045, Val AUC: 0.7763
Epoch 13/50, Loss: 0.4167, Val Acc: 0.8035, Val AUC: 0.8058
Epoch 14/50, Loss: 0.4240, Val Acc: 0.7944, Val AUC: 0.7427
Epoch 15/50, Loss: 0.4185, Val Acc: 0.7392, Val AUC: 0.6412
Epoch 16/50, Loss: 0.4182, Val Acc: 0.7973, Val AUC: 0.7888
Epoch 17/50, Loss: 0.4112, Val Acc: 0.7955,

In [1]:
import numpy as np
print(np.array([0,1,1]))

[0 1 1]
