In [1]:
import os.path as osp
import pickle
import time

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, ChebConv  # noqa
from torch_geometric.nn import global_max_pool
from torch_geometric.data import Batch, Data

import numpy as np
import h5py


# Load the HDF5 event data and build the graph batch

In [2]:
# Open the merged test file read-only.  The handle is intentionally left open:
# event_data and labels are indexed again by later cells, which presumably
# requires the underlying file to still be open.
h5_data = h5py.File("../preprocessing/merger_test.h5", mode='r')

# Show which top-level entries the file provides.
print([*h5_data.keys()])

# Per-event node features and the per-event class labels.
event_data, labels = h5_data['event_data'], h5_data['labels']

print(event_data.shape, labels.shape)

['angles', 'energies', 'event_data', 'event_ids', 'labels', 'positions', 'root_files']
(2937, 15808, 2) (2937,)


In [3]:
# Number of event graphs sampled into one batch.
batch_size = 32

# First axis of event_data counts events, second axis counts nodes per event.
max_event, nodes = event_data.shape[:2]

In [4]:
# Build the (2, num_edges) COO edge list that every event graph shares.
# edges_dict.pkl maps a source node index to an iterable of target node indices.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open("../visualization/edges_dict.pkl", 'rb') as f:
    edges = pickle.load(f)

# Collect the (source, target) pairs directly rather than filling a dense
# nodes x nodes int64 matrix (about 2 GB for 15808 nodes) just to read the
# non-zero coordinates back.  The set removes duplicate pairs and sorting
# yields the same row-major ordering that to_sparse()._indices() produced.
edge_pairs = sorted({(int(k), int(v)) for k, vs in edges.items() for v in vs})

# reshape(-1, 2) keeps the result well-formed (shape (2, 0)) for an empty dict.
edge_index = (
    torch.tensor(edge_pairs, dtype=torch.int64).reshape(-1, 2).t().contiguous()
)

# The dense version failed on indices >= nodes; keep failing loudly instead of
# handing an invalid graph to the model.  Negative indices are rejected too.
if edge_index.numel() and (edge_index.min() < 0 or edge_index.max() >= nodes):
    raise ValueError(
        "edges_dict.pkl references node indices outside [0, {})".format(nodes)
    )

In [5]:
# Pick batch_size event indices at random from [0, max_event).
event_ids = np.random.randint(0, max_event, batch_size)

# One Data object per sampled event: node features from event_data, a single
# int64 label, and the edge list shared by all events.
datas = [
    Data(
        x=torch.from_numpy(event_data[event_id]),
        y=torch.tensor([labels[event_id]], dtype=torch.int64),
        edge_index=edge_index,
    )
    for event_id in event_ids
]

# Merge the individual graphs into one batched graph.
batch = Batch.from_data_list(datas)

print(batch.edge_index.shape)
print(batch.x.to_sparse())

torch.Size([2, 2129664])
tensor(indices=tensor([[    13,     13,     20,  ..., 505798, 505820, 505820],
                       [     0,      1,      0,  ...,      1,      0,      1]]),
       values=tensor([   1.2413,  988.0000,    1.2055,  ..., 1004.9000,
                         1.5430,  993.0000]),
       size=(505856, 2), nnz=129966, layout=torch.sparse_coo)


# Network definition and training

In [6]:
class Net(torch.nn.Module):
    """Three-layer GCN classifier with a global max-pool readout.

    Layer widths are 2 -> 16 -> 16 -> 5.  Every GCNConv is created with
    ``cached=True``.
    NOTE(review): per the PyG docs ``cached=True`` reuses the normalised
    adjacency computed on the first forward pass, so presumably every call
    must use the same ``edge_index`` -- confirm before feeding batches with a
    different graph structure.

    A two-layer ChebConv (K=2) variant was tried previously and is not used.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(2, 16, cached=True)
        self.conv2 = GCNConv(16, 16, cached=True)
        self.conv3 = GCNConv(16, 5, cached=True)

    def forward(self, x, edge_index, batch):
        """Return per-graph log-probabilities over the 5 output classes.

        Args:
            x: node feature matrix with 2 columns.
            edge_index: edge list passed to every convolution.
            batch: assignment vector handed to ``global_max_pool``.

        Returns:
            ``log_softmax`` (over dim 1) of the max-pooled output of the
            last convolution.
        """
        hidden = x
        # Two hidden blocks: convolution -> ReLU -> dropout (active only in
        # training mode).
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden, edge_index))
            hidden = F.dropout(hidden, training=self.training)

        node_scores = self.conv3(hidden, edge_index)
        pooled = global_max_pool(node_scores, batch)
        return F.log_softmax(pooled, dim=1)

In [7]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Move both the network and the batched graph to the chosen device.
model = Net().to(device)
batch = batch.to(device)

# Adam with L2 weight decay over all model parameters.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)


In [8]:
def train():
    """Run one optimisation step of the global ``model`` on the global ``batch``.

    Switches the model to training mode, clears old gradients, computes the
    negative log-likelihood loss of the model output against ``batch.y``,
    back-propagates it and applies one ``optimizer`` step.  Returns nothing.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(batch.x, batch.edge_index, batch.batch)
    loss = F.nll_loss(log_probs, batch.y)

    loss.backward()
    optimizer.step()


def test():
    model.eval()
    logits = model(batch.x, batch.edge_index, batch.batch)
    pred = logits.argmax(1)
    acc = pred.eq(batch.y).sum().item() / batch.y.shape[0]
    return acc

In [9]:
# Time N_EPOCHS optimisation steps on the fixed batch (the sparse conversion
# of the features is part of the measured span, as before).
N_EPOCHS = 200
# Set to True to also evaluate and print the accuracy after every epoch; the
# evaluation time is then included in the reported duration.
LOG_ACCURACY = False

# CUDA kernels are launched asynchronously, so wait for pending work before
# starting and before stopping the clock; otherwise the timer can stop while
# the GPU is still busy.
on_gpu = batch.x.is_cuda
if on_gpu:
    torch.cuda.synchronize()
start = time.perf_counter()  # monotonic clock, suited to interval timing

best_acc = 0
# Feed the features as a sparse tensor.  The guard keeps this cell
# re-runnable: batch.x is only converted while it is still dense.
if not batch.x.is_sparse:
    batch.x = batch.x.to_sparse()

for epoch in range(1, N_EPOCHS + 1):
    train()
    if LOG_ACCURACY:
        train_acc = test()
        if train_acc > best_acc:
            best_acc = train_acc
        log = 'Epoch: {:03d}, Train: {:.4f}, Best: {:.4f}'
        print(log.format(epoch, train_acc, best_acc))

if on_gpu:
    torch.cuda.synchronize()
print(time.perf_counter() - start)

14.588780164718628
