# Object condensation using CLUE3D

Goal:
- start with layer-clusters (x,y,z,e)
- run edgeconv
- collapse to tracksters
- run edgeconv
- fully connected
- query edges
- output

In [1]:
import torch
import torch.nn as nn
import torch_geometric.transforms as T
import torch_geometric.utils as geo_utils

from torch.optim import SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
import sklearn.metrics as metrics

from reco.model import EdgeConvNet
from torch_geometric.nn import EdgeConv, DynamicEdgeConv, global_mean_pool

from reco.learn import train_edge_pred, test_edge_pred
from reco.dataset import PointCloudSet
from reco.loss import FocalLoss
from reco.training import split_geo_train_test

data_root = "data"
ds_name = "MultiParticle"
raw_dir = f"/Users/ecuba/data/{ds_name}"

device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [2]:
transform = T.Compose([T.NormalizeFeatures()])

ds = PointCloudSet(
    ds_name,
    data_root,
    raw_dir,
    transform=transform, # todo: z-axis transformation
    N_FILES=1,
)

positive_edge_fr = float(sum(ds.data.y) / len(ds.data.y))
print(f"Positive edge ratio: {positive_edge_fr:.3f}") 
train_dl, test_dl = split_geo_train_test(ds, batch_size=1)
ds.data

Positive edge ratio: 0.254
Train set: 90, Test set: 10


Data(x=[100895, 4], edge_index=[2, 32294], y=[32294], trackster_index=[100895])

In [3]:
class PointCloudNet(nn.Module):
    def __init__(self, input_dim=4, output_dim=1, aggr='add', dropout=0.2):
        super(PointCloudNet, self).__init__()

        lc_hdim1 = 32        
        lc_hdim2 = 64

        tr_hdim1 = 64
        tr_hdim2 = 64

        fc_hdim = 128

        k=8

        # EdgeConv on LC
        self.lc_conv1 = DynamicEdgeConv(nn=EdgeConvNet(input_dim, lc_hdim1), aggr=aggr, k=k)
        self.lc_conv2 = DynamicEdgeConv(nn=EdgeConvNet(lc_hdim1, lc_hdim2), aggr=aggr, k=k)

        # EdgeConv on Tracksters
        self.trackster_conv1 = DynamicEdgeConv(nn=EdgeConvNet(lc_hdim2, tr_hdim1), aggr=aggr, k=k)
        self.trackster_conv2 = DynamicEdgeConv(nn=EdgeConvNet(tr_hdim1, tr_hdim2), aggr=aggr, k=k)

        # Edge features from node embeddings for classification
        self.edgenetwork = nn.Sequential(
            nn.Linear(2 * tr_hdim2, fc_hdim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(fc_hdim, output_dim),
            nn.Sigmoid()
        )
            
    def forward(self, X, query, tr_index):        

        # tr index has gaps due to wrong reindexation on pytorch geometric with batch_size > 1
        # last = -1
        # idx = -1
        # tridx2lc = {}
        # l_tr_index = tr_index.tolist()
        # new_idx = [0] * len(l_tr_index)
        # for i, tr_i in enumerate(l_tr_index):
        #     if tr_i != last:
        #         last = tr_i
        #         idx += 1
        #         tridx2lc[idx] = []

        #     new_idx[i] = idx
        #     tridx2lc[idx].append(i)

        # build knn edges within each trackster
        # lc_edges = []   # knn edges witin a trackster

        H = self.lc_conv1(X)
        H = self.lc_conv2(H)

        # apply per-trackster pooling of some kind (mean, add, topK, self-attention)
        TX = global_mean_pool(H, tr_index)

        H = self.trackster_conv1(TX)
        H = self.trackster_conv2(H)

        src, dst = query
        return self.edgenetwork(torch.cat([H[src], H[dst]], dim=-1)).squeeze(-1)

In [4]:
model = PointCloudNet(input_dim=ds.data.x.shape[1])
epochs = 50

# loss_func = F.binary_cross_entropy_with_logits
# alpha - percentage of negative edges
loss_func = FocalLoss(alpha=1.-positive_edge_fr, gamma=2)

model = model.to(device)
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, epochs, eta_min=1e-3)

for epoch in range(epochs):

    train_loss, train_true, train_pred = train_edge_pred(
        model,
        device,
        optimizer,
        loss_func,
        train_dl
    )
    
    train_acc = metrics.accuracy_score(train_true, (train_pred > 0.5).astype(int))
    scheduler.step()

    if epoch % 2 == 0:
        test_loss, test_true, test_pred = test_edge_pred(model, device, loss_func, test_dl)
        test_acc = metrics.accuracy_score(test_true, (test_pred > 0.5).astype(int))
        print(
            f"Epoch {epoch}:",
            f"\ttrain loss:{train_loss:.2f}\ttrain acc: {train_acc:.3f}",
            f"\t test loss:{test_loss:.2f} \t test acc: {test_acc:.3f}"
        )

Epoch 0: 	train loss:5.88	train acc: 0.676 	 test loss:9.65 	 test acc: 0.573
Epoch 2: 	train loss:4.11	train acc: 0.741 	 test loss:58.05 	 test acc: 0.433
Epoch 4: 	train loss:3.72	train acc: 0.748 	 test loss:69.88 	 test acc: 0.453
Epoch 6: 	train loss:3.41	train acc: 0.760 	 test loss:89.09 	 test acc: 0.434
Epoch 8: 	train loss:3.20	train acc: 0.765 	 test loss:75.57 	 test acc: 0.448
Epoch 10: 	train loss:3.02	train acc: 0.769 	 test loss:70.89 	 test acc: 0.447
Epoch 12: 	train loss:2.81	train acc: 0.779 	 test loss:91.76 	 test acc: 0.458
Epoch 14: 	train loss:2.65	train acc: 0.783 	 test loss:87.81 	 test acc: 0.454
Epoch 16: 	train loss:2.50	train acc: 0.788 	 test loss:98.93 	 test acc: 0.456
Epoch 18: 	train loss:2.40	train acc: 0.791 	 test loss:86.43 	 test acc: 0.473
Epoch 20: 	train loss:2.27	train acc: 0.796 	 test loss:100.10 	 test acc: 0.472
Epoch 22: 	train loss:2.16	train acc: 0.802 	 test loss:89.32 	 test acc: 0.460
Epoch 24: 	train loss:2.05	train acc: 0.807 	