In [7]:
import numpy as np
import pandas as pd
import os
import networkx as nx
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.metrics import classification_report
from build_graph_data import *


from collections import Counter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

In [8]:
trainpath = '../../data/top30groups/traindata/train100.csv'
traindata = pd.read_csv(trainpath, encoding='ISO-8859-1')
testpath = '../../data/top30groups/testdata/test100.csv'
testdata = pd.read_csv(testpath, encoding='ISO-8859-1')

In [9]:
# Read and preprocess
#traindata_uniques = create_data(traindata)
#testdata_uniques = create_data(testdata)

# Build graph data
#adj_train, idx_train, coord_train, X_train, label_index = build_graph_data(traindata_uniques)

#adj_test, idx_test, coord_test, X_test, _ = build_graph_data(testdata_uniques)

# Step 1: Merge train and test to get global coordinate space
combined_df = pd.concat([traindata, testdata])
combined_unique = create_data(combined_df)
_, _, coord_to_index, global_feature_matrix, global_label_index = build_graph_data(combined_unique)

# Step 2: Filter to only first attack per group in train/test separately
train_unique = create_data(traindata)
test_unique = create_data(testdata)

# Step 3: Build graph data using shared coord_to_index and feature matrix
adj_train, idx_train, _, _, _ = build_graph_data(train_unique, coord_to_index=coord_to_index)
adj_test, idx_test, _, _, _ = build_graph_data(test_unique, coord_to_index=coord_to_index)

# Step 4: Use the same global_feature_matrix and label_index
X_train = X_test = global_feature_matrix
label_index = global_label_index


       longitude   latitude                                             gname
1472  124.599145   6.637796                            Abu Sayyaf Group (ASG)
820    28.427501 -26.141151          African National Congress (South Africa)
874    44.371773  33.303566                                  Al-Qaida in Iraq
662    39.174905  21.543155          Al-Qaida in the Arabian Peninsula (AQAP)
229    45.326115   2.059819                                        Al-Shabaab
837    -2.052454  43.189944               Basque Fatherland and Freedom (ETA)
674    13.141459  11.840929                                        Boko Haram
524    87.319000  22.424000    Communist Party of India - Maoist (CPI-Maoist)
1832    8.742907  41.930607         Corsican National Liberation Front (FLNC)
60     37.613808  48.851688                         Donetsk People's Republic
289   -90.529068  14.622869  Farabundo Marti National Liberation Front (FMLN)
1095    8.885243   9.933381                                 Fula

# Data is now sorted by date and gname

In [10]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class PyTorchGCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, num_classes):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


In [None]:
from torch_geometric.data import Data
from scipy.sparse import coo_matrix

print(len(label_index))

from torch_geometric.data import Data
from scipy.sparse import coo_matrix

def run_epoch(model, adjacency_matrices, first_attack_indices, feature_matrix, label_index, is_training=True):
    correct = 0
    total = 0

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = torch.nn.NLLLoss()

    model.train() if is_training else model.eval()
    for group_name, A_np in adjacency_matrices.items():
        if group_name not in label_index:
            continue  # skip if label not found

        label = torch.tensor([label_index[group_name]], dtype=torch.long)
        node_idx = node_indices[group_name][0]  # get the first node only

        A_coo = coo_matrix(A_np)
        edge_index = torch.tensor(np.vstack((A_coo.row, A_coo.col)), dtype=torch.long)
        x = torch.tensor(feature_matrix, dtype=torch.float32)
        data = Data(x=x, edge_index=edge_index)

        output = model(data.x, data.edge_index)
        pred = output[node_idx]
        loss = loss_fn(pred.unsqueeze(0), label)

        if is_training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        predicted_class = pred.argmax().item()
        correct += int(predicted_class == label.item())
        total += 1

    return correct / total if total > 0 else 0.0



30


In [15]:
model = PyTorchGCN(in_channels=X_train.shape[1], hidden_channels=16, num_classes=len(label_index))

for epoch in range(10):
    train_acc = run_epoch(model, adj_train, idx_train, X_train, label_index, is_training=True)
    test_acc = run_epoch(model, adj_test, idx_test, X_test, label_index, is_training=False)
    print(f"Epoch {epoch+1} - Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")


RuntimeError: Expected target size [1, 30], got [1]