# Import and Read data


In [17]:
import sys
sys.path.append('.')  # Add current directory to path
# NOTE(review): `pd`, `torch`, `build_graph_data` and `train_joint` are used by
# later cells but never imported explicitly in this notebook — presumably they
# are pulled in by this star import; confirm against build_graph_and_train.
from build_graph_and_train import *

In [18]:
# Partition identifier: selects the input file combined{partition}.csv and
# names the Results{partition} output directory used by the cells below.
partition = 300

In [19]:
# Load the combined longitude/latitude table for the selected partition.
csv_path = f"../../../data/top30groups/LongLatCombined/combined/combined{partition}.csv"
df = pd.read_csv(csv_path)

In [20]:
from sklearn.preprocessing import StandardScaler

# Standardize every column except the group label and the raw coordinates.
# NOTE(review): the scaler is fitted on the whole frame, before the train/test
# masks are produced further down — confirm that this is intended.
exclude_cols = ['gname', 'longitude', 'latitude']
scale_cols = list(filter(lambda col: col not in exclude_cols, df.columns))

# Fit first, then overwrite the selected columns with their standardized values.
scaler = StandardScaler()
scaler.fit(df[scale_cols])
df[scale_cols] = scaler.transform(df[scale_cols])

In [21]:
import os

# Create the per-partition output directory. exist_ok=True makes the cell safe
# to re-run and avoids the gap between checking for the directory and creating it.
os.makedirs(f"Results{partition}", exist_ok=True)

# Create longlat feature

In [22]:
# Fold the two coordinate columns into a single (longitude, latitude) tuple
# column named 'longlat', then drop the originals. `df` itself is not modified.
geodata = ['longitude', 'latitude']
combined_geo = (
    df.assign(longlat=list(zip(df['longitude'], df['latitude'])))
      .drop(columns=geodata)
)

In [23]:
import ast

def to_tuple_if_needed(val):
    """Parse ``val`` with ``ast.literal_eval`` when it is a string.

    Any non-string value is returned unchanged.
    """
    return ast.literal_eval(val) if isinstance(val, str) else val

# Parse any string-encoded pairs back into Python objects. The column built in
# the previous cell already holds tuples from zip(), so non-string values pass
# through unchanged.
combined_geo['longlat'] = combined_geo['longlat'].apply(to_tuple_if_needed)

# Joint GCN + NRF training for each target column (weaptype1, nkill, targtype1, attacktype1)

In [24]:
# Map each group name to an integer class id, assigned in sorted-name order.
label_index = {group: idx for idx, group in enumerate(sorted(df['gname'].unique()))}
continuous_cols = ['weaptype1', 'nkill', 'targtype1', 'attacktype1']

# Decoded predictions / ground truth collected per target column, in the same
# order as `continuous_cols`.
y_preds = []
y_trues = []

for target_col in continuous_cols:
    # Build the graph inputs for this target column.
    (data, y_gcn, y_nrf, non_geo_features,
     train_mask, test_mask,
     row_to_node_index, index_to_label) = build_graph_data(
        combined_geo, label_index, continuous_col=target_col)

    # Settings handed to train_joint; rebuilt on every iteration.
    args = dict(
        partition=f"gtd{partition}",
        embed_dim=16,
        lr=0.01,
        epochs=300,
        feat_dropout=0,
        n_tree=80,
        tree_depth=10,
        tree_feature_rate=0.5,
        n_class=len(label_index),
    )

    (best_acc, best_epoch, best_precision, best_recall, best_f1,
     y_pred_decoded, y_true_decoded) = train_joint(
        data, data.edge_index, y_gcn, y_nrf, non_geo_features,
        train_mask, test_mask, args, row_to_node_index, index_to_label)

    y_preds.append(y_pred_decoded)
    y_trues.append(y_true_decoded)

    # Write the best metrics for this target column to its own results file.
    report_lines = [
        f"Best acc: {best_acc} in epoch {best_epoch} for {target_col} prediction\n",
        f"Best recall: {best_recall} in epoch {best_epoch} for {target_col} prediction\n",
        f"Best precision: {best_precision} in epoch {best_epoch} for {target_col} prediction\n",
        f"Best f1: {best_f1} in epoch {best_epoch} for {target_col} prediction\n",
    ]
    with open(f"Results{partition}/Results_{target_col}_prediction", "w") as f:
        f.writelines(report_lines)

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 124.9910 | NRF Loss: 3.4014 | JOINT Loss: 128.3923 | NRF Acc: 0.0485
Epoch 02 | GCN MSE Loss: 93.8608 | NRF Loss: 3.3810 | JOINT Loss: 97.2418 | NRF Acc: 0.0855
Epoch 03 | GCN MSE Loss: 69.1521 | NRF Loss: 3.3903 | JOINT Loss: 72.5423 | NRF Acc: 0.1231
Epoch 04 | GCN MSE Loss: 48.4180 | NRF Loss: 3.3553 | JOINT Loss: 51.7733 | NRF Acc: 0.1200
Epoch 05 | GCN MSE Loss: 32.7939 | NRF Loss: 3.3435 | JOINT Loss: 36.1374 | NRF Acc: 0.0774
Epoch 06 | GCN MSE Loss: 21.5381 | NRF Loss: 3.3424 | JOINT Loss: 24.8805 | NRF Acc: 0.0864
Epoch 07 | GCN MSE Loss: 13.4576 | NRF Loss: 3.3367 | JOINT Loss: 16.7943 | NRF Acc: 0.0712
Epoch 08 | GCN MSE Loss: 8.3471 | NRF Loss: 3.3367 | JOINT Loss: 11.6838 | NRF Acc: 0.0921
Epoch 09 | GCN MSE Loss: 6.0087 | NRF Loss: 3.3291 | JOINT Loss: 9.3378 | NRF Acc: 0.1259
Epoch 10 | GCN MSE Loss: 5.4951 | NRF Loss: 3.3238 | JOINT Loss: 8.8189 | NRF Acc: 0.0883
Epoch 11 | GCN MSE Loss: 5.9363 | NRF Loss: 3.3345 | JOINT Loss: 9.2708 | NRF Acc: 

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 600.4127 | NRF Loss: 3.4002 | JOINT Loss: 603.8129 | NRF Acc: 0.0081
Epoch 02 | GCN MSE Loss: 430.5045 | NRF Loss: 3.3862 | JOINT Loss: 433.8907 | NRF Acc: 0.1762
Epoch 03 | GCN MSE Loss: 299.4756 | NRF Loss: 3.3916 | JOINT Loss: 302.8671 | NRF Acc: 0.1056
Epoch 04 | GCN MSE Loss: 201.1831 | NRF Loss: 3.3821 | JOINT Loss: 204.5651 | NRF Acc: 0.0738
Epoch 05 | GCN MSE Loss: 131.1800 | NRF Loss: 3.3929 | JOINT Loss: 134.5729 | NRF Acc: 0.0809
Epoch 06 | GCN MSE Loss: 84.8803 | NRF Loss: 3.3993 | JOINT Loss: 88.2796 | NRF Acc: 0.0729
Epoch 07 | GCN MSE Loss: 58.4400 | NRF Loss: 3.3938 | JOINT Loss: 61.8338 | NRF Acc: 0.0693
Epoch 08 | GCN MSE Loss: 47.8323 | NRF Loss: 3.3850 | JOINT Loss: 51.2173 | NRF Acc: 0.0615
Epoch 09 | GCN MSE Loss: 48.6120 | NRF Loss: 3.3785 | JOINT Loss: 51.9905 | NRF Acc: 0.0653
Epoch 10 | GCN MSE Loss: 56.1294 | NRF Loss: 3.3724 | JOINT Loss: 59.5017 | NRF Acc: 0.0656
Epoch 11 | GCN MSE Loss: 66.0192 | NRF Loss: 3.3706 | JOINT Loss: 69.3

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 584.5886 | NRF Loss: 3.4018 | JOINT Loss: 587.9904 | NRF Acc: 0.0500
Epoch 02 | GCN MSE Loss: 400.0031 | NRF Loss: 3.3889 | JOINT Loss: 403.3921 | NRF Acc: 0.1465
Epoch 03 | GCN MSE Loss: 251.3280 | NRF Loss: 3.4005 | JOINT Loss: 254.7285 | NRF Acc: 0.1177
Epoch 04 | GCN MSE Loss: 138.5014 | NRF Loss: 3.3790 | JOINT Loss: 141.8804 | NRF Acc: 0.1075
Epoch 05 | GCN MSE Loss: 61.5247 | NRF Loss: 3.3771 | JOINT Loss: 64.9018 | NRF Acc: 0.0841
Epoch 06 | GCN MSE Loss: 18.1348 | NRF Loss: 3.3953 | JOINT Loss: 21.5301 | NRF Acc: 0.0634
Epoch 07 | GCN MSE Loss: 3.8823 | NRF Loss: 3.4007 | JOINT Loss: 7.2830 | NRF Acc: 0.0637
Epoch 08 | GCN MSE Loss: 11.9771 | NRF Loss: 3.3855 | JOINT Loss: 15.3626 | NRF Acc: 0.0706
Epoch 09 | GCN MSE Loss: 33.2913 | NRF Loss: 3.3622 | JOINT Loss: 36.6535 | NRF Acc: 0.0800
Epoch 10 | GCN MSE Loss: 56.9474 | NRF Loss: 3.3489 | JOINT Loss: 60.2963 | NRF Acc: 0.0781
Epoch 11 | GCN MSE Loss: 74.6175 | NRF Loss: 3.3361 | JOINT Loss: 77.9536 

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 244.4090 | NRF Loss: 3.4003 | JOINT Loss: 247.8092 | NRF Acc: 0.0104
Epoch 02 | GCN MSE Loss: 149.9415 | NRF Loss: 3.3872 | JOINT Loss: 153.3287 | NRF Acc: 0.0855
Epoch 03 | GCN MSE Loss: 92.7895 | NRF Loss: 3.3949 | JOINT Loss: 96.1844 | NRF Acc: 0.1418
Epoch 04 | GCN MSE Loss: 64.4788 | NRF Loss: 3.3773 | JOINT Loss: 67.8561 | NRF Acc: 0.1255
Epoch 05 | GCN MSE Loss: 54.8618 | NRF Loss: 3.3678 | JOINT Loss: 58.2296 | NRF Acc: 0.1186
Epoch 06 | GCN MSE Loss: 54.3624 | NRF Loss: 3.3605 | JOINT Loss: 57.7229 | NRF Acc: 0.1241
Epoch 07 | GCN MSE Loss: 54.5739 | NRF Loss: 3.3513 | JOINT Loss: 57.9252 | NRF Acc: 0.1331
Epoch 08 | GCN MSE Loss: 50.7647 | NRF Loss: 3.3389 | JOINT Loss: 54.1036 | NRF Acc: 0.1227
Epoch 09 | GCN MSE Loss: 42.3274 | NRF Loss: 3.3332 | JOINT Loss: 45.6606 | NRF Acc: 0.1118
Epoch 10 | GCN MSE Loss: 31.2879 | NRF Loss: 3.3293 | JOINT Loss: 34.6172 | NRF Acc: 0.1210
Epoch 11 | GCN MSE Loss: 20.3667 | NRF Loss: 3.3167 | JOINT Loss: 23.6833 | 

In [25]:
def plot_confusion_matrix(y_true, y_pred, labels, continuous_col):
    """Save a row-normalized confusion-matrix heatmap as a PNG file.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_true``.
    labels : list
        Label values that fix the row/column order of the matrix and are used
        as the tick labels on both axes.
    continuous_col : str
        Name inserted into the output file name to tell runs apart.

    Notes
    -----
    Reads the notebook-level ``partition`` variable and writes the figure to
    ``Results{partition}/cm_{partition}_{continuous_col}.png``. Prints the
    saved path and returns ``None``.
    """
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np

    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # Normalize each row by its total. A label with no samples in y_true has a
    # zero row sum; plain division would yield NaN cells (and a RuntimeWarning),
    # so those rows are left at 0 instead.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm,
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    )

    plt.figure(figsize=(18, 16))
    sns.heatmap(cm_normalized,
                annot=True,
                fmt=".2f",
                xticklabels=labels,
                yticklabels=labels,
                cmap="viridis",
                square=True,
                linewidths=0.5,
                cbar_kws={"shrink": 0.8})

    plt.title("Normalized Confusion Matrix", fontsize=18)
    plt.xlabel("Predicted Label", fontsize=14)
    plt.ylabel("True Label", fontsize=14)
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.tight_layout()

    # Save the figure, then close it so repeated calls do not accumulate
    # open figures.
    save_path = f"Results{partition}/cm_{partition}_{continuous_col}.png"
    plt.savefig(save_path, dpi=300)
    plt.close()

    print(f"Saved confusion matrix for partition {partition} to {save_path}")


In [26]:
# plot_confusion_matrix takes (y_true, y_pred, labels, continuous_col). The
# ground truth must come first; otherwise the matrix is transposed and the
# rows are normalized over predictions while the axes are labelled the other
# way round.
group_labels = sorted(df['gname'].unique())
for target_col, y_true, y_pred in zip(continuous_cols, y_trues, y_preds):
    plot_confusion_matrix(y_true, y_pred, group_labels, target_col)

Saved confusion matrix for partition 300 to Results300/cm_300_weaptype1.png
Saved confusion matrix for partition 300 to Results300/cm_300_nkill.png
Saved confusion matrix for partition 300 to Results300/cm_300_targtype1.png
Saved confusion matrix for partition 300 to Results300/cm_300_attacktype1.png
