# Import and Read data


In [1]:
import sys
sys.path.append('.')  # Add current directory to path
from build_graph_and_train import *



In [2]:
partition = 100

In [3]:
df = pd.read_csv(f"../../../data/top30groups/LongLatCombined/combined/combined{partition}.csv")

In [4]:
from sklearn.preprocessing import StandardScaler

# Columns to exclude from scaling
exclude_cols = ['gname', 'longitude', 'latitude']

# Columns to scale
scale_cols = [col for col in df.columns if col not in exclude_cols]

# Scale only selected columns
scaler = StandardScaler()
df[scale_cols] = scaler.fit_transform(df[scale_cols])

In [5]:
import os 
if not os.path.isdir(f"Results{partition}"):
    os.mkdir(f"Results{partition}")

# Create longlat feature

In [6]:
geodata = ['longitude', 'latitude']
combined_geo = df.copy()
combined_geo['longlat'] = list(zip(df['longitude'], df['latitude']))
combined_geo = combined_geo.drop(columns=geodata)

In [7]:
import ast

def to_tuple_if_needed(val):
    if isinstance(val, str):
        return ast.literal_eval(val)
    return val  # already a tuple

combined_geo['longlat'] = combined_geo['longlat'].apply(to_tuple_if_needed)

# Weapon type prediction

In [8]:
label_index = {g: i for i, g in enumerate(sorted(df['gname'].unique()))}
continuous_cols = ['weaptype1', 'nkill', 'targtype1', 'attacktype1']
y_preds = []
y_trues = []
for i in range(len(continuous_cols)):
    data, y_gcn, y_nrf, non_geo_features, train_mask, test_mask, row_to_node_index, index_to_label = build_graph_data(combined_geo, label_index, continuous_col=continuous_cols[i])
    args = {
        'partition': f"gtd{partition}",
        'embed_dim': 16,
        'lr': 0.01,
        'epochs': 200,
        'feat_dropout': 0,
        'n_tree': 80,
        'tree_depth': 10,
        'tree_feature_rate': 0.5,
        'n_class': len(label_index)
    }
    best_acc, best_epoch, best_precision, best_recall, best_f1, y_pred_decoded, y_true_decoded = train_joint(data, data.edge_index, y_gcn, y_nrf, non_geo_features, train_mask, test_mask, args, row_to_node_index, index_to_label)
    y_preds.append(y_pred_decoded)
    y_trues.append(y_true_decoded)
    
    with open(f"Results{partition}/Results_{continuous_cols[i]}_prediction", "w") as f:
        f.write(f"Best acc: {best_acc} in epoch {best_epoch} for {continuous_cols[i]} prediction\n")
        f.write(f"Best recall: {best_recall} in epoch {best_epoch} for {continuous_cols[i]} prediction\n")
        f.write(f"Best precision: {best_precision} in epoch {best_epoch} for {continuous_cols[i]} prediction\n")
        f.write(f"Best f1: {best_f1} in epoch {best_epoch} for {continuous_cols[i]} prediction\n")

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 336.1260 | NRF Loss: 3.4046 | JOINT Loss: 339.5306 | NRF Acc: 0.0354
Epoch 02 | GCN MSE Loss: 242.0694 | NRF Loss: 3.3971 | JOINT Loss: 245.4664 | NRF Acc: 0.1258
Epoch 03 | GCN MSE Loss: 167.2011 | NRF Loss: 3.3883 | JOINT Loss: 170.5893 | NRF Acc: 0.1349
Epoch 04 | GCN MSE Loss: 110.9979 | NRF Loss: 3.3880 | JOINT Loss: 114.3860 | NRF Acc: 0.1229
Epoch 05 | GCN MSE Loss: 72.3038 | NRF Loss: 3.3792 | JOINT Loss: 75.6830 | NRF Acc: 0.1132
Epoch 06 | GCN MSE Loss: 48.9870 | NRF Loss: 3.3786 | JOINT Loss: 52.3656 | NRF Acc: 0.0686
Epoch 07 | GCN MSE Loss: 37.8554 | NRF Loss: 3.3889 | JOINT Loss: 41.2443 | NRF Acc: 0.0635
Epoch 08 | GCN MSE Loss: 35.0880 | NRF Loss: 3.3884 | JOINT Loss: 38.4764 | NRF Acc: 0.0280
Epoch 09 | GCN MSE Loss: 36.8613 | NRF Loss: 3.3773 | JOINT Loss: 40.2386 | NRF Acc: 0.0566
Epoch 10 | GCN MSE Loss: 39.9127 | NRF Loss: 3.3637 | JOINT Loss: 43.2765 | NRF Acc: 0.0921
Epoch 11 | GCN MSE Loss: 42.0577 | NRF Loss: 3.3552 | JOINT Loss: 45.412

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 733.8256 | NRF Loss: 3.4001 | JOINT Loss: 737.2257 | NRF Acc: 0.0515
Epoch 02 | GCN MSE Loss: 590.9288 | NRF Loss: 3.4027 | JOINT Loss: 594.3315 | NRF Acc: 0.0309
Epoch 03 | GCN MSE Loss: 468.9973 | NRF Loss: 3.4037 | JOINT Loss: 472.4010 | NRF Acc: 0.0309
Epoch 04 | GCN MSE Loss: 367.4003 | NRF Loss: 3.4102 | JOINT Loss: 370.8105 | NRF Acc: 0.0320
Epoch 05 | GCN MSE Loss: 284.8886 | NRF Loss: 3.4121 | JOINT Loss: 288.3007 | NRF Acc: 0.0326
Epoch 06 | GCN MSE Loss: 218.6493 | NRF Loss: 3.4070 | JOINT Loss: 222.0563 | NRF Acc: 0.0343
Epoch 07 | GCN MSE Loss: 166.1862 | NRF Loss: 3.3996 | JOINT Loss: 169.5859 | NRF Acc: 0.0406
Epoch 08 | GCN MSE Loss: 125.4651 | NRF Loss: 3.3931 | JOINT Loss: 128.8582 | NRF Acc: 0.0400
Epoch 09 | GCN MSE Loss: 94.4895 | NRF Loss: 3.3946 | JOINT Loss: 97.8841 | NRF Acc: 0.0354
Epoch 10 | GCN MSE Loss: 71.1041 | NRF Loss: 3.3957 | JOINT Loss: 74.4999 | NRF Acc: 0.0314
Epoch 11 | GCN MSE Loss: 53.3883 | NRF Loss: 3.3938 | JOINT Loss

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 123.7037 | NRF Loss: 3.4016 | JOINT Loss: 127.1052 | NRF Acc: 0.0366
Epoch 02 | GCN MSE Loss: 66.9451 | NRF Loss: 3.3824 | JOINT Loss: 70.3275 | NRF Acc: 0.1589
Epoch 03 | GCN MSE Loss: 30.4130 | NRF Loss: 3.3853 | JOINT Loss: 33.7983 | NRF Acc: 0.0623
Epoch 04 | GCN MSE Loss: 15.2805 | NRF Loss: 3.3996 | JOINT Loss: 18.6801 | NRF Acc: 0.0429
Epoch 05 | GCN MSE Loss: 15.0468 | NRF Loss: 3.4006 | JOINT Loss: 18.4475 | NRF Acc: 0.0749
Epoch 06 | GCN MSE Loss: 21.9200 | NRF Loss: 3.3876 | JOINT Loss: 25.3075 | NRF Acc: 0.0829
Epoch 07 | GCN MSE Loss: 29.3123 | NRF Loss: 3.3729 | JOINT Loss: 32.6853 | NRF Acc: 0.1001
Epoch 08 | GCN MSE Loss: 32.9265 | NRF Loss: 3.3627 | JOINT Loss: 36.2891 | NRF Acc: 0.1161
Epoch 09 | GCN MSE Loss: 31.3930 | NRF Loss: 3.3500 | JOINT Loss: 34.7430 | NRF Acc: 0.1144
Epoch 10 | GCN MSE Loss: 25.9006 | NRF Loss: 3.3378 | JOINT Loss: 29.2384 | NRF Acc: 0.1286
Epoch 11 | GCN MSE Loss: 18.7845 | NRF Loss: 3.3252 | JOINT Loss: 22.1096 | NR

  y_nrf = torch.tensor(y_nrf, dtype=torch.long).to(device)


Epoch 01 | GCN MSE Loss: 313.6182 | NRF Loss: 3.4027 | JOINT Loss: 317.0208 | NRF Acc: 0.0835
Epoch 02 | GCN MSE Loss: 227.4713 | NRF Loss: 3.3910 | JOINT Loss: 230.8623 | NRF Acc: 0.1224
Epoch 03 | GCN MSE Loss: 154.9099 | NRF Loss: 3.3810 | JOINT Loss: 158.2909 | NRF Acc: 0.1630
Epoch 04 | GCN MSE Loss: 99.3510 | NRF Loss: 3.3694 | JOINT Loss: 102.7205 | NRF Acc: 0.1618
Epoch 05 | GCN MSE Loss: 58.9415 | NRF Loss: 3.3556 | JOINT Loss: 62.2971 | NRF Acc: 0.1509
Epoch 06 | GCN MSE Loss: 32.0149 | NRF Loss: 3.3426 | JOINT Loss: 35.3575 | NRF Acc: 0.1549
Epoch 07 | GCN MSE Loss: 17.5687 | NRF Loss: 3.3338 | JOINT Loss: 20.9025 | NRF Acc: 0.1487
Epoch 08 | GCN MSE Loss: 13.5753 | NRF Loss: 3.3209 | JOINT Loss: 16.8962 | NRF Acc: 0.1635
Epoch 09 | GCN MSE Loss: 16.7302 | NRF Loss: 3.2957 | JOINT Loss: 20.0259 | NRF Acc: 0.2127
Epoch 10 | GCN MSE Loss: 23.7093 | NRF Loss: 3.2630 | JOINT Loss: 26.9723 | NRF Acc: 0.2236
Epoch 11 | GCN MSE Loss: 31.8101 | NRF Loss: 3.2408 | JOINT Loss: 35.0509

In [9]:
def plot_confusion_matrix(y_true, y_pred, labels, continuous_col):
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_normalized = cm.astype('float') / cm.sum(axis=1, keepdims=True)

    plt.figure(figsize=(18, 16))
    sns.heatmap(cm_normalized,
                annot=True,
                fmt=".2f",
                xticklabels=labels,
                yticklabels=labels,
                cmap="viridis",
                square=True,
                linewidths=0.5,
                cbar_kws={"shrink": 0.8})

    plt.title(f"Normalized Confusion Matrix", fontsize=18)
    plt.xlabel("Predicted Label", fontsize=14)
    plt.ylabel("True Label", fontsize=14)
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.tight_layout()

    # Save the figure
    save_path = f"Results{partition}/cm_{partition}_{continuous_col}.png"
    plt.savefig(save_path, dpi=300)
    plt.close()

    print(f"Saved confusion matrix for partition {partition} to {save_path}")


In [10]:
for i in range(len(continuous_cols)):
    plot_confusion_matrix(y_preds[i], y_trues[i], sorted(df['gname'].unique()), continuous_cols[i])

Saved confusion matrix for partition 100 to Results100/cm_100_weaptype1.png
Saved confusion matrix for partition 100 to Results100/cm_100_nkill.png
Saved confusion matrix for partition 100 to Results100/cm_100_targtype1.png


  cm_normalized = cm.astype('float') / cm.sum(axis=1, keepdims=True)


Saved confusion matrix for partition 100 to Results100/cm_100_attacktype1.png
