In [1]:
import warnings
warnings.filterwarnings( 'ignore' )
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, TimeSeriesSplit
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer
import sys

In [2]:
# Partition index: selects which train/test CSV pair is loaded below and is
# embedded in the names of the result files written by later cells.
partition = 478

In [3]:
!{sys.executable} -m pip install pytorch-tabnet --upgrade

Collecting pytorch-tabnet
  Using cached pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Using cached pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
# Locations of the pre-split train/test CSV files for the selected partition.
trainpath = f'../../../../data/top30groups/LongLatCombined/train1/train{partition}.csv'
testpath = f'../../../../data/top30groups/LongLatCombined/test1/test{partition}.csv'

# Read the training file first, then the test file, both decoded as ISO-8859-1.
traindata, testdata = (
    pd.read_csv(csv_path, encoding='ISO-8859-1')
    for csv_path in (trainpath, testpath)
)

In [5]:
import torch
# Print whether PyTorch reports a usable CUDA device in this kernel.
print(torch.cuda.is_available())

True


In [6]:
# Ask PyTorch to release cached GPU memory it is not currently using
# (run here, ahead of the training cells).
torch.cuda.empty_cache()

In [7]:
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder

def split_data(dftrain, dftest):
    """Split both frames into features and an integer-encoded ``gname`` target.

    Parameters
    ----------
    dftrain, dftest : pandas.DataFrame
        Frames containing a ``gname`` label column. Every other column is
        treated as a feature and cast to ``float``.

    Returns
    -------
    tuple
        ``(Xtrain, Ytrain, Xtest, Ytest, y_true_decoded, le)``. ``Xtrain`` and
        ``Xtest`` are float arrays, ``Ytrain`` and ``Ytest`` are the labels
        encoded by ``le`` (a ``LabelEncoder`` fitted on the training labels
        only), and ``y_true_decoded`` is ``Ytest`` passed back through
        ``le.inverse_transform``.
    """
    target = 'gname'

    def features_and_labels(frame):
        # Feature matrix first, raw label vector second (same order as the caller expects).
        return frame.drop(columns=[target]).values.astype(float), frame[target].values

    Xtrain, raw_train_labels = features_and_labels(dftrain)
    Xtest, raw_test_labels = features_and_labels(dftest)

    # The encoder learns its classes from the training labels; the test labels
    # are only transformed with that fitted mapping.
    le = LabelEncoder()
    Ytrain = le.fit_transform(raw_train_labels)
    Ytest = le.transform(raw_test_labels)

    return Xtrain, Ytrain, Xtest, Ytest, le.inverse_transform(Ytest), le



In [8]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
from pytorch_tabnet.tab_model import TabNetClassifier

class TabNetClassifierWrapper(BaseEstimator, ClassifierMixin):
    """Adapter that lets ``TabNetClassifier`` be used in scikit-learn model selection.

    Parameters
    ----------
    n_d, n_a, n_steps, gamma, lambda_sparse
        Forwarded unchanged to the ``TabNetClassifier`` constructor.
    optimizer_params : dict or None
        Forwarded to ``TabNetClassifier``. ``None`` (or an empty dict) means
        ``{'lr': 0.01}``; the default is resolved in :meth:`fit`.
    max_epochs, patience, batch_size, virtual_batch_size
        Forwarded to ``TabNetClassifier.fit``. The defaults are the values that
        were previously hard-coded.
    seed : int
        Seed handed to ``TabNetClassifier``.
    """

    def __init__(self, n_d=8, n_a=8, n_steps=3, gamma=1.3, lambda_sparse=1e-3,
                 optimizer_params=None, max_epochs=500, patience=120,
                 batch_size=1024, virtual_batch_size=128, seed=42):
        # scikit-learn's get_params()/clone() expect __init__ to store every
        # argument exactly as given, so no defaulting or validation happens here.
        self.n_d = n_d
        self.n_a = n_a
        self.n_steps = n_steps
        self.gamma = gamma
        self.lambda_sparse = lambda_sparse
        self.optimizer_params = optimizer_params
        self.max_epochs = max_epochs
        self.patience = patience
        self.batch_size = batch_size
        self.virtual_batch_size = virtual_batch_size
        self.seed = seed
        # Underlying TabNet model; created by fit().
        self.model = None

    def fit(self, X, y):
        """Build a fresh ``TabNetClassifier`` from the stored parameters and train it.

        Returns ``self`` so calls can be chained, as scikit-learn expects.
        """
        self.model = TabNetClassifier(
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            lambda_sparse=self.lambda_sparse,
            # Default resolved here rather than in __init__ (see the note there).
            optimizer_params=self.optimizer_params or {'lr': 0.01},
            seed=self.seed,
            verbose=0,
        )

        # NOTE(review): the eval set is the training data itself, so the
        # "val_0_accuracy" that TabNet logs for this wrapper is training
        # accuracy, not a held-out score.
        self.model.fit(
            X, y,
            eval_set=[(X, y)],
            max_epochs=self.max_epochs,
            patience=self.patience,
            batch_size=self.batch_size,
            virtual_batch_size=self.virtual_batch_size,
            eval_metric=['accuracy'],
        )

        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        """Return the class predictions of the fitted TabNet model for ``X``."""
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return the class probabilities of the fitted TabNet model for ``X``."""
        return self.model.predict_proba(X)

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        preds = self.predict(X)
        return (preds == y).mean()


In [9]:
#from pytorch_tabnet.sklearn import TabNetClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor

def find_best_tabnet(Xtrain, Ytrain, n_iter=20, param_distributions=None,
                     n_splits=5, n_jobs=-1, random_state=42):
    """Run a randomized hyper-parameter search for ``TabNetClassifierWrapper``.

    Parameters
    ----------
    Xtrain, Ytrain
        Training features and labels passed to ``RandomizedSearchCV.fit``.
    n_iter : int
        Number of parameter settings sampled.
    param_distributions : dict or None
        Search space. ``None`` selects the built-in space defined below.
    n_splits : int
        Number of shuffled stratified folds.
    n_jobs : int
        Parallelism passed to ``RandomizedSearchCV``.
    random_state : int
        Seed used for both the fold shuffling and the parameter sampling.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search.
    """
    # The search below is randomized, not exhaustive, so the message says so.
    print("Starting TabNet randomized search")
    print("CUDA available:", torch.cuda.is_available())

    if param_distributions is None:
        param_distributions = {
            'n_d': [8, 16, 24],
            'n_a': [8, 16, 24],
            'n_steps': [3, 4, 5],
            'gamma': [1.0, 1.3, 1.5],
            'lambda_sparse': [1e-4, 1e-3, 1e-2],
            'optimizer_params': [{'lr': 0.01}]
        }

    # NOTE(review): with n_jobs=-1 every worker trains its own model; if they
    # all share a single GPU, lowering n_jobs may be necessary - confirm on the
    # target machine.
    random_search = RandomizedSearchCV(
        estimator=TabNetClassifierWrapper(),
        param_distributions=param_distributions,
        n_iter=n_iter,
        cv=StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state),
        scoring='accuracy',
        verbose=1,
        n_jobs=n_jobs,
        random_state=random_state
    )

    random_search.fit(Xtrain, Ytrain)
    print("Best parameters:", random_search.best_params_)
    print("Best accuracy:", random_search.best_score_)

    return random_search.best_params_



In [10]:
from pytorch_tabnet.callbacks import Callback
import time

class EpochTimer(Callback):
    """Training callback that records and prints how long each epoch takes."""

    def __init__(self):
        # Duration of every finished epoch, in seconds, in training order.
        self.epoch_times = []
        # perf_counter reading taken at the start of the current epoch.
        self.start_time = None

    def on_epoch_begin(self, epoch_idx, logs=None):
        """Remember when the epoch started."""
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        self.start_time = time.perf_counter()

    def on_epoch_end(self, epoch_idx, logs=None):
        """Store and print the elapsed time of the epoch that just finished."""
        if self.start_time is None:
            # No matching on_epoch_begin was seen; there is nothing to measure.
            return
        duration = time.perf_counter() - self.start_time
        self.epoch_times.append(duration)
        print(f" Epoch {epoch_idx + 1} took {duration:.2f} seconds")


In [11]:
# Build feature matrices and encoded labels, then search for hyper-parameters.
Xtrain, Ytrain, Xtest, Ytest, Ytest_decoded, le = split_data(traindata, testdata)
best_tabnet_params = find_best_tabnet(Xtrain, Ytrain)

from sklearn.model_selection import train_test_split

# Stratified hold-out: 80% of the training data is fitted on, 20% is kept for evaluation.
X_tr, X_val, y_tr, y_val = train_test_split(
    Xtrain, Ytrain, test_size=0.2, random_state=42, stratify=Ytrain
)

# Fresh TabNet model configured with the parameters selected by the search.
final_model = TabNetClassifier(**best_tabnet_params, verbose=1, seed=42)

epoch_timer = EpochTimer()

# Fit on the 80% split only (not the full training data); the 20% split is
# supplied as the evaluation set and the timer callback records epoch durations.
final_model.fit(
    X_tr, y_tr,
    eval_set=[(X_val, y_val)],
    max_epochs=1000, patience=200,
    batch_size=1024, virtual_batch_size=128,
    callbacks=[epoch_timer],
)



Starting TabNet grid search
CUDA available: True
Fitting 5 folds for each of 20 candidates, totalling 100 fits



Early stopping occurred at epoch 459 with best_epoch = 339 and best_val_0_accuracy = 0.95634





Early stopping occurred at epoch 380 with best_epoch = 260 and best_val_0_accuracy = 0.94511




Stop training because you reached max_epochs = 500 with best_epoch = 499 and best_val_0_accuracy = 0.96045




Stop training because you reached max_epochs = 500 with best_epoch = 460 and best_val_0_accuracy = 0.96919




Stop training because you reached max_epochs = 500 with best_epoch = 499 and best_val_0_accuracy = 0.96607
Stop training because you reached max_epochs = 500 with best_epoch = 499 and best_val_0_accuracy = 0.96382
Stop training because you reached max_epochs = 500 with best_epoch = 492 and best_val_0_accuracy = 0.96881




Stop training because you reached max_epochs = 500 with best_epoch = 390 and best_val_0_accuracy = 0.97081




Stop training because you reached max_epochs = 500 with best_epoch = 478 and best_val_0_accuracy = 0.96083




Stop training because you reached max_epochs = 500 with best_epoch = 484 and best_val_0_accuracy = 0.97168
Stop training because you reached max_epochs = 500 with best_epoch = 462 and best_val_0_accuracy = 0.9602





Early stopping occurred at epoch 425 with best_epoch = 305 and best_val_0_accuracy = 0.936





Early stopping occurred at epoch 458 with best_epoch = 338 and best_val_0_accuracy = 0.9491





Early stopping occurred at epoch 467 with best_epoch = 347 and best_val_0_accuracy = 0.94449





Early stopping occurred at epoch 496 with best_epoch = 376 and best_val_0_accuracy = 0.96519




Stop training because you reached max_epochs = 500 with best_epoch = 499 and best_val_0_accuracy = 0.96095




Stop training because you reached max_epochs = 500 with best_epoch = 409 and best_val_0_accuracy = 0.95646




Stop training because you reached max_epochs = 500 with best_epoch = 490 and best_val_0_accuracy = 0.96158




Stop training because you reached max_epochs = 500 with best_epoch = 454 and best_val_0_accuracy = 0.96307
Stop training because you reached max_epochs = 500 with best_epoch = 471 and best_val_0_accuracy = 0.95971




Stop training because you reached max_epochs = 500 with best_epoch = 466 and best_val_0_accuracy = 0.96844




Stop training because you reached max_epochs = 500 with best_epoch = 417 and best_val_0_accuracy = 0.9486
Stop training because you reached max_epochs = 500 with best_epoch = 463 and best_val_0_accuracy = 0.95709
Stop training because you reached max_epochs = 500 with best_epoch = 494 and best_val_0_accuracy = 0.95908




Stop training because you reached max_epochs = 500 with best_epoch = 473 and best_val_0_accuracy = 0.95322




Stop training because you reached max_epochs = 500 with best_epoch = 495 and best_val_0_accuracy = 0.95971




Stop training because you reached max_epochs = 500 with best_epoch = 469 and best_val_0_accuracy = 0.96295




Stop training because you reached max_epochs = 500 with best_epoch = 486 and best_val_0_accuracy = 0.95072




Stop training because you reached max_epochs = 500 with best_epoch = 493 and best_val_0_accuracy = 0.96095
Stop training because you reached max_epochs = 500 with best_epoch = 484 and best_val_0_accuracy = 0.95821




Stop training because you reached max_epochs = 500 with best_epoch = 471 and best_val_0_accuracy = 0.95609
Stop training because you reached max_epochs = 500 with best_epoch = 462 and best_val_0_accuracy = 0.94561




Stop training because you reached max_epochs = 500 with best_epoch = 485 and best_val_0_accuracy = 0.96682




Stop training because you reached max_epochs = 500 with best_epoch = 448 and best_val_0_accuracy = 0.96357
Stop training because you reached max_epochs = 500 with best_epoch = 480 and best_val_0_accuracy = 0.96582




Stop training because you reached max_epochs = 500 with best_epoch = 478 and best_val_0_accuracy = 0.97156




Stop training because you reached max_epochs = 500 with best_epoch = 465 and best_val_0_accuracy = 0.96719
Stop training because you reached max_epochs = 500 with best_epoch = 456 and best_val_0_accuracy = 0.96332




Stop training because you reached max_epochs = 500 with best_epoch = 474 and best_val_0_accuracy = 0.96332
Stop training because you reached max_epochs = 500 with best_epoch = 425 and best_val_0_accuracy = 0.95721




Stop training because you reached max_epochs = 500 with best_epoch = 491 and best_val_0_accuracy = 0.95958

Early stopping occurred at epoch 420 with best_epoch = 300 and best_val_0_accuracy = 0.94548




Stop training because you reached max_epochs = 500 with best_epoch = 443 and best_val_0_accuracy = 0.96245




Stop training because you reached max_epochs = 500 with best_epoch = 425 and best_val_0_accuracy = 0.96245




Stop training because you reached max_epochs = 500 with best_epoch = 494 and best_val_0_accuracy = 0.95983
Stop training because you reached max_epochs = 500 with best_epoch = 480 and best_val_0_accuracy = 0.96083




Stop training because you reached max_epochs = 500 with best_epoch = 449 and best_val_0_accuracy = 0.95908




Stop training because you reached max_epochs = 500 with best_epoch = 487 and best_val_0_accuracy = 0.95871

Early stopping occurred at epoch 467 with best_epoch = 347 and best_val_0_accuracy = 0.95472




Stop training because you reached max_epochs = 500 with best_epoch = 483 and best_val_0_accuracy = 0.94973




Stop training because you reached max_epochs = 500 with best_epoch = 484 and best_val_0_accuracy = 0.95185





Early stopping occurred at epoch 408 with best_epoch = 288 and best_val_0_accuracy = 0.936




Stop training because you reached max_epochs = 500 with best_epoch = 491 and best_val_0_accuracy = 0.95197




Stop training because you reached max_epochs = 500 with best_epoch = 465 and best_val_0_accuracy = 0.94898
Stop training because you reached max_epochs = 500 with best_epoch = 452 and best_val_0_accuracy = 0.95534




Stop training because you reached max_epochs = 500 with best_epoch = 456 and best_val_0_accuracy = 0.95384




Stop training because you reached max_epochs = 500 with best_epoch = 422 and best_val_0_accuracy = 0.94673




Stop training because you reached max_epochs = 500 with best_epoch = 470 and best_val_0_accuracy = 0.94536





Early stopping occurred at epoch 488 with best_epoch = 368 and best_val_0_accuracy = 0.93263




Stop training because you reached max_epochs = 500 with best_epoch = 465 and best_val_0_accuracy = 0.939




Stop training because you reached max_epochs = 500 with best_epoch = 484 and best_val_0_accuracy = 0.95147




Stop training because you reached max_epochs = 500 with best_epoch = 495 and best_val_0_accuracy = 0.92303
Stop training because you reached max_epochs = 500 with best_epoch = 458 and best_val_0_accuracy = 0.94923




Stop training because you reached max_epochs = 500 with best_epoch = 489 and best_val_0_accuracy = 0.94885





Early stopping occurred at epoch 425 with best_epoch = 305 and best_val_0_accuracy = 0.92777
Stop training because you reached max_epochs = 500 with best_epoch = 471 and best_val_0_accuracy = 0.93538
Stop training because you reached max_epochs = 500 with best_epoch = 388 and best_val_0_accuracy = 0.94598




Stop training because you reached max_epochs = 500 with best_epoch = 494 and best_val_0_accuracy = 0.94





Early stopping occurred at epoch 374 with best_epoch = 254 and best_val_0_accuracy = 0.9496

Early stopping occurred at epoch 460 with best_epoch = 340 and best_val_0_accuracy = 0.94112





Early stopping occurred at epoch 467 with best_epoch = 347 and best_val_0_accuracy = 0.94573




Stop training because you reached max_epochs = 500 with best_epoch = 456 and best_val_0_accuracy = 0.94162




Stop training because you reached max_epochs = 500 with best_epoch = 490 and best_val_0_accuracy = 0.95035
Stop training because you reached max_epochs = 500 with best_epoch = 429 and best_val_0_accuracy = 0.95085




Stop training because you reached max_epochs = 500 with best_epoch = 463 and best_val_0_accuracy = 0.9607




Stop training because you reached max_epochs = 500 with best_epoch = 498 and best_val_0_accuracy = 0.94698




Stop training because you reached max_epochs = 500 with best_epoch = 470 and best_val_0_accuracy = 0.97293




Stop training because you reached max_epochs = 500 with best_epoch = 486 and best_val_0_accuracy = 0.96919
Stop training because you reached max_epochs = 500 with best_epoch = 497 and best_val_0_accuracy = 0.95372
Stop training because you reached max_epochs = 500 with best_epoch = 499 and best_val_0_accuracy = 0.95097
Stop training because you reached max_epochs = 500 with best_epoch = 495 and best_val_0_accuracy = 0.96994
Stop training because you reached max_epochs = 500 with best_epoch = 474 and best_val_0_accuracy = 0.95272




Stop training because you reached max_epochs = 500 with best_epoch = 468 and best_val_0_accuracy = 0.95235
Stop training because you reached max_epochs = 500 with best_epoch = 492 and best_val_0_accuracy = 0.97368




Stop training because you reached max_epochs = 500 with best_epoch = 470 and best_val_0_accuracy = 0.96794




Stop training because you reached max_epochs = 500 with best_epoch = 426 and best_val_0_accuracy = 0.94124





Early stopping occurred at epoch 385 with best_epoch = 265 and best_val_0_accuracy = 0.92927

Early stopping occurred at epoch 488 with best_epoch = 368 and best_val_0_accuracy = 0.95883




Stop training because you reached max_epochs = 500 with best_epoch = 451 and best_val_0_accuracy = 0.93787




Stop training because you reached max_epochs = 500 with best_epoch = 490 and best_val_0_accuracy = 0.96582
Stop training because you reached max_epochs = 500 with best_epoch = 483 and best_val_0_accuracy = 0.96108
Stop training because you reached max_epochs = 500 with best_epoch = 450 and best_val_0_accuracy = 0.96482




Stop training because you reached max_epochs = 500 with best_epoch = 417 and best_val_0_accuracy = 0.9511




Stop training because you reached max_epochs = 500 with best_epoch = 490 and best_val_0_accuracy = 0.93513
Stop training because you reached max_epochs = 500 with best_epoch = 485 and best_val_0_accuracy = 0.94311




Stop training because you reached max_epochs = 500 with best_epoch = 489 and best_val_0_accuracy = 0.95684




Stop training because you reached max_epochs = 500 with best_epoch = 405 and best_val_0_accuracy = 0.94548
Stop training because you reached max_epochs = 500 with best_epoch = 419 and best_val_0_accuracy = 0.95447




Stop training because you reached max_epochs = 500 with best_epoch = 495 and best_val_0_accuracy = 0.95671




Stop training because you reached max_epochs = 500 with best_epoch = 496 and best_val_0_accuracy = 0.938




Stop training because you reached max_epochs = 500 with best_epoch = 478 and best_val_0_accuracy = 0.96118
Best parameters: {'optimizer_params': {'lr': 0.01}, 'n_steps': 3, 'n_d': 8, 'n_a': 16, 'lambda_sparse': 0.0001, 'gamma': 1.0}
Best accuracy: 0.9315369261477044
epoch 0  | loss: 3.80875 | val_0_accuracy: 0.07086 |  0:00:00s
 Epoch 1 took 0.17 seconds
epoch 1  | loss: 3.35883 | val_0_accuracy: 0.05439 |  0:00:00s
 Epoch 2 took 0.16 seconds
epoch 2  | loss: 3.18871 | val_0_accuracy: 0.07984 |  0:00:00s
 Epoch 3 took 0.17 seconds
epoch 3  | loss: 3.0548  | val_0_accuracy: 0.10729 |  0:00:00s
 Epoch 4 took 0.17 seconds
epoch 4  | loss: 2.9076  | val_0_accuracy: 0.10429 |  0:00:00s
 Epoch 5 took 0.16 seconds
epoch 5  | loss: 2.7534  | val_0_accuracy: 0.09431 |  0:00:00s
 Epoch 6 took 0.16 seconds
epoch 6  | loss: 2.60771 | val_0_accuracy: 0.13273 |  0:00:01s
 Epoch 7 took 0.17 seconds
epoch 7  | loss: 2.44963 | val_0_accuracy: 0.15519 |  0:00:01s
 Epoch 8 took 0.16 seconds
epoch 8  | lo

In [12]:
# Only the fitted LabelEncoder (last element of the returned tuple) is needed here.
# Index the tuple instead of unpacking into `_`: assigning to `_` in IPython
# shadows its built-in "last output" variable for the rest of the session.
le = split_data(traindata, testdata)[-1]


In [13]:
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


def _metric_report_lines(y_true, y_pred, y_score, class_labels):
    """Return the text lines of the metrics report, in the order they are written.

    ``y_true`` and ``y_pred`` are decoded labels, ``y_score`` holds the predicted
    class probabilities, and ``class_labels`` names the classes in the column
    order of ``y_score``.
    """
    lines = [f"Accuracy: {accuracy_score(y_true, y_pred):.4f}\n"]

    scorers = (("Precision", precision_score), ("Recall", recall_score), ("F1 Score", f1_score))
    for average in ("weighted", "micro", "macro"):
        for title, scorer in scorers:
            lines.append(f"{title} {average}: {scorer(y_true, y_pred, average=average):.4f}\n")

    for average in ("weighted", "macro", "micro"):
        # labels= makes the mapping between probability columns and class names
        # explicit instead of relying on the classes that happen to occur in y_true.
        auc = roc_auc_score(y_true, y_score, multi_class='ovr', average=average, labels=class_labels)
        lines.append(f"roc auc {average}: {auc:.4f}\n")

    return lines


# Make sure the output directory exists before anything is written into it.
os.makedirs("results", exist_ok=True)
file_path = os.path.join("results", f"gtd{partition}.txt")

# Predict encoded class indices and class probabilities for the test set,
# then map the indices back to the original labels.
y_pred = final_model.predict(Xtest)
y_proba = final_model.predict_proba(Xtest)
y_pred_decoded = le.inverse_transform(y_pred)
y_true_decoded = le.inverse_transform(Ytest)

# Accuracy computed from the decoded labels.
acc = accuracy_score(y_true_decoded, y_pred_decoded)

# Compute every metric before opening the file, so that a metric that raises
# cannot leave a half-written results file behind.
metric_lines = _metric_report_lines(y_true_decoded, y_pred_decoded, y_proba, le.classes_)

with open(file_path, "w") as file:
    file.writelines(metric_lines)

# One epoch duration (seconds) per line.
with open(f"results/epoch_time_gtd{partition}.txt", "w") as f:
    f.write('\n'.join(str(x) for x in epoch_timer.epoch_times))



In [14]:
# Per-class precision, recall and F1 on the test set, using the decoded labels.
print(classification_report(y_true=Ytest_decoded, y_pred=y_pred_decoded))

                                                  precision    recall  f1-score   support

                          Abu Sayyaf Group (ASG)       0.90      0.99      0.94       144
        African National Congress (South Africa)       0.99      1.00      1.00       144
                                Al-Qaida in Iraq       0.74      0.78      0.76       144
        Al-Qaida in the Arabian Peninsula (AQAP)       0.87      0.94      0.90       144
                                      Al-Shabaab       0.99      1.00      0.99       144
             Basque Fatherland and Freedom (ETA)       1.00      0.98      0.99       144
                                      Boko Haram       0.93      0.99      0.96       144
  Communist Party of India - Maoist (CPI-Maoist)       0.88      0.88      0.88       144
       Corsican National Liberation Front (FLNC)       0.99      1.00      0.99       144
                       Donetsk People's Republic       0.99      1.00      1.00       144
Farabundo

In [15]:
def plot_confusion_matrix(y_true, y_pred, labels, save_path=None):
    """Save a row-normalized confusion matrix heatmap as a PNG file.

    Parameters
    ----------
    y_true, y_pred
        True and predicted labels, passed to ``sklearn.metrics.confusion_matrix``.
    labels
        Labels that index the matrix; also used as the tick labels on both axes.
    save_path : str or None
        Output file. ``None`` keeps the original location
        ``results/confusion_matrix_partition_{partition}.png``, built from the
        notebook-level ``partition`` variable.
    """
    import os
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np

    if save_path is None:
        save_path = f"results/confusion_matrix_partition_{partition}.png"

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    # Normalize each row by its number of true samples. Counts are non-negative
    # integers, so clamping the divisor to 1 leaves non-empty rows untouched and
    # turns rows without any true sample into zeros instead of NaN.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = cm.astype('float') / np.maximum(row_totals, 1)

    plt.figure(figsize=(18, 16))
    sns.heatmap(cm_normalized,
                annot=True,
                fmt=".2f",
                xticklabels=labels,
                yticklabels=labels,
                cmap="viridis",
                square=True,
                linewidths=0.5,
                cbar_kws={"shrink": 0.8})

    plt.title("Normalized Confusion Matrix", fontsize=18)
    plt.xlabel("Predicted Label", fontsize=14)
    plt.ylabel("True Label", fontsize=14)
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.tight_layout()

    # Create the target directory if needed so the function also works when no
    # earlier cell has created it.
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
    plt.savefig(save_path, dpi=300)
    # Close the figure so it is neither displayed inline nor kept in memory.
    plt.close()

    print(f"Saved confusion matrix for partition {partition} to {save_path}")


In [16]:
# The sorted set of labels that occur among the true test labels defines both matrix axes.
class_labels = np.unique(Ytest_decoded)

plot_confusion_matrix(y_true=Ytest_decoded, y_pred=y_pred_decoded, labels=class_labels)



Saved confusion matrix for partition 478 to results/confusion_matrix_partition_478.png
