In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from pytorch_tabnet.tab_model import TabNetClassifier
from statistics import mean
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import numpy as np

In [6]:
def get_model():
    model = TabNetClassifier(verbose=0, seed=42)
    return model

In [3]:
dataset_path = 'dataset.csv'
dataset = pd.read_csv(dataset_path)

In [4]:
dataset

Unnamed: 0,Cografya,Yerlesim,AracYogunlugu,YayaYogunlugu,VeriHizi,Guvenilirlik,Gecikme,Kapsama,Standart
0,5,6,6,7,9,6,4,6,1
1,6,2,6,1,3,2,6,7,0
2,5,1,6,9,3,6,6,5,1
3,8,2,3,5,7,5,7,9,0
4,5,7,4,8,5,9,5,6,1
...,...,...,...,...,...,...,...,...,...
9995,4,6,4,4,1,9,9,9,0
9996,1,10,6,10,6,4,8,5,0
9997,4,4,2,6,4,4,9,5,0
9998,4,9,7,2,5,4,2,7,0


In [5]:
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1:]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, shuffle=True, stratify=y)

In [30]:
num_folds = 5
skf = StratifiedKFold(n_splits=num_folds, random_state=42, shuffle=True)
batch_sizes = [2**i for i in range(7,11)]
results = []

for batch_size in batch_sizes:
    accuracy_list = []
    for train_cv_idx, val_idx in skf.split(x_train, y_train):
        # TabNet models are compatible with NumPy array (Do not use Pandas DataFrame!)
        x_train_cv, x_val = x_train.iloc[train_cv_idx].values, x_train.iloc[val_idx].values
        y_train_cv, y_val = y_train.iloc[train_cv_idx].values.ravel(), y_train.iloc[val_idx].values.ravel()
        model = get_model()
        
        model.fit(x_train_cv, y_train_cv,
                  eval_set=[(x_val, y_val)],
                  eval_metric=['accuracy'],
                  max_epochs=100, patience=15, batch_size=batch_size)
        
        pred = model.predict(x_val)
        accuracy = accuracy_score(y_val, pred)
        accuracy_list.append(accuracy)
    print('Validation accuracy for batch size =\t', batch_size, '\t:', mean(accuracy_list))
    results.append(mean(accuracy_list))
    
max_val = max(results)
max_idx = results.index(max_val)

print('*' * 50)
print('The best validation accuracy was obtained with batch size =', batch_sizes[max_idx], 'as', max_val)


Early stopping occurred at epoch 80 with best_epoch = 65 and best_val_0_accuracy = 0.8875
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 35 with best_epoch = 20 and best_val_0_accuracy = 0.86062
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 65 with best_epoch = 50 and best_val_0_accuracy = 0.87438
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 60 with best_epoch = 45 and best_val_0_accuracy = 0.865
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 70 with best_epoch = 55 and best_val_0_accuracy = 0.88875
Best weights from best epoch are automatically used!
Validation accuracy for batch size =	 128 	: 0.87525

Early stopping occurred at epoch 77 with best_epoch = 62 and best_val_0_accuracy = 0.8875
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 50 with best_epoch = 35 and best_val_0_accuracy

In [34]:
model = get_model()

model.fit(x_train.values, y_train.values.ravel(),
          eval_set=[(x_train.values, y_train.values.ravel())],
          eval_metric=['accuracy'],
          max_epochs=100, patience=15, batch_size=batch_sizes[max_idx])

pred = model.predict(x_test.values)

cm = confusion_matrix(y_test, pred)
print(cm)

Stop training because you reached max_epochs = 100 with best_epoch = 97 and best_val_0_accuracy = 0.90375
Best weights from best epoch are automatically used!
[[1073  118]
 [ 123  686]]


In [35]:
print('Test accuracy:', accuracy_score(y_test, pred))

Test accuracy: 0.8795


In [36]:
label_names = ['3GPP', 'IEEE']
report = classification_report(y_test, pred, target_names=label_names)
print(report)

              precision    recall  f1-score   support

        3GPP       0.90      0.90      0.90      1191
        IEEE       0.85      0.85      0.85       809

    accuracy                           0.88      2000
   macro avg       0.88      0.87      0.87      2000
weighted avg       0.88      0.88      0.88      2000



In [37]:
model.save_model('tabnet_model')

Successfully saved model at tabnet_model.zip


'tabnet_model.zip'