# Loading dataset

## Loading from HuggingFace

In [1]:
!pip install huggingface_hub -q
from huggingface_hub import notebook_login
notebook_login('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')

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m




VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
!pip install datasets -q

from datasets import load_dataset

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [3]:
dataset = load_dataset("gosamab/binetflow-dataset")

In [4]:
import pandas as pd

train_split = dataset['train']
test_split = dataset['test']

train_df = train_split.to_pandas()
test_df = test_split.to_pandas()

print(train_df.info())
print(train_df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16166417 entries, 0 to 16166416
Data columns (total 17 columns):
 #   Column             Dtype  
---  ------             -----  
 0   Duration           float64
 1   SrcPort            int64  
 2   DstPort            int64  
 3   sTos               int64  
 4   dTos               int64  
 5   TotPkts            int64  
 6   TotBytes           int64  
 7   SrcBytes           float64
 8   Label              object 
 9   label_binary       int64  
 10  label_multi        object 
 11  source_file        object 
 12  dataset            object 
 13  PktByteRatio       float64
 14  BytePerPkt         float64
 15  SrcByteRatio       float64
 16  __index_level_0__  int64  
dtypes: float64(5), int64(8), object(4)
memory usage: 2.0+ GB
None
           Duration       SrcPort       DstPort          sTos          dTos  \
count  1.616642e+07  1.616642e+07  1.616642e+07  1.616642e+07  1.616642e+07   
mean   2.849303e+02  3.766306e+04  7.102033e+03  8.5

## Selecting features

In [5]:
!pip install scikit-learn -q
from sklearn.model_selection import train_test_split
import pandas as pd

features = ['Duration', 'SrcPort', 'DstPort', 'sTos', 'dTos', 'TotPkts', 'TotBytes']
target = 'label_binary'

for col in features:
    if pd.api.types.is_numeric_dtype(train_df[col]):
        train_df[col] = train_df[col].fillna(train_df[col].mean())

X_train = train_df[features]
y_train = train_df[target]

X_test = test_df[features]
y_test = test_df[target]

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


# Binary Class Models

## Logistic Regression Model

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

lr_y_pred = lr_model.predict(X_test)

accuracy = accuracy_score(y_test, lr_y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9780458560285193


In [11]:
from sklearn.metrics import precision_recall_fscore_support

precision, recall, fscore, _ = precision_recall_fscore_support(y_test, lr_y_pred, average='weighted')
print(f"Weighted Average Precision: {precision}")
print(f"Weighted Average Recall: {recall}")
print(f"Weighted Average F-score: {fscore}")

precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(y_test, lr_y_pred, average='macro')
print(f"Macro Average Precision: {precision_macro}")
print(f"Macro Average Recall: {recall_macro}")
print(f"Macro Average F-score: {fscore_macro}")

Weighted Average Precision: 0.9565746538469136
Weighted Average Recall: 0.9780458560285193
Weighted Average F-score: 0.967191106887551
Macro Average Precision: 0.4890231700087391
Macro Average Recall: 0.49999974701976635
Macro Average F-score: 0.494450547264976


## XGBoost Model

In [14]:
!pip install xgboost -q

import xgboost as xgb

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

params = {
    'objective': 'binary:logistic',
    'tree_method': 'hist',
    'device': 'cuda',
    'eval_metric': 'logloss',
}

model = xgb.train(params, dtrain, num_boost_round=100)

xgb_y_pred = model.predict(dtest)

xgb_y_pred_binary = [1 if pred >= 0.5 else 0 for pred in xgb_y_pred]

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [15]:
from sklearn.metrics import accuracy_score

xgb_accuracy = accuracy_score(y_test, xgb_y_pred_binary)
print(f"XGBoost Accuracy: {xgb_accuracy}")

xgb_precision, xgb_recall, xgb_fscore, _ = precision_recall_fscore_support(y_test, xgb_y_pred_binary, average='weighted')
print(f"XGBoost Weighted Average Precision: {xgb_precision}")
print(f"XGBoost Weighted Average Recall: {xgb_recall}")
print(f"XGBoost Weighted Average F-score: {xgb_fscore}")

xgb_precision_macro, xgb_recall_macro, xgb_fscore_macro, _ = precision_recall_fscore_support(y_test, xgb_y_pred_binary, average='macro')
print(f"XGBoost Macro Average Precision: {xgb_precision_macro}")
print(f"XGBoost Macro Average Recall: {xgb_recall_macro}")
print(f"XGBoost Macro Average F-score: {xgb_fscore_macro}")

XGBoost Accuracy: 0.9806022655350373
XGBoost Weighted Average Precision: 0.980811786726317
XGBoost Weighted Average Recall: 0.9806022655350373
XGBoost Weighted Average F-score: 0.9807055005870191
XGBoost Macro Average Precision: 0.7736415433068919
XGBoost Macro Average Recall: 0.7795295296944875
XGBoost Macro Average F-score: 0.776552109535178


## LSTM Model

In [17]:
import torch

if torch.cuda.is_available():
    print(f"CUDA is available. Device count: {torch.cuda.device_count()}")
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("CUDA is NOT available.")

CUDA is available. Device count: 1
Current device: 0
Device name: NVIDIA A40


In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

input_size = X_train.shape[1]
hidden_size = 64
num_layers = 2
num_classes = 2
learning_rate = 0.001
num_epochs = 2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)

from tqdm import tqdm

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}") as pbar:
        for i, (data, labels) in enumerate(pbar):
            data = data.to(device)
            labels = labels.to(device)

            data = data.unsqueeze(1)

            outputs = model(data)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            pbar.set_postfix(loss=loss.item())

Epoch 1/2: 100%|██████████| 63151/63151 [09:02<00:00, 116.34it/s, loss=0.0242] 
Epoch 2/2: 100%|██████████| 63151/63151 [08:44<00:00, 120.30it/s, loss=0.00537]


In [22]:
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import torch

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for data, labels in tqdm(test_loader, desc="Evaluating LSTM"):
        data = data.to(device).unsqueeze(1)
        labels = labels.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        all_preds.append(predicted.cpu())
        all_labels.append(labels.cpu())

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

lstm_accuracy = (all_preds == all_labels).sum().item() / len(all_labels)
print(f"LSTM Test Accuracy: {lstm_accuracy:.4f}")

Evaluating LSTM: 100%|██████████| 3947/3947 [02:09<00:00, 30.37it/s]

LSTM Test Accuracy: 0.9907





In [23]:
from sklearn.metrics import precision_recall_fscore_support

lstm_precision, lstm_recall, lstm_fscore, _ = precision_recall_fscore_support(
    all_labels, all_preds, average='weighted'
)
print(f"LSTM Weighted Average Precision: {lstm_precision:.4f}")
print(f"LSTM Weighted Average Recall: {lstm_recall:.4f}")
print(f"LSTM Weighted Average F-score: {lstm_fscore:.4f}")

lstm_precision_macro, lstm_recall_macro, lstm_fscore_macro, _ = precision_recall_fscore_support(
    all_labels, all_preds, average='macro'
)
print(f"LSTM Macro Average Precision: {lstm_precision_macro:.4f}")
print(f"LSTM Macro Average Recall: {lstm_recall_macro:.4f}")
print(f"LSTM Macro Average F-score: {lstm_fscore_macro:.4f}")

LSTM Weighted Average Precision: 0.9901
LSTM Weighted Average Recall: 0.9907
LSTM Weighted Average F-score: 0.9903
LSTM Macro Average Precision: 0.9160
LSTM Macro Average Recall: 0.8541
LSTM Macro Average F-score: 0.8824


## SVM

In [12]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

svm_model = LinearSVC(max_iter=10000, dual=False)
svm_model.fit(X_train, y_train)

In [13]:
svm_y_pred = svm_model.predict(X_test)

svm_accuracy = accuracy_score(y_test, svm_y_pred)
print(f"Linear SVM Accuracy: {svm_accuracy:.4f}")

svm_precision, svm_recall, svm_fscore, _ = precision_recall_fscore_support(
    y_test, svm_y_pred, average='weighted'
)
print(f"Weighted Precision: {svm_precision:.4f}")
print(f"Weighted Recall:    {svm_recall:.4f}")
print(f"Weighted F-score:   {svm_fscore:.4f}")

svm_precision_macro, svm_recall_macro, svm_fscore_macro, _ = precision_recall_fscore_support(
    y_test, svm_y_pred, average='macro'
)
print(f"Macro Precision: {svm_precision_macro:.4f}")
print(f"Macro Recall:    {svm_recall_macro:.4f}")
print(f"Macro F-score:   {svm_fscore_macro:.4f}")

Linear SVM Accuracy: 0.9780
Weighted Precision: 0.9566
Weighted Recall:    0.9780
Weighted F-score:   0.9672
Macro Precision: 0.4890
Macro Recall:    0.5000
Macro F-score:   0.4945


# Multi-Class Models

## Logistic Regression

In [17]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

lr_model_multi = OneVsRestClassifier(LogisticRegression(max_iter=1000, solver='lbfgs'))
lr_model_multi.fit(X_train, y_train_encoded)

In [21]:
y_pred = lr_model_multi.predict(X_test)

accuracy = accuracy_score(y_test_encoded, y_pred)
weighted_precision, weighted_recall, weighted_f1, _ = precision_recall_fscore_support(
    y_test_encoded, y_pred, average='weighted'
)
macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
    y_test_encoded, y_pred, average='macro'
)

print(f"Logistic Regression Multi-class Accuracy: {accuracy:.4f}")
print(f"Weighted Precision: {weighted_precision:.4f}")
print(f"Weighted Recall:    {weighted_recall:.4f}")
print(f"Weighted F1-Score:  {weighted_f1:.4f}")
print(f"Macro Precision:    {macro_precision:.4f}")
print(f"Macro Recall:       {macro_recall:.4f}")
print(f"Macro F1-Score:     {macro_f1:.4f}")

Logistic Regression Multi-class Accuracy: 0.9780
Weighted Precision: 0.9566
Weighted Recall:    0.9780
Weighted F1-Score:  0.9672
Macro Precision:    0.4890
Macro Recall:       0.5000
Macro F1-Score:     0.4945


## XGBoost

In [19]:
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

xgb_model_multi = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=len(label_encoder.classes_),
    eval_metric='mlogloss',
    tree_method='hist',
    device='cuda',
    use_label_encoder=False,
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100
)

xgb_model_multi.fit(X_train, y_train_encoded)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [22]:
y_pred = xgb_model_multi.predict(X_test)

accuracy = accuracy_score(y_test_encoded, y_pred)

weighted_precision, weighted_recall, weighted_f1, _ = precision_recall_fscore_support(
    y_test_encoded, y_pred, average='weighted'
)

macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
    y_test_encoded, y_pred, average='macro'
)

print(f"XGBoost Multi-class Accuracy: {accuracy:.4f}")
print(f"Weighted Precision: {weighted_precision:.4f}")
print(f"Weighted Recall:    {weighted_recall:.4f}")
print(f"Weighted F1-Score:  {weighted_f1:.4f}")
print(f"Macro Precision:    {macro_precision:.4f}")
print(f"Macro Recall:       {macro_recall:.4f}")
print(f"Macro F1-Score:     {macro_f1:.4f}")

XGBoost Multi-class Accuracy: 0.9970
Weighted Precision: 0.9970
Weighted Recall:    0.9970
Weighted F1-Score:  0.9970
Macro Precision:    0.9744
Macro Recall:       0.9556
Macro F1-Score:     0.9648


## LSTM Model

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

class LSTMMulti(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMMulti, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        return self.fc(out[:, -1, :])

input_size = X_train.shape[1]
num_classes = len(label_encoder.classes_)
model = LSTMMulti(input_size, hidden_size=64, num_layers=2, num_classes=num_classes).to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

from tqdm import tqdm
for epoch in range(2):
    model.train()
    with tqdm(train_loader, desc=f"LSTM Epoch {epoch+1}") as pbar:
        for batch_x, batch_y in pbar:
            batch_x = batch_x.unsqueeze(1).to('cuda')
            batch_y = batch_y.to('cuda')

            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            pbar.set_postfix(loss=loss.item())

LSTM Epoch 1: 100%|██████████| 63151/63151 [08:43<00:00, 120.70it/s, loss=0.0034] 
LSTM Epoch 2: 100%|██████████| 63151/63151 [08:16<00:00, 127.08it/s, loss=0.00491]


In [24]:
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_x, batch_y in tqdm(test_loader, desc="Evaluating LSTM"):
        batch_x = batch_x.unsqueeze(1).to('cuda')
        outputs = model(batch_x)
        _, predicted = torch.max(outputs.data, 1)
        all_preds.append(predicted.cpu())
        all_labels.append(batch_y)

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

acc = accuracy_score(all_labels, all_preds)
weighted_p, weighted_r, weighted_f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')
macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')

print(f"LSTM Multi-class Accuracy: {acc:.4f}")
print(f"Weighted Precision: {weighted_p:.4f}")
print(f"Weighted Recall:    {weighted_r:.4f}")
print(f"Weighted F1-Score:  {weighted_f1:.4f}")
print(f"Macro Precision:    {macro_p:.4f}")
print(f"Macro Recall:       {macro_r:.4f}")
print(f"Macro F1-Score:     {macro_f1:.4f}")

Evaluating LSTM: 100%|██████████| 3947/3947 [02:06<00:00, 31.26it/s]


LSTM Multi-class Accuracy: 0.9901
Weighted Precision: 0.9909
Weighted Recall:    0.9901
Weighted F1-Score:  0.9904
Macro Precision:    0.8685
Macro Recall:       0.9196
Macro F1-Score:     0.8923


## SVM Model

In [25]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

svm_model_multi = LinearSVC(max_iter=10000, dual=False)
svm_model_multi.fit(X_train, y_train_encoded)

In [26]:
y_pred = svm_model_multi.predict(X_test)

acc = accuracy_score(y_test_encoded, y_pred)
weighted_p, weighted_r, weighted_f1, _ = precision_recall_fscore_support(y_test_encoded, y_pred, average='weighted')
macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(y_test_encoded, y_pred, average='macro')

print(f"Linear SVM Multi-class Accuracy: {acc:.4f}")
print(f"Weighted Precision: {weighted_p:.4f}")
print(f"Weighted Recall:    {weighted_r:.4f}")
print(f"Weighted F1-Score:  {weighted_f1:.4f}")
print(f"Macro Precision:    {macro_p:.4f}")
print(f"Macro Recall:       {macro_r:.4f}")
print(f"Macro F1-Score:     {macro_f1:.4f}")

Linear SVM Multi-class Accuracy: 0.9780
Weighted Precision: 0.9566
Weighted Recall:    0.9780
Weighted F1-Score:  0.9672
Macro Precision:    0.4890
Macro Recall:       0.5000
Macro F1-Score:     0.4945
