In [64]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [65]:
RANDOM_SEED = 42
DATA_PATH = "data/"
DATA_FILE = "processed_traffic.parquet"

In [66]:
data = pd.read_parquet(DATA_PATH + DATA_FILE)
X, y = data.drop(columns=["Attack Name", "Label"]), data["Label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((49742, 47), (12436, 47), (49742,), (12436,))

In [67]:
X_train.head()

Unnamed: 0,Src Port,Dst Port,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Mean,...,Bwd Packet/Bulk Avg,Bwd Bulk Rate Avg,FWD Init Win Bytes,Bwd Init Win Bytes,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Idle Mean,Idle Std
50459,1883,52303,588990,2,4,0.0,15.0,0.0,0.0,3.75,...,0,0,509,502,0,32,0.0,0.0,0.0,0.0
43594,56478,18665,103667254,10,0,636.0,0.0,63.6,54.738165,0.0,...,0,0,14726,0,5,32,1618428.0,1759747.0,19438710.0,9732065.0
44213,45125,1883,110000796,16,16,178.0,4.0,11.125,6.075909,0.25,...,0,0,502,64,13,32,189683.5,421911.2,9810381.0,400337.8
17134,43231,1883,119998680,17,17,194.0,4.0,11.411765,5.990188,0.235294,...,0,0,502,64,14,32,173572.2,405264.6,9826317.0,385951.9
13320,40571,1883,61047003,5,4,17.0,4.0,3.4,5.458938,1.0,...,0,0,502,64,2,32,2600.0,0.0,59999800.0,0.0


In [68]:
y_train.head()

50459    1
43594    1
44213    1
17134    0
13320    0
Name: Label, dtype: int64

In [69]:
# Normalize the features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
X_train.head()

Unnamed: 0,Src Port,Dst Port,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Mean,...,Bwd Packet/Bulk Avg,Bwd Bulk Rate Avg,FWD Init Win Bytes,Bwd Init Win Bytes,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Idle Mean,Idle Std
0,-1.759531,1.793392,-1.315682,-0.133439,-0.134831,-0.063223,-0.029439,-0.331845,-0.339586,-0.169665,...,-0.084939,-0.014693,-0.435118,-0.222006,-0.124596,0.640622,-0.238747,-0.284902,-0.709551,-0.206578
1,1.306773,0.201716,0.701594,-0.119532,-0.143527,-0.061783,-0.0295,0.144995,0.123035,-0.205212,...,-0.084939,-0.014693,0.24784,-0.259501,-0.115646,0.640622,0.08692,0.386412,0.153447,4.085911
2,0.669137,-0.592371,0.825543,-0.109101,-0.108743,-0.06282,-0.029483,-0.248436,-0.288235,-0.202842,...,-0.084939,-0.014693,-0.435455,-0.254721,-0.101326,0.640622,-0.200578,-0.12395,-0.274011,-0.030003
3,0.562761,-0.592371,1.021205,-0.107363,-0.106569,-0.062783,-0.029483,-0.246285,-0.28896,-0.202981,...,-0.084939,-0.014693,-0.435455,-0.254721,-0.099536,0.640622,-0.20382,-0.1303,-0.273303,-0.036348
4,0.413364,-0.592371,-0.132499,-0.128224,-0.134831,-0.063184,-0.029483,-0.306354,-0.29345,-0.195733,...,-0.084939,-0.014693,-0.435455,-0.254721,-0.121016,0.640622,-0.238224,-0.284902,1.954191,-0.206578


In [70]:
import torch

# Convert to tensors
X_train_tensor, y_train_tensor = torch.tensor(X_train.values, dtype=torch.float32), torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor, y_test_tensor = torch.tensor(X_test.values, dtype=torch.float32), torch.tensor(y_test.values, dtype=torch.long)
X_train_tensor.shape, y_train_tensor.shape, X_test_tensor.shape, y_test_tensor.shape

(torch.Size([49742, 47]),
 torch.Size([49742]),
 torch.Size([12436, 47]),
 torch.Size([12436]))

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class IDSModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(IDSModel, self).__init__()
        
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes)
        )
    
    def forward(self, x):
        return self.net(x)

In [72]:
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report

def training_loop(dataloader, epochs, model, criterion, optimizer):
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(dataloader)}")
    print("Training complete.")

def train_model(X_train, y_train, input_size, hidden_size, num_classes, batch_size=64, epochs=10, learning_rate=0.001):
    torch.manual_seed(RANDOM_SEED)

    dataset = TensorDataset(X_train, y_train)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    model = IDSModel(input_size, hidden_size, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    training_loop(dataloader, epochs, model, criterion, optimizer)
    return model

def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs, 1)
    print(classification_report(y_test, predicted))

In [73]:
model32 = train_model(X_train_tensor, y_train_tensor, input_size=X_train_tensor.shape[1], hidden_size=128, num_classes=len(y.unique()), epochs=20)
evaluate_model(model32, X_test_tensor, y_test_tensor)

Epoch 1/20, Loss: 0.32794344720160135
Epoch 2/20, Loss: 0.2917717887629733
Epoch 3/20, Loss: 0.2833323386093185
Epoch 4/20, Loss: 0.2791682837439993
Epoch 5/20, Loss: 0.2716293597933872
Epoch 6/20, Loss: 0.26506821984535317
Epoch 7/20, Loss: 0.25847680366820724
Epoch 8/20, Loss: 0.2495082677330027
Epoch 9/20, Loss: 0.23858846098682568
Epoch 10/20, Loss: 0.22824930485845532
Epoch 11/20, Loss: 0.21696934954845049
Epoch 12/20, Loss: 0.20879326258956007
Epoch 13/20, Loss: 0.20132798349106526
Epoch 14/20, Loss: 0.19597998227933686
Epoch 15/20, Loss: 0.18973874877707358
Epoch 16/20, Loss: 0.18445781569994968
Epoch 17/20, Loss: 0.18014004827982202
Epoch 18/20, Loss: 0.1784494127168447
Epoch 19/20, Loss: 0.17370629881372182
Epoch 20/20, Loss: 0.17233818754132424
Training complete.
              precision    recall  f1-score   support

           0       0.91      0.98      0.94      6476
           1       0.98      0.89      0.93      5960

    accuracy                           0.94     1243