In [92]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

In [93]:
DATASET_PATH = '../datasets/data/different_features/'
MODEL_PATH = '../models/different_features_model.pth'
EPOCHS = 20
BATCH_SIZE = 1
LEARNING_RATE = 0.00001

In [94]:
CATEGORIES = [
    'games',
    'music',
    'social_network',
    'video_hosting',
    'cloud_service',
    'e-mail',
    'other'
]

In [95]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [96]:
data = pd.read_csv(DATASET_PATH + 'traffic_features.csv', delimiter=',')

In [97]:
data.head()

Unnamed: 0,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,57.857927,3896,3895,2363942,2369599,1309,52,606.761294,442.746474,1331,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,games
1,60.046756,5955,5954,5067377,5047463,1342,50,850.94492,393.229049,1342,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,games
2,47.894942,16,7060,1056,7946200,66,66,66.0,0.0,3855,...,32,2.245078,1.220733,4.354972,1.101462,2.245078,1.220733,4.354972,1.101462,games
3,56.5757,12,4092,792,5314966,66,66,66.0,0.0,1506,...,32,2.437204,1.077658,4.558837,1.028184,2.437204,1.077658,4.558837,1.028184,games
4,59.827636,0,147,0,64804,0,0,0.0,0.0,16140,...,0,2.624479,1.184121,4.886324,1.201119,2.624479,1.184121,4.886324,1.201119,games


In [98]:
data['Label'].unique()

array(['games', 'music', 'social_network', 'video_hosting',
       'cloud_service', 'e-mail', 'other'], dtype=object)

In [99]:
le = LabelEncoder()
data['Label'] = le.fit_transform(data['Label'])
print(f"Закодированные метки: {dict(zip(le.classes_, range(len(le.classes_))))}")

Закодированные метки: {'cloud_service': 0, 'e-mail': 1, 'games': 2, 'music': 3, 'other': 4, 'social_network': 5, 'video_hosting': 6}


In [100]:
# Разделение признаков и меток
X = data.drop('Label', axis=1).values
y = data['Label'].values

# Нормализация признаков
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Разделение на train и validation
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=13052003, stratify=y
)
print(f"Train: {X_train.shape[0]} строк, Validation: {X_val.shape[0]} строк")

# Преобразование в тензоры
X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.LongTensor(y_train).to(device)
X_val_tensor = torch.FloatTensor(X_val).to(device)
y_val_tensor = torch.LongTensor(y_val).to(device)

# Создание DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

Train: 168 строк, Validation: 42 строк


In [101]:
class TrafficClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(TrafficClassifier, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        return self.model(x)

In [102]:
input_size = X_train.shape[1]
num_classes = len(CATEGORIES)
model = TrafficClassifier(input_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [103]:
print("\nОбучение модели...")
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    train_correct = 0
    train_total = 0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += y_batch.size(0)
        train_correct += (predicted == y_batch).sum().item()

    train_loss /= len(train_loader)
    train_accuracy = 100 * train_correct / train_total

    # Валидация
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += y_batch.size(0)
            val_correct += (predicted == y_batch).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * val_correct / val_total

    print(f"Эпоха {epoch+1}/{EPOCHS}: "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

# Сохранение модели
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
torch.save(model.state_dict(), MODEL_PATH)
print(f"\nМодель сохранена в {MODEL_PATH}")

# Итоговые метки
print(f"\nСоответствие меток: {dict(zip(range(len(le.classes_)), le.classes_))}")


Обучение модели...
Эпоха 1/20: Train Loss: 1.9515, Train Accuracy: 14.29%, Val Loss: 1.9505, Val Accuracy: 14.29%
Эпоха 2/20: Train Loss: 1.9542, Train Accuracy: 14.88%, Val Loss: 1.9480, Val Accuracy: 14.29%
Эпоха 3/20: Train Loss: 1.9554, Train Accuracy: 14.29%, Val Loss: 1.9454, Val Accuracy: 14.29%
Эпоха 4/20: Train Loss: 1.9453, Train Accuracy: 16.67%, Val Loss: 1.9428, Val Accuracy: 14.29%
Эпоха 5/20: Train Loss: 1.9430, Train Accuracy: 15.48%, Val Loss: 1.9401, Val Accuracy: 14.29%
Эпоха 6/20: Train Loss: 1.9394, Train Accuracy: 16.67%, Val Loss: 1.9375, Val Accuracy: 14.29%
Эпоха 7/20: Train Loss: 1.9415, Train Accuracy: 14.29%, Val Loss: 1.9348, Val Accuracy: 14.29%
Эпоха 8/20: Train Loss: 1.9388, Train Accuracy: 18.45%, Val Loss: 1.9323, Val Accuracy: 16.67%
Эпоха 9/20: Train Loss: 1.9379, Train Accuracy: 16.07%, Val Loss: 1.9298, Val Accuracy: 16.67%
Эпоха 10/20: Train Loss: 1.9334, Train Accuracy: 17.86%, Val Loss: 1.9273, Val Accuracy: 16.67%
Эпоха 11/20: Train Loss: 1.93