In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import os, glob
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, Conv1D, MaxPooling1D, Flatten, LSTM, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
def load_dataset_from_structure(root_path):
    """Load every CSV under ``root_path/<category>/<attack>/`` into one DataFrame.

    Three label columns are appended to the rows of each file:

    * ``category`` - name of the grandparent folder of the CSV.
    * ``attack``   - name of the folder that directly contains the CSV.
    * ``class``    - ``'Benign'`` when ``category`` equals "benign"
      (case-insensitive), otherwise ``'Attack'``.

    Files that cannot be read are reported on stdout and skipped (best effort).

    Parameters
    ----------
    root_path : pathlib.Path or str
        Directory that holds the ``<category>/<attack>/*.csv`` tree.

    Returns
    -------
    pandas.DataFrame
        All readable files concatenated with a fresh ``RangeIndex``.

    Raises
    ------
    ValueError
        If no CSV file under ``root_path`` could be read.
    """
    root_path = Path(root_path)
    frames = []

    # Sort the matches: glob order is filesystem dependent, and the row order
    # decides what any seeded shuffle/split performed later will produce.
    for file in sorted(root_path.glob('*/*/*.csv')):
        try:
            df = pd.read_csv(file)
        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the load.
            print(f"[ERROR] Failed to read file {file}: {e}")
            continue

        category = file.parents[1].name  # e.g. DDoS, DoS, ...
        attack = file.parent.name        # e.g. DDoS ICMP, DoS TCP, ...

        df['category'] = category
        df['attack'] = attack
        df['class'] = 'Benign' if category.upper() == 'BENIGN' else 'Attack'
        frames.append(df)

    if not frames:
        # pd.concat([]) would also raise ValueError, but with an opaque message.
        raise ValueError(
            f"No readable CSV files found under {root_path} "
            "(expected layout: <category>/<attack>/*.csv)"
        )
    return pd.concat(frames, ignore_index=True)

# Locations of the two dataset partitions on disk
train_root = Path('../../../Data/CICIoMT2024/train')
test_root = Path('../../../Data/CICIoMT2024/test')

# Read both partitions
train_df = load_dataset_from_structure(train_root)
test_df = load_dataset_from_structure(test_root)

# Summary check: shape and label distribution of each partition
label_cols = ['category', 'attack', 'class']
for header, frame in (("Train set:", train_df), ("\nTest set:", test_df)):
    print(header, frame.shape)
    print(frame[label_cols].value_counts())

# Merge train and test into a single DataFrame.
# NOTE(review): after this concat `df` carries no marker of which partition a
# row came from, so the original train/test separation is not recoverable
# from `df` alone.
df = pd.concat((train_df, test_df), ignore_index=True)

In [None]:
# Encode the target.
# The label is the multi-class `category` column (one class per top-level
# folder), NOT the binary Attack/Benign `class` column.
le = LabelEncoder()
y_encoded = le.fit_transform(df['category'])
y_categorical = to_categorical(y_encoded)

# Features: every numeric column except the label columns
X = df.drop(['class', 'category', 'attack'], axis=1, errors='ignore').select_dtypes(include=[np.number])

# Split row indices once, so the flat (ANN) and sequence views share exactly
# the same train/test partition without relying on two calls using one seed.
train_idx, test_idx = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)

# Fit the scaler on the training rows only; fitting on all rows would leak
# test-set mean/variance into the training features.
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx])
X_scaled = scaler.transform(X)

# Reshape for sequence models.
# NOTE: the "timesteps" are consecutive groups of feature columns, not points
# in time, and any trailing columns that do not fill a whole group are dropped.
timesteps = 3
features_per_step = X_scaled.shape[1] // timesteps
X_seq = X_scaled[:, :timesteps * features_per_step].reshape((-1, timesteps, features_per_step))

# Materialize the partitions
X_train_ann, X_test_ann = X_scaled[train_idx], X_scaled[test_idx]
X_train_seq, X_test_seq = X_seq[train_idx], X_seq[test_idx]
y_train, y_test = y_categorical[train_idx], y_categorical[test_idx]

In [None]:
# Define models
def _compile_classifier(layers):
    """Wrap `layers` in a Sequential model compiled for one-hot multi-class targets."""
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def _build_dense_classifier(n_features, n_classes):
    """Two hidden Dense layers (128, 64) with dropout on flat feature vectors."""
    return _compile_classifier([
        Input(shape=(n_features,)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(n_classes, activation='softmax'),
    ])


def _build_cnn_classifier(input_shape, n_classes):
    """Conv1D -> MaxPooling1D -> Dense head on (timesteps, features_per_step) inputs."""
    return _compile_classifier([
        Input(shape=input_shape),
        Conv1D(64, kernel_size=2, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(n_classes, activation='softmax'),
    ])


def _build_recurrent_classifier(recurrent_layer, input_shape, n_classes):
    """`recurrent_layer` -> Dropout -> Dense head on (timesteps, features_per_step) inputs."""
    return _compile_classifier([
        Input(shape=input_shape),
        recurrent_layer,
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(n_classes, activation='softmax'),
    ])


n_features = X_train_ann.shape[1]
n_classes = y_categorical.shape[1]
seq_shape = (timesteps, features_per_step)

# NOTE(review): ANN, DNN and MLP share one identical architecture, so any
# difference in their scores comes only from random initialization/training.
ann = _build_dense_classifier(n_features, n_classes)
dnn = _build_dense_classifier(n_features, n_classes)
mlp = _build_dense_classifier(n_features, n_classes)

cnn = _build_cnn_classifier(seq_shape, n_classes)
lstm = _build_recurrent_classifier(LSTM(64), seq_shape, n_classes)
bilstm = _build_recurrent_classifier(Bidirectional(LSTM(64)), seq_shape, n_classes)

# Registry of all models: name -> (model, train inputs, test inputs)
models = {
    "ANN": (ann, X_train_ann, X_test_ann),
    "DNN": (dnn, X_train_ann, X_test_ann),
    "MLP": (mlp, X_train_ann, X_test_ann),
    "CNN": (cnn, X_train_seq, X_test_seq),
    "LSTM": (lstm, X_train_seq, X_test_seq),
    "BiLSTM": (bilstm, X_train_seq, X_test_seq)
}

In [None]:
# Train, evaluate and visualize every model.
# (classification_report / confusion_matrix come from the notebook's import cell.)

# Training hyper-parameters. No earlier cell fits the models, so they are
# trained here; without this step the reports would describe untrained networks.
EPOCHS = 20
BATCH_SIZE = 256
VALIDATION_SPLIT = 0.1
EARLY_STOPPING_PATIENCE = 3

y_test_labels = np.argmax(y_test, axis=1)
class_ids = np.arange(y_test.shape[1])
label_names = le.inverse_transform(class_ids)

results = {}
for name, (model, X_train, X_test) in models.items():
    print(f"Training model: {name}")
    model.fit(
        X_train, y_train,
        validation_split=VALIDATION_SPLIT,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[EarlyStopping(monitor='val_loss',
                                 patience=EARLY_STOPPING_PATIENCE,
                                 restore_best_weights=True)],
        verbose=2,
    )

    print(f"Evaluating model: {name}")
    y_pred_proba = model.predict(X_test, batch_size=BATCH_SIZE)
    y_pred = np.argmax(y_pred_proba, axis=1)

    # Pass `labels` explicitly: otherwise a class absent from this test fold
    # makes `target_names` mismatch the labels sklearn discovers on its own.
    report_kwargs = dict(labels=class_ids, target_names=label_names, zero_division=0)

    print(f"\nClassification Report - {name}")
    results[name] = classification_report(y_test_labels, y_pred, output_dict=True, **report_kwargs)
    print(classification_report(y_test_labels, y_pred, **report_kwargs))

    # Same reasoning: fix the label set so the matrix always matches the tick labels.
    cm = confusion_matrix(y_test_labels, y_pred, labels=class_ids)
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=label_names, yticklabels=label_names,
                cmap='Blues', ax=ax)
    ax.set_title(f"Confusion Matrix - {name}")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    plt.show()

# F1-score summary: rows = classes, columns = models.
# Read the per-class entries directly instead of dropping the aggregate rows,
# whose names ("accuracy" vs "micro avg") vary with sklearn version/arguments.
df_f1 = pd.DataFrame({
    model_name: {cls: report[cls]["f1-score"] for cls in label_names}
    for model_name, report in results.items()
}).reindex(label_names)

ax = df_f1.plot(kind='bar', figsize=(12, 6), title='F1-Score per Class per Model')
ax.set_ylabel("F1-Score")
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(True, linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()