In [2]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl (98.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.5


In [3]:
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [4]:
# Đọc dữ liệu từ file data.pkl.File nay chua du lieu da duoc can bang voi WGAN va ENN-Kmean
with open('/content/drive/MyDrive/nsl-kdd-balanced.pkl', 'rb') as file:
    data_loaded = pickle.load(file)

X_train = data_loaded['X_train']
X_test = data_loaded['X_test']
y_train = data_loaded['y_train']
y_test = data_loaded['y_test']

In [5]:
unique_values, counts = np.unique(y_train, return_counts=True)

# Hiển thị các giá trị và tần suất xuất hiện tương ứng
for value, count in zip(unique_values, counts):
    print(f'{value}: {count}')

0: 14000
1: 14000
2: 14000
3: 14000
4: 14000


In [6]:
unique_values, counts = np.unique(y_test, return_counts=True)

# Hiển thị các giá trị và tần suất xuất hiện tương ứng
for value, count in zip(unique_values, counts):
    print(f'{value}: {count}')

0: 6000
1: 6000
2: 2421
3: 67
4: 2885


In [7]:
X_train[:10]

array([[0.        , 0.5       , 0.34782609, 0.1       , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.00195695, 0.00391389, 0.        ,
        0.        , 1.        , 1.        , 1.        , 0.        ,
        1.        , 0.39215686, 1.        , 1.        , 0.        ,
        0.01      , 0.16      , 0.        , 0.        , 1.        ,
        1.        ],
       [0.        , 0.5       , 0.34782609, 0.1       , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.00195695, 0.00391389, 0.        ,
        0.        , 1.        , 1.        , 1.        , 0.        ,
        1.        , 0.40392

In [8]:
y_train[:5]

array([0, 0, 0, 0, 0])

In [9]:
# Assuming X_train, X_test, y_train, y_test are already defined and preprocessed.
num_classes = len(np.unique(np.concatenate([y_train, y_test])))

# Define and train models
xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
xgb.fit(X_train, y_train)

cbt = CatBoostClassifier(verbose=0)
cbt.fit(X_train, y_train)

gbm = GradientBoostingClassifier()
gbm.fit(X_train, y_train)

bme = BaggingClassifier(base_estimator=RandomForestClassifier(), n_estimators=10)
bme.fit(X_train, y_train)

# Deep Neural Network
dnn = Sequential([
    Dense(128, activation='relu', input_dim=X_train.shape[1]),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])
dnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
dnn.fit(X_train, to_categorical(y_train, num_classes=num_classes), epochs=10, batch_size=32, verbose=0)

# Predict probabilities
prob_xgb = xgb.predict_proba(X_test)
prob_cbt = cbt.predict_proba(X_test)
prob_gbm = gbm.predict_proba(X_test)
prob_bme = bme.predict_proba(X_test)
prob_dnn = dnn.predict(X_test)  # This already gives probabilities due to softmax







In [10]:
def evaluate_model(y_true, predictions):
    acc = accuracy_score(y_true, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, predictions, average='weighted')
    return acc, precision, recall, f1

In [11]:
# Convert predicted probabilities to class labels for evaluation
predictions_xgb = np.argmax(prob_xgb, axis=1)
predictions_cbt = np.argmax(prob_cbt, axis=1)
predictions_gbm = np.argmax(prob_gbm, axis=1)
predictions_bme = np.argmax(prob_bme, axis=1)
predictions_dnn = np.argmax(prob_dnn, axis=1)

# Evaluate each model
results_xgb = evaluate_model(y_test, predictions_xgb)
results_cbt = evaluate_model(y_test, predictions_cbt)
results_gbm = evaluate_model(y_test, predictions_gbm)
results_bme = evaluate_model(y_test, predictions_bme)
results_dnn = evaluate_model(y_test, predictions_dnn)

# Print results for each model
print("XGB - Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-Score: {:.4f}".format(*results_xgb))
print("CBT - Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-Score: {:.4f}".format(*results_cbt))
print("GBM - Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-Score: {:.4f}".format(*results_gbm))
print("BME - Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-Score: {:.4f}".format(*results_bme))
print("DNN - Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-Score: {:.4f}".format(*results_dnn))


XGB - Accuracy: 0.7327, Precision: 0.8137, Recall: 0.7327, F1-Score: 0.6887
CBT - Accuracy: 0.7303, Precision: 0.8070, Recall: 0.7303, F1-Score: 0.6773
GBM - Accuracy: 0.7334, Precision: 0.8070, Recall: 0.7334, F1-Score: 0.6926
BME - Accuracy: 0.7130, Precision: 0.8098, Recall: 0.7130, F1-Score: 0.6679
DNN - Accuracy: 0.7219, Precision: 0.7504, Recall: 0.7219, F1-Score: 0.6653


In [12]:
# Ensemble learning

# Weighted average of probabilities
ensemble_probabilities = np.average(
    [prob_xgb, prob_cbt, prob_gbm, prob_bme, prob_dnn],
    axis=0,
    weights=[0.3, 0.2, 0.2, 0.2, 0.1]
)

# Final prediction is the class with the highest average probability
ensemble_predictions = np.argmax(ensemble_probabilities, axis=1)

# Evaluate ensemble
ensemble_results = evaluate_model(y_test, ensemble_predictions)

print("Ensemble Accuracy:", ensemble_results[0])
print("Ensemble Precision:", ensemble_results[1])
print("Ensemble Recall:", ensemble_results[2])
print("Ensemble F1-Score:", ensemble_results[3])

Ensemble Accuracy: 0.7307315950037414
Ensemble Precision: 0.8158890203917817
Ensemble Recall: 0.7307315950037414
Ensemble F1-Score: 0.6857472428199289


In [13]:
def evaluate_model_per_class(y_true, predictions):
    """ Evaluate the model and return precision, recall, and F1-score for each class """
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, predictions, average=None)
    acc = accuracy_score(y_true, predictions)
    return acc, precision, recall, f1

# Evaluate each model
results_xgb = evaluate_model_per_class(y_test, predictions_xgb)
results_cbt = evaluate_model_per_class(y_test, predictions_cbt)
results_gbm = evaluate_model_per_class(y_test, predictions_gbm)
results_bme = evaluate_model_per_class(y_test, predictions_bme)
results_dnn = evaluate_model_per_class(y_test, predictions_dnn)

# Print results for each model
def print_results(model_name, results):
    acc, precision, recall, f1 = results
    print(f"{model_name} - Accuracy: {acc:.4f}")
    print("Class-wise Precision:", precision)
    print("Class-wise Recall:", recall)
    print("Class-wise F1-Score:", f1)
    print()  # Print a newline for better separation

print_results("XGB", results_xgb)
print_results("CBT", results_cbt)
print_results("GBM", results_gbm)
print_results("BME", results_bme)
print_results("DNN", results_dnn)

XGB - Accuracy: 0.7327
Class-wise Precision: [0.59377226 0.9598369  0.81724422 0.8        0.96446701]
Class-wise Recall: [0.9725     0.78466667 0.81825692 0.23880597 0.06585789]
Class-wise F1-Score: [0.73734757 0.86345713 0.81775026 0.36781609 0.12329656]

CBT - Accuracy: 0.7303
Class-wise Precision: [0.59050415 0.96178093 0.81174957 0.86666667 0.92982456]
Class-wise Recall: [0.97216667 0.81366667 0.7819083  0.3880597  0.01837088]
Class-wise F1-Score: [0.7347273  0.88154568 0.79654955 0.53608247 0.03602991]

GBM - Accuracy: 0.7334
Class-wise Precision: [0.61688794 0.95823617 0.72227909 1.         0.95454545]
Class-wise Recall: [0.968      0.75333333 0.87443205 0.02985075 0.10190641]
Class-wise F1-Score: [0.75355174 0.84351964 0.79110613 0.05797101 0.18415283]

BME - Accuracy: 0.7130
Class-wise Precision: [0.5657525  0.9608985  0.85204567 0.66666667 0.97101449]
Class-wise Recall: [0.973      0.77       0.73977695 0.05970149 0.04644714]
Class-wise F1-Score: [0.71548502 0.85492228 0.79195