In [1]:
import pandas as pd
import joblib
import json
import warnings

# NOTE(review): this suppresses every warning category for the rest of the
# kernel session, including any emitted while unpickling the models or
# running predictions below. Consider narrowing it to a specific category.
warnings.filterwarnings("ignore")

In [2]:
# Path (relative to the notebook's working directory) of the JSON file that
# holds the selected feature names for each attack type.
features_path = 'GA_output_ET.json'

def load_features(file_path):
    """Load the per-attack feature selection from a JSON file.

    Args:
        file_path: Path to a JSON document.

    Returns:
        The decoded JSON content, unchanged. In this notebook it is a dict
        mapping an attack-type name to a list of column names.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so the result does not
    # depend on the platform's locale default (e.g. cp1252 on Windows).
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Mapping of attack type -> list of selected column names. In the displayed
# output the 'SYN' list ends with 'Label'; presumably the other lists do too
# (the output is truncated) -- TODO confirm.
features = load_features(features_path)

In [3]:
# Display the loaded feature mapping for inspection.
features

{'SYN': ['Src Port',
  'Flow Duration',
  'Tot Fwd Pkts',
  'Tot Bwd Pkts',
  'TotLen Fwd Pkts',
  'TotLen Bwd Pkts',
  'Fwd Pkt Len Min',
  'Fwd Pkt Len Std',
  'Bwd Pkt Len Max',
  'Bwd Pkt Len Mean',
  'Bwd Pkt Len Std',
  'Flow Byts/s',
  'Flow IAT Mean',
  'Flow IAT Std',
  'Flow IAT Max',
  'Flow IAT Min',
  'Fwd IAT Tot',
  'Fwd IAT Mean',
  'Fwd IAT Std',
  'Fwd IAT Max',
  'Fwd IAT Min',
  'Bwd IAT Mean',
  'Bwd IAT Std',
  'Bwd IAT Max',
  'Bwd IAT Min',
  'Bwd Pkts/s',
  'Pkt Len Max',
  'Pkt Len Mean',
  'Pkt Len Std',
  'SYN Flag Cnt',
  'PSH Flag Cnt',
  'ACK Flag Cnt',
  'Pkt Size Avg',
  'Fwd Seg Size Avg',
  'Bwd Seg Size Avg',
  'Subflow Fwd Pkts',
  'Active Std',
  'Active Min',
  'Idle Mean',
  'Idle Std',
  'Idle Max',
  'Idle Min',
  'Label'],
 'HTTP': ['Dst Port',
  'Protocol',
  'Tot Fwd Pkts',
  'Tot Bwd Pkts',
  'TotLen Fwd Pkts',
  'TotLen Bwd Pkts',
  'Fwd Pkt Len Max',
  'Fwd Pkt Len Min',
  'Fwd Pkt Len Mean',
  'Bwd Pkt Len Max',
  'Bwd Pkt Len Min',
  'B

In [4]:
# Attack type -> path of the pickled model file for that attack.
# Every file follows the same './models/RF_<ATTACK>_1_model.pkl' pattern, and
# the tuple order below fixes the order in which the models are evaluated.
model_paths = {
    attack: f'./models/RF_{attack}_1_model.pkl'
    for attack in ('SYN', 'HTTP', 'ACK', 'UDP', 'ARP', 'SP', 'BF')
}

In [5]:
def predict_attack(input_data):
    """Run every per-attack model over the input rows.

    For each entry in the module-level ``model_paths`` the model is loaded,
    the input is restricted to that attack's columns from the module-level
    ``features`` mapping (excluding the 'Label' target column), and the model
    predicts on every row.

    Args:
        input_data: pandas DataFrame holding at least every feature column
            required by each model.

    Returns:
        A list of ``{"Attack_Type": str, "Prediction": int}`` dicts, one per
        (model, row) pair: grouped by model in ``model_paths`` order, rows in
        input order within each group.

    Raises:
        KeyError: If ``input_data`` lacks a column required by a model, or if
            ``features`` has no entry for an attack type.
    """
    results = []
    for attack, model_path in model_paths.items():
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        model = joblib.load(model_path)

        # Exclude the target column by name instead of assuming it is the last
        # entry; for lists that end with 'Label' this equals the old `[:-1]`.
        feature_columns = [name for name in features[attack] if name != "Label"]

        # Fail with a message that names the model, not just the columns.
        missing = [name for name in feature_columns if name not in input_data.columns]
        if missing:
            raise KeyError(
                f"Input data is missing columns required by the {attack} model: {missing}"
            )

        # Restrict the input to this model's features, in the stored order.
        filtered_data = input_data[feature_columns]

        # Predict on every row and record one result per row.
        predictions = model.predict(filtered_data)
        results.extend(
            {"Attack_Type": attack, "Prediction": int(pred)} for pred in predictions
        )

    return results



def summarize_results(results):
    """Count positive predictions per attack type.

    Args:
        results: List of ``{"Attack_Type": ..., "Prediction": ...}`` dicts as
            produced by ``predict_attack``. May be empty.

    Returns:
        Dict mapping each attack type to the number of entries whose
        ``Prediction`` equals 1. Attack types with no positive prediction are
        omitted; an empty dict means nothing was flagged. Each model's
        predictions are counted separately, so the counts are per model.
    """
    # An empty list would build a DataFrame without a "Prediction" column and
    # the filter below would raise KeyError, so handle it up front.
    if len(results) == 0:
        return {}

    # Convert the records to a DataFrame and keep only positive predictions.
    results_df = pd.DataFrame(results)
    attack_predictions = results_df[results_df["Prediction"] == 1]

    if attack_predictions.empty:
        return {}

    # Count the flagged rows for each attack type.
    attack_counts = attack_predictions.groupby("Attack_Type").size()
    return attack_counts.to_dict()


In [10]:
# Load the flow CSV for the capture under test and score it with every model.
test_data = pd.read_csv("../csvs/mitm-arpspoofing-4-dec.pcap_Flow.csv")
results = predict_attack(test_data)

# Aggregate the per-row predictions into a count per attack type.
final_results = summarize_results(results)

# Report: an empty summary means no model flagged any row.
if not final_results:
    print("No attacks detected.")
else:
    print("Detected Attacks:")
    for attack_type, count in final_results.items():
        print(f" - Attack Type: {attack_type}, Count: {count}")


Detected Attacks:
 - Attack Type: ACK, Count: 18
 - Attack Type: ARP, Count: 235
 - Attack Type: BF, Count: 5
 - Attack Type: HTTP, Count: 39
 - Attack Type: SP, Count: 120
 - Attack Type: SYN, Count: 2
 - Attack Type: UDP, Count: 3
