In [2]:
!pip install scapy

Collecting scapy
  Using cached scapy-2.6.1-py3-none-any.whl.metadata (5.6 kB)
Using cached scapy-2.6.1-py3-none-any.whl (2.4 MB)
Installing collected packages: scapy
Successfully installed scapy-2.6.1


In [1]:
from scapy.all import rdpcap, IP, TCP, UDP, Raw
import pandas as pd

packets = rdpcap("sample_traffic.pcap")
data = []

for i, pkt in enumerate(packets):
    if IP in pkt:
        src_ip = pkt[IP].src
        dst_ip = pkt[IP].dst
        proto = "Other"
        dst_port = None
        src_port = None
        flags = None
        ttl = pkt[IP].ttl
        payload_len = len(pkt[Raw].load) if Raw in pkt else 0
        tcp_window = pkt[TCP].window if TCP in pkt else None
        tcp_flags_int = pkt[TCP].flags.value if TCP in pkt else None

        if TCP in pkt:
            proto = "TCP"
            dst_port = pkt[TCP].dport
            src_port = pkt[TCP].sport
            flags = pkt[TCP].flags
        elif UDP in pkt:
            proto = "UDP"
            dst_port = pkt[UDP].dport
            src_port = pkt[UDP].sport

        data.append({
            "src_ip": src_ip,
            "dst_ip": dst_ip,
            "src_port": src_port,
            "dst_port": dst_port,
            "protocol": proto,
            "packet_length": len(pkt),
            "payload_len": payload_len,
            "ttl": ttl,
            "tcp_flags": str(flags),
            "tcp_flags_int": tcp_flags_int,
            "tcp_window": tcp_window,
            "index": i  # for ordering
        })

df = pd.DataFrame(data)
df.to_csv("packets_enriched.csv", index=False)
print("✅ 已成功擷取更多封包特徵，存為 packets_enriched.csv")


✅ 已成功擷取更多封包特徵，存為 packets_enriched.csv


In [9]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler, LabelEncoder

# 讀入資料
df = pd.read_csv("packets.csv")

# 對 protocol 進行 label encoding（TCP/UDP → 0/1）
df['protocol'] = LabelEncoder().fit_transform(df['protocol'])

# 特徵欄位（你可以選擇加更多）
features = ['protocol', 'dst_port', 'packet_length']

# 資料標準化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[features])

# 訓練 Isolation Forest 模型
model = IsolationForest(contamination=0.1, random_state=42)
model.fit(X_scaled)

# 預測：1 表正常，-1 表異常
df['prediction'] = model.predict(X_scaled)
df['anomaly'] = df['prediction'].apply(lambda x: 1 if x == -1 else 0)

# 顯示部分異常資料
print("🚨 偵測到的異常封包：")
print(df[df['anomaly'] == 1][['src_ip', 'dst_ip', 'protocol', 'dst_port', 'packet_length']].head())

# 輸出結果
df.to_csv("predicted_traffic.csv", index=False)
print("✅ 已將含異常預測的資料儲存為 predicted_traffic.csv")


🚨 偵測到的異常封包：
            src_ip          dst_ip  protocol  dst_port  packet_length
13  10.130.188.207    3.233.158.24         1     443.0           1195
15  10.130.188.207   34.144.254.29         1     443.0            453
38   142.250.77.14  10.130.188.207         2   64214.0            648
84  17.248.223.130  10.130.188.207         1   54560.0            112
85  17.248.223.130  10.130.188.207         1   54560.0             97
✅ 已將含異常預測的資料儲存為 predicted_traffic.csv
