In [1]:
import os
import csv
import sys
from scapy.all import rdpcap, IP, TCP, UDP
from pathlib import Path

from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn
from rich.live import Live
import pandas as pd
from send2trash import send2trash
import yaml
from dacite import from_dict
from dataclasses import asdict
import warnings
from datetime import datetime, time, timezone
from zoneinfo import ZoneInfo

In [9]:
def get_all_files():
    all_files = [database for database in root_path.glob("*.pcap")]
    return all_files


def get_flow_key(pkt):
    if IP in pkt:
        proto = pkt[IP].proto
        ip_src = pkt[IP].src
        ip_dst = pkt[IP].dst

        if proto == 6 and TCP in pkt:
            sport = pkt[TCP].sport
            dport = pkt[TCP].dport
        elif proto == 17 and UDP in pkt:
            sport = pkt[UDP].sport
            dport = pkt[UDP].dport
        else:
            return None

        return (ip_src, sport, ip_dst, dport, proto)
    return None


attack_schedule = {
    datetime(2017, 7, 3).date(): [],  # Benign traffic only

    datetime(2017, 7, 4).date(): [
        {"type": "Malicious/BruteForce", "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(9, 20), "end": time(10, 20)},
        {"type": "Malicious/BruteForce", "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(14, 0), "end": time(15, 0)},
    ],

    datetime(2017, 7, 5).date(): [
        {"type": "Malicious/DDoS",    "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(9, 47), "end": time(10, 10)},
        {"type": "Malicious/DDoS", "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(10, 14), "end": time(10, 35)},
        {"type": "Malicious/DDoS",         "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(10, 43), "end": time(11, 0)},
        {"type": "Malicious/DDoS",    "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(11, 10), "end": time(11, 23)},
        {"type": "Malicious/Exploit","src": "205.174.165.73","dst": "205.174.165.66", "start": time(15, 12), "end": time(15, 32)},
    ],

    datetime(2017, 7, 6).date(): [
        {"type": "Malicious/BruteForce", "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(9, 20), "end": time(10, 0)},
        {"type": "Malicious/WebAttack",        "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(10, 15), "end": time(10, 35)},
        {"type": "Malicious/WebAttack",       "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(10, 40), "end": time(10, 42)},
        {"type": "Malicious/Exploit", "src": "205.174.165.73", "dst": "192.168.10.8", "start": time(14, 19), "end": time(14, 35)},
        {"type": "Malicious/Infiltration",  "src": "205.174.165.73", "dst": "192.168.10.25", "start": time(14, 53), "end": time(15, 0)},
        {"type": "Malicious/Infiltration",   "src": "205.174.165.73", "dst": "192.168.10.8",  "start": time(15, 4),  "end": time(15, 45)},
    ],

    datetime(2017, 7, 7).date(): [
        {"type": "Malicious/Botnet", "src": "205.174.165.73", "dst": "192.168.10.8",  "start": time(10, 2), "end": time(11, 2)},
        {"type": "Malicious/Infiltration",    "src": "205.174.165.73", "dst": "205.174.165.68", "start": time(13, 55), "end": time(15, 29)},
        {"type": "Malicious/DDoS",   "src": "205.174.165.69", "dst": "205.174.165.68", "start": time(15, 56), "end": time(16, 16)},
        {"type": "Malicious/DDoS",   "src": "205.174.165.70", "dst": "205.174.165.68", "start": time(15, 56), "end": time(16, 16)},
        {"type": "Malicious/DDoS",   "src": "205.174.165.71", "dst": "205.174.165.68", "start": time(15, 56), "end": time(16, 16)},
    ]
}


def infer_threat(pkt_time, ip_src, ip_dst):
    tz = ZoneInfo('America/Halifax')  
    dt = datetime.fromtimestamp(float(pkt_time), tz=tz)
    current_date = dt.date()
    current_time = dt.time()

    print(current_time)

    attacks_today = attack_schedule.get(current_date, [])
    
    for attack in attacks_today:
        src_match = ip_src == attack["src"] and ip_dst == attack["dst"]
        dst_match = ip_dst == attack["src"] and ip_src == attack["dst"]
        
        if (src_match or dst_match) and attack["start"] <= current_time <= attack["end"]:
            return f"Malicious/{attack['type']}"

    return "Benign/None"



def process_pcap_to_csv(pcap_path, output_dir):
    # print(pcap_path)
    packets = rdpcap(pcap_path)

    flows = {} 

    for pkt in packets:
        key = get_flow_key(pkt)
        if not key:
            continue
        if key not in flows:
            flows[key] = []
        flows[key].append(pkt)

    os.makedirs(output_dir, exist_ok=True)

    # i = len list files from folder
    for i, (key, pkts) in enumerate(flows.items()):
        
        ip_src, sport, ip_dst, dport, proto = key
        
        start_time = pkts[0].time
        
        
        times = [round(pkt.time - start_time, 6) for pkt in pkts]
        sizes = [len(pkt) for pkt in pkts]

        # Inferir ameaça com base no primeiro pacote
        threat = infer_threat(start_time, ip_src, ip_dst)
        threat_dir = os.path.join(output_dir, threat)
        os.makedirs(threat_dir, exist_ok=True)

        if i == 0:
            new_flow_id = sum(1 for f in Path(threat_dir).iterdir() if f.is_file())
            i = i + new_flow_id

        flow_id = f"flow_{i}"

        row = [
            flow_id,
            ip_src,
            sport,
            ip_dst,
            dport,
            proto,
            0,  # id1 
            0,  # id2 
            0.0  # tempo inicial
        ] + times[1:] + [''] + sizes

        csv_name = os.path.join(threat_dir, f"{flow_id}.csv")
        with open(csv_name, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(row)


def get_vpn_class(pcap_name: str) -> str:
    name = pcap_name.lower()
    
    crypto_label = next((label for label, keywords in crypto.items() if any(k in name for k in keywords)), None)
    traffic_label = next((label for label, keywords in traffic.items() if any(k in name for k in keywords)), 'Browsing')

    return f"{traffic_label}_{crypto_label}" if crypto_label else traffic_label

crypto = {
    'VPN': ['vpn'],
    'nonVPN': ['nonvpn', 'nontor', 'nontor'],
    'Tor': ['tor']
}

traffic = {
    'Video': ['video', 'youtube', 'vimeo', 'netflix'],
    'VOIP': ['voip', 'voice', 'audio', 'spotify'],
    'FileTransfer': ['filetransfer', 'ftps', 'sftp', 'p2p', 'bittorrent', 'scp', 'file', 'transfer'],
    'Chat': ['chat', 'email', 'mail'],
    'Browsing': ['browsing']
}

In [10]:
process_pcap_to_csv(r'C:\Users\Felipe Castro\Desktop\disciplinas\intelicomp\snn-darknet-traffic-classification\Dataset_raw\CIC-IDS-2017\Friday-WorkingHours_00000_20170707085939.pcap', r'C:\Users\Felipe Castro\Desktop\disciplinas\intelicomp\snn-darknet-traffic-classification\notebooks')

08:59:50.315195
08:59:50.315459
08:59:50.316273
08:59:50.316526
09:00:34.845380
09:00:35.164943
09:00:35.165169
09:00:35.215632
09:00:35.215812
09:00:35.215873
09:00:35.215989
09:00:35.217384
09:00:35.217456
09:00:35.217525
09:00:35.217591
09:00:35.233370
09:00:35.233474
09:00:35.233603
09:00:35.233728
09:00:35.337052
09:00:35.337112
09:00:35.338671
09:00:35.338727
09:00:35.342358
09:00:35.342407
09:00:35.357533
09:00:35.357609
09:00:35.374630
09:00:35.374671
09:00:35.387229
09:00:35.387423
09:00:35.387853
09:00:35.387971
09:00:35.416115
09:00:35.416156
09:00:35.417351
09:00:35.417394
09:00:35.491827
09:00:35.491884
09:00:35.493599
09:00:35.493616
09:00:41.926822
09:00:41.927025
09:00:41.928726
09:00:41.928827
09:00:41.928937
09:00:41.929012
09:00:41.948666
09:00:41.949002
09:00:41.949198
09:00:41.949374
09:00:41.949996
09:00:42.208259
09:00:42.208361
09:00:42.208498
09:00:42.208549
09:00:42.232025
09:00:42.232269
09:00:42.232743
09:00:42.232935
09:00:42.328069
09:00:42.328349
09:00:42

In [11]:
process_pcap_to_csv(r'C:\Users\Felipe Castro\Desktop\disciplinas\intelicomp\snn-darknet-traffic-classification\Dataset_raw\CIC-IDS-2017\Friday-WorkingHours_00099_20170707165148.pcap', r'C:\Users\Felipe Castro\Desktop\disciplinas\intelicomp\snn-darknet-traffic-classification\notebooks')

16:51:48.987972
16:51:48.988326
16:51:49.003602
16:51:49.024850
16:51:49.381709
16:51:49.405063
16:51:49.712795
16:51:49.797157
16:51:50.224796
16:51:50.248643
16:51:50.358147
16:51:50.381768
16:51:50.815064
16:51:50.838816
16:51:51.123792
16:51:51.158245
16:51:51.160184
16:51:51.181903
16:51:51.458246
16:51:51.482024
16:51:51.497693
16:51:51.521375
16:51:51.558279
16:51:51.581997
16:51:51.839058
16:51:51.862522
16:51:51.958298
16:51:51.983591
16:51:52.137766
16:51:52.159746
16:51:52.272805
16:51:52.296367
16:51:52.299350
16:51:52.336000
16:51:52.336795
16:51:52.360403
16:51:52.558304
16:51:52.580346
16:51:52.697349
16:51:52.697353
16:51:52.719315
16:51:52.733987
16:51:52.784796
16:51:52.809280
16:51:52.944796
16:51:53.028398
16:51:53.151741
16:51:53.197794
16:51:53.754883
16:51:53.755104
16:51:53.808794
16:51:53.808797
16:51:53.832741
16:51:53.892420
16:51:55.088796
16:51:55.204869
16:51:55.344793
16:51:55.460789
16:51:55.600796
16:51:55.600801
16:51:55.623793
16:51:55.722007
16:51:55