In [None]:
import gzip
import time
import os
import concurrent.futures
from collections import defaultdict, Counter
import numpy as np
import matplotlib.pyplot as plt
import datetime
from scapy.all import PcapReader, IP, TCP, UDP
import psutil
import ipaddress

# --- CONFIG ---
PCAP_DIR = 'PCAP-20181103'  # Directory scanned for *.pcap.gz capture files
QUANTUMS = [300, 600, 900]  # Time windows in seconds (5/10/15 mins)
PLOT_OUTPUT_DIR = 'entropy_charts'  # Destination directory for the PNG charts
# Conservative parallel processing: leave one core free, cap at 4 workers.
# os.cpu_count() may return None, and on a single-core host "count - 1" is 0,
# which ThreadPoolExecutor rejects -- so clamp the result to at least 1.
MAX_WORKERS = max(1, min(4, (os.cpu_count() or 1) - 1))
MEMORY_LIMIT_MB = 4000  # NOTE(review): not referenced by the visible code
K_VALUES = [3, 4, 5, 6]  # For IP partitioning (number of leading address bits)

# --- Setup ---
# Create the chart directory up front; exist_ok makes re-runs harmless.
os.makedirs(PLOT_OUTPUT_DIR, exist_ok=True)

# --- HELPERS ---
def check_system_limits():
    """Print how much system memory is currently available, in megabytes."""
    bytes_per_mb = 1024 ** 2
    free_bytes = psutil.virtual_memory().available
    free_mb = free_bytes / bytes_per_mb
    print(f"Available memory: {free_mb:.0f} MB")


def process_single_packet(pkt):
    """Reduce one packet to a flat tuple of the fields used for analysis.

    Args:
        pkt: A parsed packet that supports ``layer in pkt`` and ``pkt[layer]``.

    Returns:
        ``(timestamp, src_ip, dst_ip, sport, dport, proto)`` for packets that
        carry an IP layer, where the timestamp is truncated to an integer and
        the ports are ``None`` unless a TCP or UDP layer is present.
        ``None`` for packets without an IP layer.
    """
    if IP not in pkt:
        return None

    ip_hdr = pkt[IP]
    stamp = int(pkt.time)

    # TCP is checked before UDP; the first transport layer found wins.
    src_port = dst_port = None
    for l4 in (TCP, UDP):
        if l4 in pkt:
            segment = pkt[l4]
            src_port, dst_port = segment.sport, segment.dport
            break

    return (stamp, ip_hdr.src, ip_hdr.dst, src_port, dst_port, ip_hdr.proto)


def process_pcap_file(file_path):
    """Read one gzip-compressed capture and return its extracted packet tuples.

    Args:
        file_path: Path to a ``.pcap.gz`` file.

    Returns:
        A list of the non-empty results of ``process_single_packet`` in file
        order. If anything goes wrong while opening or reading the file, the
        error is printed and an empty list is returned (partial results are
        discarded).
    """
    try:
        with gzip.open(file_path, 'rb') as stream:
            parsed = (process_single_packet(raw) for raw in PcapReader(stream))
            # Materialise inside the ``with`` so reading finishes before close.
            return [record for record in parsed if record]
    except Exception as err:
        print(f"Error processing {file_path}: {err}")
        return []


def process_all_packets(directory):
    """Load every ``*.pcap.gz`` file in *directory* and merge the results.

    Files are processed in sorted path order by a thread pool of
    ``MAX_WORKERS`` workers, and the per-file packet lists are concatenated
    in that same order.

    Args:
        directory: Directory to scan (non-recursively) for capture files.

    Returns:
        A single list of packet tuples, or an empty list (after printing a
        notice) when the directory holds no matching files.
    """
    pcap_paths = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith('.pcap.gz')
    ]
    pcap_paths.sort()

    if len(pcap_paths) == 0:
        print("No PCAP files found!")
        return []

    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        per_file = list(pool.map(process_pcap_file, pcap_paths))

    merged = []
    for chunk in per_file:
        merged.extend(chunk)
    return merged


def get_quantum_start(ts, quantum_secs):
    """Return the start of the ``quantum_secs``-wide window that contains *ts*.

    The result is *ts* rounded down to the nearest multiple of
    ``quantum_secs``.
    """
    _, offset_into_window = divmod(ts, quantum_secs)
    return ts - offset_into_window


def calculate_entropy(values):
    """Return the Shannon entropy, in bits, of the empirical distribution of *values*.

    Args:
        values: A sequence of hashable observations (e.g. ports or address
            prefixes). Each distinct value's probability is its relative
            frequency in the sequence.

    Returns:
        The entropy as a float; ``0`` for an empty input and ``0.0`` when all
        observations are identical.
    """
    if not values:
        return 0
    counts = np.array(list(Counter(values).values()), dtype=float)
    probs = counts / counts.sum()
    # Every probability comes from an observed value and is therefore > 0, so
    # the logarithm needs no epsilon guard (an epsilon would bias the result
    # and make a single-valued distribution come out slightly negative).
    # Summing p * log2(1/p) also avoids returning a negative zero.
    return float(np.sum(probs * np.log2(1.0 / probs)))


def ip_to_k_bits(ip, k):
    """Return the *k* most significant bits of an IPv4 address as an integer.

    Args:
        ip: An IPv4 address in any form accepted by ``ipaddress.IPv4Address``
            (typically a dotted-quad string).
        k: Number of leading bits to keep.

    Returns:
        The address shifted right by ``32 - k`` bits, or ``-1`` when the
        address cannot be parsed as IPv4 or *k* is unusable (for example
        greater than 32 or not an integer).
    """
    try:
        ip_int = int(ipaddress.IPv4Address(ip))
        return ip_int >> (32 - k)
    except (ValueError, TypeError):
        # AddressValueError is a ValueError subclass; a negative shift count
        # (k > 32) also raises ValueError. Catching only these keeps the -1
        # sentinel without swallowing KeyboardInterrupt/SystemExit.
        return -1


# --- MAIN ---
def _entropy_from_weights(weights):
    """Return the Shannon entropy, in bits, of a distribution given as weights.

    Each weight is the total volume attributed to one key (a port or an
    address prefix); probabilities are the weights divided by their sum.
    Non-positive weights are ignored and an empty input yields 0.
    """
    positive = np.array([w for w in weights if w > 0], dtype=float)
    if positive.size == 0:
        return 0
    probs = positive / positive.sum()
    return float(np.sum(probs * np.log2(1.0 / probs)))


def _group_by_window(packets, quantum):
    """Bucket packet tuples by the start time of their *quantum*-second window."""
    windows = defaultdict(list)
    for pkt in packets:
        windows[get_quantum_start(pkt[0], quantum)].append(pkt)
    return windows


def _append_window_entropies(entropy_results, pkt_list):
    """Compute all entropies for one time window and append them to *entropy_results*."""
    pkt_size = 1  # Count each packet as 1; can use real size if needed

    sport_ports, dport_ports = [], []
    sport_sizes, dport_sizes = Counter(), Counter()
    saddr_k = {k: [] for k in K_VALUES}
    daddr_k = {k: [] for k in K_VALUES}
    saddr_sizes_k = {k: Counter() for k in K_VALUES}
    daddr_sizes_k = {k: Counter() for k in K_VALUES}

    for _ts, saddr, daddr, sport, dport, _proto in pkt_list:
        # Ports: keep 0..49151 only (ports above that are the IANA
        # dynamic/private range). Test against None explicitly so that
        # port 0 is not dropped by truthiness.
        if sport is not None and 0 <= sport <= 49151:
            sport_ports.append(sport)
            sport_sizes[sport] += pkt_size
        if dport is not None and 0 <= dport <= 49151:
            dport_ports.append(dport)
            dport_sizes[dport] += pkt_size

        # IPs: bucket by the k leading address bits; -1 marks an unparsable
        # address and is skipped.
        for k in K_VALUES:
            saddr_bits = ip_to_k_bits(saddr, k)
            daddr_bits = ip_to_k_bits(daddr, k)
            if saddr_bits != -1:
                saddr_k[k].append(saddr_bits)
                saddr_sizes_k[k][saddr_bits] += pkt_size
            if daddr_bits != -1:
                daddr_k[k].append(daddr_bits)
                daddr_sizes_k[k][daddr_bits] += pkt_size

    # "count" entropies use the frequency of each key; "size" entropies use
    # the accumulated size per key as the weight of that key (not the
    # frequency with which identical totals occur).
    entropy_results['sport_count'].append(calculate_entropy(sport_ports))
    entropy_results['sport_size'].append(_entropy_from_weights(sport_sizes.values()))
    entropy_results['dport_count'].append(calculate_entropy(dport_ports))
    entropy_results['dport_size'].append(_entropy_from_weights(dport_sizes.values()))

    for k in K_VALUES:
        entropy_results['saddr_count'][k].append(calculate_entropy(saddr_k[k]))
        entropy_results['saddr_size'][k].append(_entropy_from_weights(saddr_sizes_k[k].values()))
        entropy_results['daddr_count'][k].append(calculate_entropy(daddr_k[k]))
        entropy_results['daddr_size'][k].append(_entropy_from_weights(daddr_sizes_k[k].values()))


def _save_entropy_plot(timepoints, series, title, filename):
    """Draw the ``(values, marker, label)`` series on one chart and save it as a PNG."""
    plt.figure(figsize=(12, 6))
    for values, marker, label in series:
        plt.plot(timepoints, values, marker=marker, label=label)
    plt.title(title)
    plt.xlabel("Time Points")
    plt.ylabel("Entropy")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(PLOT_OUTPUT_DIR, filename))
    plt.close()


def main():
    """Load all captures, compute per-window entropies and save the charts.

    For every window length in ``QUANTUMS`` the packets are grouped into
    windows, port and address-prefix entropies are computed per window, and
    one port chart plus one chart per value in ``K_VALUES`` is written to
    ``PLOT_OUTPUT_DIR``. The x axis is the index of the window among the
    non-empty windows, in chronological order.
    """
    print("\n=== PCAP ENTROPY ANALYZER ===\n")
    check_system_limits()

    packets = process_all_packets(PCAP_DIR)
    print(f"\nLoaded {len(packets):,} packets.")

    for quantum in QUANTUMS:
        entropy_results = {'sport_count': [], 'dport_count': [], 'sport_size': [], 'dport_size': [],
                           'saddr_count': {k: [] for k in K_VALUES}, 'daddr_count': {k: [] for k in K_VALUES},
                           'saddr_size': {k: [] for k in K_VALUES}, 'daddr_size': {k: [] for k in K_VALUES}}

        windows = _group_by_window(packets, quantum)
        for window_start in sorted(windows):
            _append_window_entropies(entropy_results, windows[window_start])

        # --- Plotting ---
        minutes = quantum // 60
        print(f"Plotting results for {minutes}min windows...")
        timepoints = list(range(len(entropy_results['sport_count'])))

        # Ports
        _save_entropy_plot(
            timepoints,
            [
                (entropy_results['sport_count'], 'o', 'sport_count'),
                (entropy_results['dport_count'], 'o', 'dport_count'),
                (entropy_results['sport_size'], 'x', 'sport_size'),
                (entropy_results['dport_size'], 'x', 'dport_size'),
            ],
            f"Port Entropies Over Time ({minutes} min)",
            f"port_entropy_{minutes}min.png",
        )

        # IPs
        for k in K_VALUES:
            _save_entropy_plot(
                timepoints,
                [
                    (entropy_results['saddr_count'][k], 'o', f'saddr_count_k{k}'),
                    (entropy_results['daddr_count'][k], 'o', f'daddr_count_k{k}'),
                    (entropy_results['saddr_size'][k], 'x', f'saddr_size_k{k}'),
                    (entropy_results['daddr_size'][k], 'x', f'daddr_size_k{k}'),
                ],
                f"IP Entropies Over Time (k={k}, {minutes} min)",
                f"ip_entropy_k{k}_{minutes}min.png",
            )

    print("\n✅ All plots saved to:", os.path.abspath(PLOT_OUTPUT_DIR))

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


=== PCAP ENTROPY ANALYZER ===

Available memory: 7682 MB


