This section induces NaN values in order to generate a pcap that can be used to compare the baseline model, which drops NaN values, with the prediction model, which attempts to fill in the NaN values.

We will start by taking a pcap file and inducing NaN values in the window size feature column. We create these NaN values artificially so that our model can later be used to predict the missing values.

In [1]:
from scapy.all import rdpcap, wrpcap, TCP
import random
import pandas as pd

# Induce artificial "missing" TCP window sizes in a capture.
#
# Reads the source pcap, overwrites the TCP window field of a random subset
# of TCP packets with a placeholder value, writes the result to a new pcap,
# and prints summary statistics about what was changed.

# Read the original pcap file
packets = rdpcap('data/netflix_resolution.pcap')

# Set parameters
# Probability that any given TCP packet gets its window replaced by the
# placeholder. A pcap cannot store a real NaN, so 0 is used as the stand-in.
# NOTE: 0 is also a legitimate TCP window value (a zero-window advertisement),
# so the pcap alone cannot distinguish induced placeholders from genuine
# zeros; `modified_windows` below records which packets were actually altered.
nan_percentage = 0.3
random.seed(42)  # For reproducibility

# Track statistics
tcp_count = 0
modified_count = 0
original_windows = []   # window of every TCP packet, in capture order
modified_windows = []   # same order; None where the window was blanked

# Process packets
for packet in packets:
    if not packet.haslayer(TCP):
        continue

    tcp_layer = packet[TCP]
    tcp_count += 1
    original_window = tcp_layer.window
    original_windows.append(original_window)

    # Randomly decide if this packet should have NaN window
    if random.random() < nan_percentage:
        tcp_layer.window = 0  # Set to 0 to represent NaN
        modified_count += 1
        modified_windows.append(None)  # Track as NaN
    else:
        modified_windows.append(original_window)

    # Drop the stored checksum so Scapy recomputes it when the packet is
    # serialised by wrpcap. The deletion mutates the packet held in
    # `packets`, so no rebuild/reassignment of the loop variable is needed
    # (rebinding the loop variable would not affect the list anyway).
    del tcp_layer.chksum

# Save modified pcap file
output_file = 'data/netflix_resolution_nan_window.pcap'
wrpcap(output_file, packets)

# Print statistics
print(f"Total packets: {len(packets)}")
print(f"TCP packets: {tcp_count}")
print(f"Modified packets (NaN induced): {modified_count}")
if tcp_count:
    print(f"Percentage modified: {modified_count/tcp_count*100:.2f}%")
    print("\nOriginal window size stats:")
    # Minimum is taken over non-zero windows only; guard against the case
    # where every window is zero so min() is never called on an empty list.
    nonzero_windows = [w for w in original_windows if w > 0]
    if nonzero_windows:
        print(f"  Min: {min(nonzero_windows)}")
    else:
        print("  Min: n/a (no non-zero windows)")
    print(f"  Max: {max(original_windows)}")
    print(f"  Mean: {sum(original_windows)/len(original_windows):.2f}")
else:
    # Avoid ZeroDivisionError / empty-sequence errors on captures without TCP.
    print("No TCP packets found; window statistics unavailable.")
print(f"\nModified pcap saved to: {output_file}")



Total packets: 84440
TCP packets: 77999
Modified packets (NaN induced): 23360
Percentage modified: 29.95%

Original window size stats:
  Min: 1
  Max: 65535
  Mean: 2374.05

Modified pcap saved to: data/netflix_resolution_nan_window.pcap
