In [240]:
import re
from collections import Counter
from typing import List, Any

from scapy.all import PcapReader
from scapy.plist import PacketList

In [241]:
def extract_label_from_filename(filename: str) -> int:
	"""Return the integer label embedded in a capture filename.

	The label is the run of digits that immediately follows the first
	``grid_`` found in ``filename``, e.g.
	``"grid_1_20230527_095444.pcap"`` gives ``1``.

	Args:
		filename: File name (or path) containing ``grid_<digits>``.

	Returns:
		The digits after ``grid_`` converted to ``int``.

	Raises:
		ValueError: If ``filename`` contains no ``grid_<digits>`` segment.
	"""
	# Raw string so the backslash-d is a regex escape, not an invalid string escape.
	# The former trailing ".+" is dropped: it demanded one extra character
	# after the digits, so "grid_12" backtracked and was parsed as 1.
	match = re.search(r'grid_(\d+)', filename)
	if match is None:
		raise ValueError(f"no 'grid_<number>' label found in filename: {filename!r}")
	return int(match.group(1))

def read_pcap(filename: str) -> PacketList:
	"""Read every packet from a pcap file.

	Args:
		filename: Path of the capture file to open.

	Returns:
		Whatever ``PcapReader.read_all()`` returns for that file.
	"""
	# The context manager closes the reader even if reading raises;
	# previously the reader was never explicitly closed.
	with PcapReader(filename) as reader:
		return reader.read_all()

def extract_features(plist: PacketList) -> List[Any]:
	"""Compute summary features for a collection of captured packets.

	Args:
		plist: Sized, iterable collection of packets. Each packet must
			expose a ``time`` attribute and support ``len()``.

	Returns:
		A three-element list:
		  0. the number of packets,
		  1. the largest ``time`` minus the smallest ``time``
		     (``0`` when ``plist`` is empty),
		  2. a dict mapping packet length to the number of packets with
		     that length, keyed in first-seen order.
	"""
	# feature 1 = number of packets
	features = [len(plist)]

	# feature 2 = time duration
	# Timestamps are collected once; an empty capture yields a duration of 0
	# instead of max()/min() raising ValueError on an empty sequence.
	timestamps = [packet.time for packet in plist]
	features.append(max(timestamps) - min(timestamps) if timestamps else 0)

	# feature 3 = dict of packet size as key and number of packets with that size as value
	# len(packet) is evaluated once per packet; the Counter is converted back
	# to a plain dict so the returned type stays the same as before.
	packet_length_dict = dict(Counter(len(packet) for packet in plist))

	# Sanity check: every packet was counted exactly once.
	assert sum(packet_length_dict.values()) == len(plist)
	features.append(packet_length_dict)

	return features

In [242]:
# Smoke-test the helpers on one sample capture: print its label, then its features.
sample_pcap = "grid_1_20230527_095444.pcap"
print(extract_label_from_filename(sample_pcap))
plist = read_pcap(sample_pcap)
print(extract_features(plist))

1
[437, Decimal('18.438893'), {590: 79, 54: 183, 4124: 1, 654: 1, 1618: 1, 2530: 60, 5006: 5, 402: 1, 3160: 1, 1148: 1, 1198: 20, 3768: 17, 1292: 26, 1170: 1, 3674: 5, 1104: 3, 1098: 1, 800: 2, 2502: 1, 13194: 1, 12912: 1, 2032: 1, 1126: 1, 496: 4, 380: 2, 938: 1, 844: 2, 894: 2, 4464: 1, 750: 2, 170: 1, 4818: 1, 3204: 1, 308: 1, 916: 1, 1032: 1, 1010: 1, 6244: 1, 7482: 1, 214: 1}]
