# Feature extraction with Scapy 

In this demonstration, we will see how to extract features from packet headers with Scapy, a Python library for packet manipulation.

In [None]:
# Author: Roberto Doriguzzi-Corin
# Project: Course on Network Intrusion and Anomaly Detection with Machine Learning
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from scapy.all import rdpcap

# Path to the capture file (a relative path, resolved against the current working directory)
capture_file = './PCAPs/benign-syn.pcap'
# Read packets from the pcap file; `packets` is the collection iterated by the loops below
packets = rdpcap(capture_file)

# Extract the 5-tuple identifier

In [None]:
# Walk the capture and report the flow identifier (5-tuple) of each IP packet
for packet in packets:
    # Frames without an IP layer carry no 5-tuple: skip them
    if not packet.haslayer('IP'):
        continue

    ip_header = packet['IP']
    src_ip, dst_ip = ip_header.src, ip_header.dst  # Source / destination IP address

    # Probe the supported transport layers in order (TCP first, then UDP)
    for protocol in ('TCP', 'UDP'):
        if packet.haslayer(protocol):
            transport = packet[protocol]
            src_port, dst_port = transport.sport, transport.dport
            break
    else:
        # Neither TCP nor UDP: ports and protocol fall back to 0 placeholders
        src_port = dst_port = protocol = 0

    print(f"Source IP: {src_ip}, Source Port: {src_port}, Destination IP: {dst_ip}, Destination Port: {dst_port}, Protocol: {protocol}")

# Extract the IP Flags
In the following block, we extract the IP flags, which are useful to detect, for instance, fragmentation attacks. The `haslayer` method is used to ensure that the IP layer is present in the packet, avoiding errors in the case of Layer 2 frames (e.g., the ARP protocol). A flags value of `0x02` means "don't fragment", while `0x00` means "last fragment".

In [None]:
# Decode and print the IP header flags of every IP packet in the capture
for packet in packets:
    # Frames without an IP layer have no IP flags: skip them
    if not packet.haslayer('IP'):
        continue

    ip_flags = packet['IP'].flags

    # Test each flag bit individually (0x2, 0x1 and 0x4 masks)
    dont_fragment = bool(ip_flags & 0x2)
    more_fragments = bool(ip_flags & 0x1)
    reserved = bool(ip_flags & 0x4)

    print(f"IP Flags: {ip_flags}")
    print(f"Don't Fragment: {dont_fragment}")
    print(f"More Fragments: {more_fragments}")
    print(f"Reserved: {reserved}")
    print("-" * 30)

# Extract TCP flags
Monitoring the TCP flags can be useful to detect SYN flood attacks. 

In [None]:
# Report which TCP control flags are set on every TCP packet in the capture.
# Each entry pairs the printed flag name with its bit mask in the flags field.
TCP_FLAG_BITS = (
    ("URG", 0x20),
    ("ACK", 0x10),
    ("PSH", 0x08),
    ("RST", 0x04),
    ("SYN", 0x02),
    ("FIN", 0x01),
)

for packet in packets:
    # Packets without a TCP layer have no TCP flags: skip them
    if not packet.haslayer('TCP'):
        continue

    tcp_flags = packet['TCP'].flags  # Flags field of the TCP header

    print(f"TCP Flags: {tcp_flags}")
    for flag_name, bit_mask in TCP_FLAG_BITS:
        print(f"{flag_name}: {bool(tcp_flags & bit_mask)}")
    print("-" * 30)

# Application layer features: HTTP

In [None]:
# Function to process HTTP packets
def process_http(packet, http_port=80):
    """Print the TCP payload of *packet* when it looks like HTTP traffic.

    The packet is considered only if it has a TCP layer whose destination
    port equals *http_port*. Its payload is printed when it contains one of
    the byte markers ``GET``, ``POST`` or ``HTTP/1.1``.

    Args:
        packet: Packet to inspect; it must expose ``haslayer`` and layer
            indexing (``packet['TCP']``).
        http_port: Destination port treated as HTTP. Defaults to 80, so
            existing single-argument calls behave exactly as before.

    Returns:
        None. The decoded payload is written to standard output.
    """
    if not packet.haslayer('TCP'):
        return

    tcp_layer = packet['TCP']
    # Only traffic destined for the HTTP port is inspected
    if tcp_layer.dport != http_port:
        return

    payload = bytes(tcp_layer.payload)
    # Look for HTTP request or response signatures in the payload
    http_markers = (b'GET', b'POST', b'HTTP/1.1')
    if any(marker in payload for marker in http_markers):
        # Decode to string, dropping bytes that cannot be decoded
        print(payload.decode(errors='ignore'))

# Pass every captured packet to process_http; the function itself skips
# the packets that do not match and prints the payload of those that do
for packet in packets:
    process_http(packet)

# Capturing live traffic from a network interface

In [None]:
from scapy.all import sniff

# Callback function to process captured packets
def process_packet(packet):
    """Print the summary of a single captured packet.

    Args:
        packet: Captured packet; its ``summary()`` result is printed.
    """
    summary_line = packet.summary()
    print(summary_line)

# Sniff 10 packets on network interface 'en0' and call the callback function for each packet
# NOTE(review): interface names are platform-specific (e.g. 'eth0' on many Linux hosts) - adjust `iface` to the local machine
sniff(iface='en0', prn=process_packet, count=10)