# Feature extraction with Pyshark 

In this demonstration, we will see how to extract packet header features with Pyshark, a Python wrapper for TShark.

In [None]:
# Author: Roberto Doriguzzi-Corin
# Project: Course on Network Intrusion and Anomaly Detection with Machine Learning
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pyshark

# We need the following to get around “RuntimeError: This event loop is already running” when using Pyshark within Jupyter notebooks.
# Not needed in stand-alone Python projects
import nest_asyncio
nest_asyncio.apply()  


# Path to the capture file, relative to the notebook's working directory
capture_file = './PCAPs/benign-syn.pcap'
# Capture object bound to that file; the packet loops in the cells below iterate over `cap`
cap = pyshark.FileCapture(capture_file)

# Extract the 5-tuple identifier

In [None]:
# Print the 5-tuple (source IP, source port, destination IP, destination port,
# protocol) of every packet that has an IP layer and a TCP or UDP layer.
for packet in cap:
    if 'IP' not in packet:
        continue  # No IP layer (e.g., ARP frames): no 5-tuple to extract

    # Pick whichever transport layer is present; packets with neither are skipped
    if 'TCP' in packet:
        transport = packet.tcp
    elif 'UDP' in packet:
        transport = packet.udp
    else:
        continue

    src_ip = packet.ip.src  # Source IP address
    dst_ip = packet.ip.dst  # Destination IP address
    src_port = transport.srcport  # Source port
    dst_port = transport.dstport  # Destination port
    protocol = packet.ip.proto  # Protocol field of the IP header, identifying the transport protocol

    print(f"Source IP: {src_ip}, Source Port: {src_port}, Destination IP: {dst_ip}, Destination Port: {dst_port}, Protocol: {protocol}")

# Extract the IP Flags
In the following block, we extract the IP Flags, which are useful to detect, for instance, fragmentation attacks. The built-in `hasattr` function is used to ensure that the IP Flags are present in the packet and to avoid errors in the case of Layer 2 frames (e.g., ARP protocol). `0x02` means "don't fragment" and `0x01` means "more fragments", while `0x00` means that no flag is set (the packet is either unfragmented or the last fragment).

In [None]:
# Walk the capture and decode the IP flags of every packet that exposes them
for packet in cap:
    # Skip frames without an IP layer, or whose IP layer has no 'flags' attribute
    if 'IP' not in packet or not hasattr(packet.ip, 'flags'):
        continue

    ip_flags = int(packet.ip.flags, 16)  # Flags string parsed as a base-16 integer

    # Test each bit individually
    reserved = (ip_flags & 0x4) != 0  # Reserved bit
    dont_fragment = (ip_flags & 0x2) != 0  # Don't Fragment bit
    more_fragments = (ip_flags & 0x1) != 0  # More Fragments bit

    print(f"IP Flags: {ip_flags}")
    print(f"Don't Fragment: {dont_fragment}")
    print(f"More Fragments: {more_fragments}")
    print(f"Reserved: {reserved}")
    print("-" * 30)

# Extract TCP flags and length
Monitoring the TCP flags can be useful to detect SYN flood attacks. 

In [None]:
# Print the TCP length and the individual TCP flags of every TCP packet
for packet in cap:
    if 'TCP' not in packet:
        continue  # Only packets with a TCP layer are of interest here

    print("TCP Length: ", packet.tcp.len)

    tcp_flags = int(packet.tcp.flags, 16)  # Flags string parsed as a base-16 integer

    # One mask per flag, from the least significant bit upwards
    fin_flag = (tcp_flags & 0x01) != 0  # FIN bit
    syn_flag = (tcp_flags & 0x02) != 0  # SYN bit
    rst_flag = (tcp_flags & 0x04) != 0  # RST bit
    psh_flag = (tcp_flags & 0x08) != 0  # PSH bit
    ack_flag = (tcp_flags & 0x10) != 0  # ACK bit
    urg_flag = (tcp_flags & 0x20) != 0  # URG bit

    print(f"TCP Flags: {tcp_flags}")
    print(f"URG: {urg_flag}")
    print(f"ACK: {ack_flag}")
    print(f"PSH: {psh_flag}")
    print(f"RST: {rst_flag}")
    print(f"SYN: {syn_flag}")
    print(f"FIN: {fin_flag}")
    print("-" * 30)

# Application layer features: HTTP and DNS

In [None]:
# Print addressing and request details for every packet with an HTTP layer
for packet in cap:
    try:
        if 'HTTP' not in packet:
            continue  # Not an HTTP packet: nothing to report
        print('Source IP:', packet.ip.src)
        print('Destination IP:', packet.ip.dst)
        print('HTTP Method:', packet.http.request_method)
        print('HTTP URI:', packet.http.request_uri)
        print('HTTP Host:', packet.http.host)
        print('----------------------------------')
    except AttributeError as err:
        # An attribute missing from the packet is reported instead of stopping the loop
        print(f'Error processing packet: {err}')

In [None]:
# Print addressing and the query name for every packet with a DNS layer
for packet in cap:
    try:
        if 'DNS' not in packet:
            continue  # Not a DNS packet: nothing to report
        print('Source IP:', packet.ip.src)
        print('Destination IP:', packet.ip.dst)
        print('DNS Query Name:', packet.dns.qry_name)
        print('----------------------------------')
    except AttributeError as err:
        # An attribute missing from the packet is reported instead of stopping the loop
        print(f'Error processing packet: {err}')

# Capturing live traffic from a network interface

In [None]:
# Callback function to process captured packets
def process_packet(pkt) -> None:
    """Print a captured packet.

    Args:
        pkt: The packet object to display; it is only passed to ``print``.
    """
    print(pkt)

import asyncio  # Only needed to recognise the timeout raised when the capture window ends

# Capture packets on network interface 'en0' and call the callback function for each packet.
# Change the interface name to one that exists on your machine (e.g., 'eth0' on many Linux systems).
cap = pyshark.LiveCapture(interface='en0')
try:
    cap.apply_on_packets(process_packet, timeout=10)  # Capture packets for 10 seconds
except asyncio.TimeoutError:
    # NOTE(review): apply_on_packets is expected to signal the end of the timeout window
    # by raising asyncio.TimeoutError; treat it as the normal end of the capture
    # rather than as a failure. Confirm against the installed pyshark version.
    print('Capture finished: 10-second timeout reached')

# Alternatively, you can use cap.sniff(timeout=10) if you want to capture packets without applying a callback function