# Feature Extraction

In [21]:
import os

## Defining root directory

In [22]:
# Data root directory: the "data" folder under the current working directory.
# Both the input pcap and the output features CSV are resolved against it.
root_directory = f"{os.getcwd()}/data"

## Loading the pcap and extracting features

In [23]:
from scapy.all import *
import pandas as pd
import csv

# Output CSV that receives the header row and one feature row per packet.
CSV_FILE = f"{root_directory}/features.csv" # Change features CSV here.
# Per-flow state shared across get_features() calls: maps the flow key (the
# hash returned by get_ip) to int(packet.time * 1e6) of the most recent packet
# seen for that flow. Used to compute the inter-packet interval ("ipi").
flow_table = {}

def get_tcp(packet):
  """Pull the TCP header fields used as features out of a TCP layer.

  Args:
    packet: object exposing the attributes sport, dport, seq, ack, window,
      dataofs, urgptr, reserved and chksum (in this file, the payload of an
      IP packet whose proto is 6).

  Returns:
    A 3-tuple (sport, dport, fields) where fields is the list
    [seq, ack, window, dataofs, urgptr, reserved, chksum].
  """
  source_port, destination_port = packet.sport, packet.dport

  # Remaining header fields, in the column order expected by the CSV header.
  field_names = ("seq", "ack", "window", "dataofs", "urgptr", "reserved", "chksum")
  header_fields = [getattr(packet, name) for name in field_names]

  return source_port, destination_port, header_fields

def get_ip(packet):
  """Extract the flow key and the IP/TCP header features from an IP layer.

  Args:
    packet: object exposing the attributes src, dst, proto, version, ihl,
      tos, len, id, frag, ttl, chksum and payload (in this file, the payload
      of the frame handed to get_features).

  Returns:
    A 2-tuple (key, fields):
      key    -- hash() of the tuple (src, dst, sport, dport, proto).
      fields -- list made of the five flow-key values, then
                [version, ihl, tos, len, id, frag, ttl, chksum], then the
                TCP field list returned by get_tcp().

  Raises:
    Exception: when proto is not 6, i.e. the payload is not handled as TCP.
  """
  endpoints = [packet.src, packet.dst]
  proto = packet.proto
  ip_fields = [
    packet.version,
    packet.ihl,
    packet.tos,
    packet.len,
    packet.id,
    packet.frag,
    packet.ttl,
    packet.chksum,
  ]

  # Guard clause: only protocol number 6 is processed further.
  if proto != 6:
    raise Exception(f"Not a TCP packet.")

  sport, dport, tcp_fields = get_tcp(packet.payload)
  flow_key = endpoints + [sport, dport, proto]

  # NOTE: hash() of a tuple containing str values is salted per interpreter
  # run unless PYTHONHASHSEED is fixed, so the key is only stable within a run.
  return hash(tuple(flow_key)), flow_key + ip_fields + tcp_fields

def get_features(packet):
  """Build the full feature row for one captured frame.

  Combines the flow key and IP/TCP fields from get_ip(packet.payload) with
  the frame's own src, dst and type attributes, the inter-packet interval
  ("ipi") and the frame size (len(packet)).

  The ipi is int(packet.time * 1e6 - previous), where previous is the value
  stored in the module-level flow_table for the same flow key; the table is
  then updated with int(packet.time * 1e6). For the first packet of a flow
  the table is seeded with the packet's own timestamp first.
  (Presumably packet.time is in seconds, making ipi microseconds -- confirm.)

  Args:
    packet: frame object exposing payload, src, dst, type, time and len().

  Returns:
    List: [flow_key] + IP/TCP fields + [src, dst, type, ipi, frame_size].

  Raises:
    Exception: any error raised while extracting is re-raised wrapped in a
      plain Exception carrying the original error as its argument.
  """
  try:
    flow_key, tcpip = get_ip(packet.payload)
    link_fields = [packet.src, packet.dst, packet.type]
    frame_size = len(packet)

    # Seed the table on the first packet of a flow, then measure the gap to
    # the previously stored timestamp and store the current one.
    flow_table.setdefault(flow_key, int(packet.time * 1e6))
    ipi = int(packet.time * 1e6 - flow_table[flow_key])
    flow_table[flow_key] = int(packet.time * 1e6)
  except Exception as e:
    raise Exception(e)

  return [flow_key, *tcpip, *link_fields, ipi, frame_size]

def process_pcap(file_path, classification, writer):
  """Write one labelled feature row per usable packet of a capture file.

  Every packet read from file_path is passed to get_features(). Packets for
  which extraction raises are reported on stdout (the error, then the packet)
  and skipped. Each remaining row is written with the classification label
  appended as the last column.

  Args:
    file_path: path of the pcap file to read.
    classification: label value appended to every row written.
    writer: object with a writerow(list) method (a csv.writer in this file).
  """
  print(f"Reading: {file_path}")
  # Use the reader as a context manager so the capture file is closed when
  # the loop ends or fails; a bare PcapReader(...) in the for statement
  # leaves the file handle open.
  with PcapReader(file_path) as reader:
    for packet in reader:
      try:
        features = get_features(packet)
      except Exception as e:
        print(e)
        print(packet)
        continue

      # The row ends with [..., typ, ipi, frame_size]; typ is the frame's
      # numeric `type` attribute, so it must be compared with the IPv6
      # EtherType value rather than with the string "IPV6".
      *_, typ, _ipi, _frame_size = features
      if typ == 0x86DD:
        continue

      writer.writerow(features + [classification])


# Header row: the columns produced by get_features(), in order, followed by
# the label column that process_pcap() appends to each row.
csv_columns = [
  'flow_key',
  'srcip', 'dstip', 'sport', 'dport', 'proto',
  'version', 'ihl', 'tos', 'length', 'id', 'frag', 'ttl', 'chksum',
  'seq', 'ack', 'window', 'dataofs', 'urgptr', 'reserved', 'tcp_chk',
  'srcmac', 'dstmac', 'typ',
  'ipi', 'frame_size',
  'classification',
]

# newline="" lets the csv module control line endings itself.
with open(CSV_FILE, 'w', newline="") as file:
  writer = csv.writer(file)
  writer.writerow(csv_columns)
  print("Starting pcap process")
  # Add pcaps to be processed here.

  process_pcap(f"{root_directory}/dash_f2_10m.pcap", 1, writer)
  print("Finished")


Starting pcap process
Reading: /home/icaro/Code/repo/In-NetworkML/jupyter/notebooks/data/dash_f2_10m.pcap
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 dest-unreach protocol-unreachable / IPerror / Raw
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 dest-unreach protocol-unreachable / IPerror / Raw
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 dest-unreach protocol-unreachable / IPerror / Raw
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 dest-unreach protocol-unreachable / IPerror / Raw
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 dest-unreach protocol-unreachable / IPerror / Raw
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 dest-unreach protocol-unreachable / IPerror / Raw
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 dest-unreach protocol-unreachable / IPerror / Raw
Not a TCP packet.
Ether / IP / ICMP 192.168.56.102 > 192.168.56.101 