In [18]:
import os
import pandas as pd
from nfstream import NFStreamer

# Function to parse pcap file and convert to DataFrame using NFStreamer
def pcap_to_dataframe(pcap_file):
    """Parse a pcap file with NFStreamer and return one DataFrame row per flow.

    Parameters
    ----------
    pcap_file : str
        Path of the capture, passed to ``NFStreamer`` as ``source``.

    Returns
    -------
    pandas.DataFrame
        One row per flow yielded by the streamer. The first two columns are
        aliases: ``time`` holds ``bidirectional_duration_ms`` (flow duration
        in milliseconds) and ``length`` holds ``bidirectional_bytes`` (total
        bytes in the flow). Every remaining column carries the flow attribute
        of the same name. The column set and order are fixed, so a capture
        that yields no flows still produces an empty frame with the full
        header instead of a column-less one.
    """
    directions = ('bidirectional', 'src2dst', 'dst2src')
    counters = ('first_seen_ms', 'last_seen_ms', 'duration_ms', 'packets', 'bytes')
    stats = ('min', 'mean', 'stddev', 'max')
    tcp_flags = ('syn', 'cwr', 'ece', 'urg', 'ack', 'psh', 'rst', 'fin')

    # Flow attributes exported under their own name, in output-column order.
    flow_attributes = (
        'src_ip', 'src_mac', 'src_oui', 'src_port',
        'dst_ip', 'dst_mac', 'dst_oui', 'dst_port',
        'protocol', 'ip_version', 'vlan_id', 'tunnel_id',
        *(f'{direction}_{counter}' for direction in directions for counter in counters),
        *(f'{direction}_{stat}_ps' for direction in directions for stat in stats),
        *(f'{direction}_{stat}_piat_ms' for direction in directions for stat in stats),
        *(f'{direction}_{flag}_packets' for direction in directions for flag in tcp_flags),
        'application_name', 'application_category_name',
        'application_is_guessed', 'application_confidence',
        'requested_server_name', 'client_fingerprint', 'server_fingerprint',
        'user_agent', 'content_type',
    )
    columns = ['time', 'length', *flow_attributes]

    # statistical_analysis=True is requested because the *_ps, *_piat_ms and
    # TCP-flag counter attributes read below are statistical features
    # (see the NFStream documentation).
    nfstreamer = NFStreamer(source=pcap_file, statistical_analysis=True)

    data = []
    for flow in nfstreamer:
        flow_dict = {
            'time': flow.bidirectional_duration_ms,
            'length': flow.bidirectional_bytes,
        }
        flow_dict.update((name, getattr(flow, name)) for name in flow_attributes)
        data.append(flow_dict)

    # Explicit columns keep the schema stable even when `data` is empty.
    return pd.DataFrame(data, columns=columns)

def datasetCreation(input_directory, output_folder, max_files=None):
    """Convert every pcap file in a directory to a CSV of flow features.

    Parameters
    ----------
    input_directory : str
        Directory that is listed (non-recursively) for capture files.
    output_folder : str
        Directory that receives the CSV files; created if it is missing.
    max_files : int or None, optional
        Stop after this many files have been converted. ``None`` (the
        default) converts every matching file. Handy for a quick trial run
        on a large dataset.

    Notes
    -----
    * Entries are processed in sorted name order, because ``os.listdir``
      makes no ordering guarantee.
    * The ``.pcap`` extension is matched case-insensitively, and entries
      that are not regular files are skipped.
    * Each output file is named after its capture with the extension
      replaced by ``.csv``; an existing file of that name is overwritten.
    """
    os.makedirs(output_folder, exist_ok=True)

    converted = 0
    for pcap_file in sorted(os.listdir(input_directory)):
        if max_files is not None and converted >= max_files:
            break

        input_file_path = os.path.join(input_directory, pcap_file)
        if not pcap_file.lower().endswith('.pcap') or not os.path.isfile(input_file_path):
            continue

        output_file_path = os.path.join(output_folder, os.path.splitext(pcap_file)[0] + '.csv')

        # Convert the pcap file to a DataFrame and save it as CSV
        df = pcap_to_dataframe(input_file_path)
        df.to_csv(output_file_path, index=False)

        print(f"File converted: {output_file_path}")
        converted += 1

In [19]:
# Convert the EVSE-A captures to per-flow CSV files.
# NOTE(review): both paths are absolute and machine-specific; adjust them
# before running this cell on another machine.
output_folder = 'C:/Users/Aurora/Desktop/A'
input_directory = 'C:/Users/Aurora/Desktop/TinyML_Contribution/dataset/CICEVSE2024_Dataset/Network Traffic/EVSE-A/pcaps'
datasetCreation(input_directory=input_directory, output_folder=output_folder)

File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-Aggressive-scan.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-Charging-Benign.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-icmp-flood.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-icmp-fragmentation.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-os-fingerprinting.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-portscan.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-push-ack-flood.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-service-detection.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-slowLoris-scan.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-syn-flood.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-syn-stealth.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-synonymous-ip.csv
File converted: C:/Users/Aurora/Desktop/A\EVSE-A-charging-tcp-flood.csv
File converted: C:/U

In [20]:
# Convert the EVSE-B captures to per-flow CSV files.
# NOTE(review): both paths are absolute and machine-specific; adjust them
# before running this cell on another machine.
output_folder = 'C:/Users/Aurora/Desktop/B'
input_directory = 'C:/Users/Aurora/Desktop/TinyML_Contribution/dataset/CICEVSE2024_Dataset/Network Traffic/EVSE-B/pcaps'
datasetCreation(input_directory=input_directory, output_folder=output_folder)

File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-aggressive-scan.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-icmp-flood.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-os-fingerprinting.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-port-scan.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-push-ack-flood.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-service-detection-scan.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-syn-flood.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-syn-stealth.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-synonymous-ip-flood.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-tcp-flood.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-udp-flood.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-charging-vulnerability-scan.csv
File converted: C:/Users/Aurora/Desktop/B\EVSE-B-idle-aggressive-scan.csv
File con