In [1]:
# prompt: connect to drive

from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# !pip install scapy
# !pip install catboost

In [3]:
from scapy.all import rdpcap, IP, TCP, UDP
import pandas as pd
import numpy as np
from collections import defaultdict

In [4]:
# Path to the pcap file
pcap_file = '/content/drive/MyDrive/Major Project/PCAP/output4.pcap'

In [5]:
# Load the pcap file
packets = rdpcap(pcap_file)

# Initialize storage for flows
flows = defaultdict(list)

In [6]:
# Helper function to get flow key
def get_flow_key(packet):
    if IP in packet:
        ip_layer = packet[IP]
        proto = ip_layer.proto
        sport = dport = 0
        if TCP in packet:
            sport = packet[TCP].sport
            dport = packet[TCP].dport
        elif UDP in packet:
            sport = packet[UDP].sport
            dport = packet[UDP].dport
        return (ip_layer.src, ip_layer.dst, sport, dport, proto)
    return None


In [7]:
# Process packets
for packet in packets:
    flow_key = get_flow_key(packet)
    if flow_key:
        flows[flow_key].append(packet)

In [8]:

# Feature extraction for each flow
flow_data = []
for flow_key, packets in flows.items():
    features = {}
    features['Source_IP'] = flow_key[0]
    features['Destination_IP'] = flow_key[1]
    features['Source_Port'] = flow_key[2]
    features['Destination_Port'] = flow_key[3]
    features['Protocol'] = flow_key[4]

    # Initialize feature placeholders
    fwd_packet_lengths = []
    bwd_packet_lengths = []
    fwd_inter_arrival_times = []
    bwd_inter_arrival_times = []
    syn_flag_count = fin_flag_count = ack_flag_count = rst_flag_count = psh_flag_count = urg_flag_count = 0
    fwd_psh_flags = bwd_psh_flags = fwd_urg_flags = bwd_urg_flags = 0

    # Convert scapy time values to float
    flow_start_time = float(packets[0].time)
    flow_end_time = float(packets[-1].time)
    features['Flow_Duration'] = flow_end_time - flow_start_time

    for i, packet in enumerate(packets):
        packet_length = len(packet)
        packet_time = float(packet.time)  # Convert packet time to float
        if packet[IP].src == features['Source_IP']:
            fwd_packet_lengths.append(packet_length)
            if i > 0:
                fwd_inter_arrival_times.append(packet_time - float(packets[i - 1].time))
        else:
            bwd_packet_lengths.append(packet_length)
            if i > 0:
                bwd_inter_arrival_times.append(packet_time - float(packets[i - 1].time))

        # Count flags
        if TCP in packet:
            flags = packet[TCP].flags
            syn_flag_count += (flags & 0x02) != 0
            fin_flag_count += (flags & 0x01) != 0
            ack_flag_count += (flags & 0x10) != 0
            rst_flag_count += (flags & 0x04) != 0
            psh_flag_count += (flags & 0x08) != 0
            urg_flag_count += (flags & 0x20) != 0

            # Count PSH and URG flags in forward and backward directions
            if packet[IP].src == features['Source_IP']:
                fwd_psh_flags += (flags & 0x08) != 0
                fwd_urg_flags += (flags & 0x20) != 0
            else:
                bwd_psh_flags += (flags & 0x08) != 0
                bwd_urg_flags += (flags & 0x20) != 0

    # Compute packet length features
    features['Total_Fwd_Packets'] = len(fwd_packet_lengths)
    features['Total_Backward_Packets'] = len(bwd_packet_lengths)
    features['Total_Length_of_Fwd_Packets'] = sum(fwd_packet_lengths)
    features['Total_Length_of_Bwd_Packets'] = sum(bwd_packet_lengths)

    if fwd_packet_lengths:
        features['Fwd_Packet_Length_Max'] = max(fwd_packet_lengths)
        features['Fwd_Packet_Length_Min'] = min(fwd_packet_lengths)
        features['Fwd_Packet_Length_Mean'] = np.mean(fwd_packet_lengths)
        features['Fwd_Packet_Length_Std'] = np.std(fwd_packet_lengths)
    else:
        features['Fwd_Packet_Length_Max'] = features['Fwd_Packet_Length_Min'] = features['Fwd_Packet_Length_Mean'] = features['Fwd_Packet_Length_Std'] = 0

    if bwd_packet_lengths:
        features['Bwd_Packet_Length_Max'] = max(bwd_packet_lengths)
        features['Bwd_Packet_Length_Min'] = min(bwd_packet_lengths)
        features['Bwd_Packet_Length_Mean'] = np.mean(bwd_packet_lengths)
        features['Bwd_Packet_Length_Std'] = np.std(bwd_packet_lengths)
    else:
        features['Bwd_Packet_Length_Max'] = features['Bwd_Packet_Length_Min'] = features['Bwd_Packet_Length_Mean'] = features['Bwd_Packet_Length_Std'] = 0

    # Compute flow rates
    if features['Flow_Duration'] > 0:
        features['Flow_Bytes/s'] = (features['Total_Length_of_Fwd_Packets'] + features['Total_Length_of_Bwd_Packets']) / features['Flow_Duration']
        features['Flow_Packets/s'] = (features['Total_Fwd_Packets'] + features['Total_Backward_Packets']) / features['Flow_Duration']
    else:
        features['Flow_Bytes/s'] = features['Flow_Packets/s'] = 0

    # Compute inter-arrival times
    if fwd_inter_arrival_times:
        features['Fwd_IAT_Total'] = sum(fwd_inter_arrival_times)
        features['Fwd_IAT_Mean'] = np.mean(fwd_inter_arrival_times)
        features['Fwd_IAT_Std'] = np.std(fwd_inter_arrival_times)
        features['Fwd_IAT_Max'] = max(fwd_inter_arrival_times)
        features['Fwd_IAT_Min'] = min(fwd_inter_arrival_times)
    else:
        features['Fwd_IAT_Total'] = features['Fwd_IAT_Mean'] = features['Fwd_IAT_Std'] = features['Fwd_IAT_Max'] = features['Fwd_IAT_Min'] = 0

    if bwd_inter_arrival_times:
        features['Bwd_IAT_Total'] = sum(bwd_inter_arrival_times)
        features['Bwd_IAT_Mean'] = np.mean(bwd_inter_arrival_times)
        features['Bwd_IAT_Std'] = np.std(bwd_inter_arrival_times)
        features['Bwd_IAT_Max'] = max(bwd_inter_arrival_times)
        features['Bwd_IAT_Min'] = min(bwd_inter_arrival_times)
    else:
        features['Bwd_IAT_Total'] = features['Bwd_IAT_Mean'] = features['Bwd_IAT_Std'] = features['Bwd_IAT_Max'] = features['Bwd_IAT_Min'] = 0

    # Additional features
    features['Fwd_PSH_Flags'] = fwd_psh_flags
    features['Bwd_PSH_Flags'] = bwd_psh_flags
    features['Fwd_URG_Flags'] = fwd_urg_flags
    features['Bwd_URG_Flags'] = bwd_urg_flags
    features['Fwd_Header_Length'] = 0  # Placeholder
    features['Bwd_Header_Length'] = 0  # Placeholder
    features['Fwd_Packets/s'] = features['Total_Fwd_Packets'] / features['Flow_Duration'] if features['Flow_Duration'] > 0 else 0
    features['Bwd_Packets/s'] = features['Total_Backward_Packets'] / features['Flow_Duration'] if features['Flow_Duration'] > 0 else 0
    features['Min_Packet_Length'] = min(fwd_packet_lengths + bwd_packet_lengths) if (fwd_packet_lengths + bwd_packet_lengths) else 0
    features['Max_Packet_Length'] = max(fwd_packet_lengths + bwd_packet_lengths) if (fwd_packet_lengths + bwd_packet_lengths) else 0
    features['Packet_Length_Mean'] = np.mean(fwd_packet_lengths + bwd_packet_lengths) if (fwd_packet_lengths + bwd_packet_lengths) else 0
    features['Packet_Length_Std'] = np.std(fwd_packet_lengths + bwd_packet_lengths) if (fwd_packet_lengths + bwd_packet_lengths) else 0
    features['Packet_Length_Variance'] = np.var(fwd_packet_lengths + bwd_packet_lengths) if (fwd_packet_lengths + bwd_packet_lengths) else 0
    features['FIN_Flag_Count'] = fin_flag_count
    features['SYN_Flag_Count'] = syn_flag_count
    features['RST_Flag_Count'] = rst_flag_count
    features['PSH_Flag_Count'] = psh_flag_count
    features['URG_Flag_Count'] = urg_flag_count
    features['ECE_Flag_Count'] = 0  # Placeholder
    features['Down/Up_Ratio'] = 0  # Placeholder
    features['Average_Packet_Size'] = (features['Total_Length_of_Fwd_Packets'] + features['Total_Length_of_Bwd_Packets']) / (features['Total_Fwd_Packets'] + features['Total_Backward_Packets']) if (features['Total_Fwd_Packets'] + features['Total_Backward_Packets']) > 0 else 0
    features['Avg_Fwd_Segment_Size'] = 0  # Placeholder
    features['Avg_Bwd_Segment_Size'] = 0  # Placeholder
    features['Fwd_Header_Length.1'] = 0  # Placeholder
    features['Fwd_Avg_Bytes/Bulk'] = 0  # Placeholder
    features['Fwd_Avg_Packets/Bulk'] = 0  # Placeholder
    features['Fwd_Avg_Bulk_Rate'] = 0  # Placeholder
    features['Bwd_Avg_Bytes/Bulk'] = 0  # Placeholder
    features['Bwd_Avg_Packets/Bulk'] = 0  # Placeholder
    features['Bwd_Avg_Bulk_Rate'] = 0  # Placeholder
    features['Subflow_Fwd_Packets'] = 0  # Placeholder
    features['Subflow_Fwd_Bytes'] = 0  # Placeholder
    features['Subflow_Bwd_Packets'] = 0  # Placeholder
    features['Subflow_Bwd_Bytes'] = 0  # Placeholder
    features['Init_Win_bytes_forward'] = 0  # Placeholder
    features['Init_Win_bytes_backward'] = 0  # Placeholder
    features['act_data_pkt_fwd'] = 0  # Placeholder
    features['min_seg_size_forward'] = 0  # Placeholder
    features['Active_Mean'] = 0  # Placeholder
    features['Active_Std'] = 0  # Placeholder
    features['Active_Max'] = 0  # Placeholder
    features['Active_Min'] = 0  # Placeholder
    features['Idle_Mean'] = 0  # Placeholder
    features['Idle_Std'] = 0  # Placeholder
    features['Idle_Max'] = 0  # Placeholder
    features['Idle_Min'] = 0  # Placeholder
    features['Inbound'] = 0  # Placeholder

    features['Flow_IAT_Mean'] = 0  # Placeholder
    features['CWE_Flag_Count'] = 0  # Placeholder
    features['Flow_IAT_Min'] = 0  # Placeholder
    features['Flow_IAT_Std'] = 0  # Placeholder
    features['Flow_IAT_Max'] = 0  # Placeholder
    features['ACK_Flag_Count'] = 0  # Placeholder

    flow_data.append(features)

In [9]:
# Convert the extracted flow data to a DataFrame
df = pd.DataFrame(flow_data)

# Handle any missing values (e.g., fill with 0)
df = df.fillna(0)

# At this point, you would preprocess df as necessary (e.g., scaling) and use it for predictions.


In [10]:
df

Unnamed: 0,Source_IP,Destination_IP,Source_Port,Destination_Port,Protocol,Flow_Duration,Total_Fwd_Packets,Total_Backward_Packets,Total_Length_of_Fwd_Packets,Total_Length_of_Bwd_Packets,...,Idle_Std,Idle_Max,Idle_Min,Inbound,Flow_IAT_Mean,CWE_Flag_Count,Flow_IAT_Min,Flow_IAT_Std,Flow_IAT_Max,ACK_Flag_Count
0,127.0.0.1,127.0.0.1,49724,49725,6,10.220637,32,0,1440,0,...,0,0,0,0,0,0,0,0,0,0
1,127.0.0.1,127.0.0.1,49725,49724,6,10.220618,32,0,1408,0,...,0,0,0,0,0,0,0,0,0,0
2,127.0.0.1,127.0.0.1,49730,49729,6,10.219441,32,0,1440,0,...,0,0,0,0,0,0,0,0,0,0
3,127.0.0.1,127.0.0.1,49729,49730,6,10.219457,32,0,1408,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
df.columns

Index(['Source_IP', 'Destination_IP', 'Source_Port', 'Destination_Port',
       'Protocol', 'Flow_Duration', 'Total_Fwd_Packets',
       'Total_Backward_Packets', 'Total_Length_of_Fwd_Packets',
       'Total_Length_of_Bwd_Packets', 'Fwd_Packet_Length_Max',
       'Fwd_Packet_Length_Min', 'Fwd_Packet_Length_Mean',
       'Fwd_Packet_Length_Std', 'Bwd_Packet_Length_Max',
       'Bwd_Packet_Length_Min', 'Bwd_Packet_Length_Mean',
       'Bwd_Packet_Length_Std', 'Flow_Bytes/s', 'Flow_Packets/s',
       'Fwd_IAT_Total', 'Fwd_IAT_Mean', 'Fwd_IAT_Std', 'Fwd_IAT_Max',
       'Fwd_IAT_Min', 'Bwd_IAT_Total', 'Bwd_IAT_Mean', 'Bwd_IAT_Std',
       'Bwd_IAT_Max', 'Bwd_IAT_Min', 'Fwd_PSH_Flags', 'Bwd_PSH_Flags',
       'Fwd_URG_Flags', 'Bwd_URG_Flags', 'Fwd_Header_Length',
       'Bwd_Header_Length', 'Fwd_Packets/s', 'Bwd_Packets/s',
       'Min_Packet_Length', 'Max_Packet_Length', 'Packet_Length_Mean',
       'Packet_Length_Std', 'Packet_Length_Variance', 'FIN_Flag_Count',
       'SYN_Flag_Count', '

In [12]:
import pickle
with open('/content/drive/MyDrive/Major Project/catboost_model.pkl', 'rb') as f:
    model = pickle.load(f)

In [13]:
df.drop(['Source_IP', 'Destination_IP'], axis=1, inplace=True)

In [14]:
result = model.predict(df)

result

array([[1],
       [1],
       [1],
       [1]])

In [15]:
mapping = {
    0: 'NetBIOS',
    1: 'BENIGN',
    2: 'LDAP',
    3: 'MSSQL',
    4: 'Portmap',
    5: 'Syn',
    6: 'UDP',
    7: 'UDPLag'
}

# Extract the integer predictions from the NumPy array
result_int = [pred[0] for pred in result]

# Map the integer predictions to labels
result_mapped = [mapping[pred] for pred in result_int]
print(result_mapped)

['BENIGN', 'BENIGN', 'BENIGN', 'BENIGN']


# NetBIOS:

Description: NetBIOS (Network Basic Input/Output System) is a protocol used for network communication in Windows-based networks, allowing applications on separate computers to communicate over a local area network (LAN). DDoS attacks exploiting NetBIOS can overwhelm a network by sending a large number of NetBIOS requests, leading to a denial of service.

Impact: These attacks can disrupt services relying on NetBIOS, leading to network slowdown or complete unavailability.

# BENIGN:

Description: This label represents normal, non-malicious network traffic. It is included as a baseline for comparison with attack traffic.

Impact: None, as this is regular, safe traffic.

# LDAP:

Description: LDAP (Lightweight Directory Access Protocol) is used for accessing and managing directory information. DDoS attacks targeting LDAP servers can involve sending a high volume of requests to the server, overwhelming it and causing service disruption.

Impact: Disruption of services that rely on directory access, leading to possible unavailability of authentication and resource location services.

# MSSQL:

Description: Microsoft SQL Server (MSSQL) is a relational database management system. DDoS attacks on MSSQL servers typically involve sending massive amounts of SQL queries or connection requests, which can overwhelm the database server.

Impact: The database server may become unresponsive, leading to a denial of service for applications that rely on the database.

# Portmap:

Description: Portmap (also known as RPCBIND) is a service that maps RPC (Remote Procedure Call) program numbers to network port numbers. A Portmap DDoS attack involves sending a high volume of RPC requests, overwhelming the server.

Impact: This can lead to a denial of service for any applications or services relying on RPC communication.

# Syn:

Description: A SYN flood is a type of DDoS attack where the attacker sends a large number of SYN (synchronization) requests to a target's system. These requests initiate a TCP connection but the handshake is never completed, leaving the server with half-open connections and eventually exhausting its resources.

Impact: This can cause the target server to become unresponsive, preventing legitimate users from establishing connections.

# UDP:

Description: UDP (User Datagram Protocol) flood attacks involve sending a large number of UDP packets to random ports on a target server. The server must process and respond to each packet, which can overwhelm it.

Impact: The server may become overwhelmed with the number of requests, leading to service disruption or unavailability.

# UDPLag:

Description: A UDPLag attack is a variant of the UDP flood, where the attacker sends a large volume of UDP packets specifically designed to cause latency in the target system's response. This can increase lag and delay in network communications.

Impact: This can lead to significant delays in network services, reducing performance and potentially causing service outages.