In [1]:
import transformers
from transformers import (
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# import seaborn as sns
from pylab import rcParams
import os

In [2]:
model_id = 'hyonbokan/mobile_llama_10k'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'


# Need auth token for these
hf_auth = os.environ.get('hf_token')

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)


model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on cuda:0


In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

Write a Python script using the Scapy library to perform packet-level anomaly detection for potential DDoS attacks. Implement a function detect_ddos that inspects each packet and tracks the number of packets received from each source IP address. If the number of packets from any source IP address exceeds a predefined threshold, print a message indicating a potential DDoS attack. 

In [None]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "generate python code script using scapy library for DDoS attack detection. the script should import raw pcap file and set threshold with the time window"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=512)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Given a n3.pcap file containing network traffic data, create a Python script using Scapy to process the PCAP. The script should extract the source IP, destination IP, source port, destination port, and the raw payload data for each packet in the PCAP. Impletent try-except blocks for errors if expected layers IP, TCP/UDP, Raw. Store this data in a pandas DataFrame and display it."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=712)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
import scapy.all as scapy
import pandas as pd

# Load the PCAP file
def load_pcap(file_path):
    try:
        packets = scapy.rdpcap(file_path)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None
    return packets

# Extract packet information
def extract_packet_info(packets):
    packet_data = []
    for packet in packets:
        source_ip = packet[IP].src
        dest_ip = packet[IP].dst
        src_port = packet[TCP].sport if TCP in packet else 0
        dst_port = packet[TCP].dport if TCP in packet else 0
        payload = packet[Raw].load if Raw in packet else b''
        packet_data.append([source_ip, dest_ip, src_port, dst_port, payload])
    return packet_data

# Create a DataFrame from the packet data
def to_dataframe(packet_data):
    df = pd.DataFrame(packet_data, columns=['Source IP', 'Destination IP', 'Source Port', 'Destination Port', 'Raw Payload'])
    return df

# Main function to process the PCAP file
def process_pcap(file_path):
    packets = load_pcap(file_path)
    packet_data = extract_packet_info(packets)
    df = to_dataframe(packet_data)
    print(df.head())

# Specify the PCAP file path
process_pcap('/home/hb/5G_dataset/n3_example.pcap')

In [7]:
import pandas as pd
import scapy.all as scapy

# Specify the PCAP file path
pcap_file = '/home/hb/5G_dataset/n3_example.pcap'

try:
    packets = scapy.rdpcap(pcap_file)  # Read the PCAP file

    # Initialize an empty DataFrame
    df = pd.DataFrame(columns=['Source IP', 'Destination IP', 'Source Port', 'Destination Port', 'Raw Payload'])

    for packet in packets:
        # Extract the source IP and destination IP
        source_ip = packet[IP].src
        destination_ip = packet[IP].dst

        # Extract the source port and destination port
        source_port = packet[TCP].sport
        destination_port = packet[TCP].dport

        # Extract the raw payload data
        payload = packet.show()

        # Add the data to the DataFrame
        df = df.append({'Source IP': source_ip, 'Destination IP': destination_ip, 'Source Port': source_port, 'Destination Port': destination_port, 'Raw Payload': payload}, ignore_index=True)

    # Display the DataFrame
    print(df)

except Exception as e:
    print(f"An error occurred: {str(e)}")

An error occurred: Layer [TCP] not found


In [None]:
# Run text generation pipeline with our next model
prompt = "Generate Python code to calculate 5G network performance KPIs: Total Network Capacity, Capacity per Area, Capacity per Point, Cost per Capacity, Cost per Area, and Surplus per Area. Load data from '5G_Infrastructure/demand_driven_postcode_data_results.csv'. Use keywords: 'capacity', 'cost', 'area', 'numpoints' to identify relevant columns."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1024)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
# Run text generation pipeline with our next model
prompt = "Perform BGP analysis using PyBGPStream and detect anomalies in AS path lengths for IPv4 prefixes over two time periods: from January 15, 2020, 15:00 to January 15, 2020, 17:00, and January 18, 2020, 12:00 to January 18, 2020, 13:00."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=718)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Perform BGP analysis using PyBGPStream and detect anomalies in AS path lengths for IPv4 prefixes over two time periods: from January 15, 2020, 15:00 to January 15, 2020, 17:00, and January 18, 2020, 12:00 to January 18, 2020, 13:00. [/INST]  Sure! Here's an example of how you could perform BGP analysis using PyBGPStream and detect anomalies in AS path lengths for IPv4 prefixes over two time periods:

1. Install PyBGPStream:
```
pip install pybgpstream
```
2. Initialize PyBGPStream:
```
stream = pybgpstream.BGPStream(
    from_time="2020-01-15 15:00:00",
    until_time="2020-01-15 17:00:00",
    collectors=["rrc00"],
    record_type="updates",
    filter="ipversion 4"
)
```
3. Initialize a list to store the AS path lengths:
```
as_path_lengths = []
```
4. Loop through the stream records:
```
for rec in stream.records():
    for elem in rec:
        if elem.type == "A":
            as_path = elem.fields["as-path"].split()
            as_path_length = len(as_path)
            as

# Examples of Pcap

In [None]:
import pandas as pd
from scapy.all import *

# Specify the PCAP file path
pcap_file = '5G_data/n3_example.pcap'

try:
    packets = rdpcap(pcap_file)  # Read the PCAP file
    print(f"Total packets in the capture: {len(packets)}\n")

    # Create a list to store packet data
    packet_data = []

    for packet in packets:
        src_ip, dst_ip, src_port, dst_port, raw_data = '', '', '', '', ''
        
        if IP in packet:
            print("IP found")
            # Extract IP information
            src_ip = packet[IP].src
            dst_ip = packet[IP].dst

        if TCP in packet:
            print("TCP found")
            # Extract TCP information
            src_port = packet[TCP].sport
            dst_port = packet[TCP].dport

        if Raw in packet:
            print("Raw found")
            # Extract and append raw data (payload)
            raw_data = packet[Raw].load
        packet_data.append([src_ip, dst_ip, src_port, dst_port, raw_data])
    
    # Create a DataFrame from the packet data
    df = pd.DataFrame(packet_data, columns=['Source IP', 'Destination IP', 'Source Port', 'Destination Port', 'Raw Data'])
    
    # Display the DataFrame
    display(df)

except Exception as e:
    print(f"An error occurred: {str(e)}")

In [2]:
import pandas as pd
from scapy.all import *

# Specify the PCAP file path
pcap_file = '/home/hb/5G_dataset/n3_example.pcap'

try:
    packets = rdpcap(pcap_file)  # Read the PCAP file
    print(f"Total packets in the capture: {len(packets)}\n")

    # Create a list to store packet data
    packet_data = []

    for packet in packets:
        # Initialize all fields to extract
        timestamp = packet.time
        src_ip, dst_ip, src_port, dst_port, proto, length = '', '', '', '', '', len(packet)
        src_mac, dst_mac, raw_data = '', '', ''
        pkt_type = type(packet)
        
        # Ethernet Layer
        if Ether in packet:
            src_mac = packet[Ether].src
            dst_mac = packet[Ether].dst

        # IP Layer
        if IP in packet:
            src_ip = packet[IP].src
            dst_ip = packet[IP].dst
            proto = packet[IP].proto

        # Transport Layer
        if TCP in packet:
            src_port = packet[TCP].sport
            dst_port = packet[TCP].dport
        elif UDP in packet:
            src_port = packet[UDP].sport
            dst_port = packet[UDP].dport
        elif ICMP in packet:
            src_port = 'N/A'
            dst_port = 'N/A'
            icmp_type = packet[ICMP].type
            icmp_code = packet[ICMP].code
        
        # Raw Payload
        if Raw in packet:
            raw_data = packet[Raw].load

        packet_data.append([timestamp, src_mac, dst_mac, src_ip, dst_ip, src_port, dst_port, proto, pkt_type, length, raw_data])
    
    # Create a DataFrame from the packet data
    df = pd.DataFrame(packet_data, columns=['Timestamp', 'Source MAC', 'Destination MAC', 'Source IP', 'Destination IP', 
                                            'Source Port', 'Destination Port', 'Protocol', 'Packet Type', 'Length', 'Raw Data'])
    
    # Display the DataFrame
    display(df)

except Exception as e:
    print(f"An error occurred: {str(e)}")


Total packets in the capture: 92



Unnamed: 0,Timestamp,Source MAC,Destination MAC,Source IP,Destination IP,Source Port,Destination Port,Protocol,Packet Type,Length,Raw Data
0,1615905754.602072,52:54:00:e2:36:87,40:de:ad:d9:ed:d9,172.16.12.2,10.200.11.70,2152,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v@\x02\x01%\x00\x00\x00\x85\x01\x10...
1,1615905754.603009,52:54:00:e2:36:87,40:de:ad:d9:ed:d9,172.16.12.2,10.200.11.70,2152,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v@\x02\x01%RT\x00\x85\x01\x10\x05\x...
2,1615905754.604011,52:54:00:e2:36:87,40:de:ad:d9:ed:d9,172.16.12.2,10.200.11.70,2152,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v@\x02\x01%RT\x00\x85\x01\x10\x05\x...
3,1615905754.605010,52:54:00:e2:36:87,40:de:ad:d9:ed:d9,172.16.12.2,10.200.11.70,2152,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v@\x02\x01%\x00\x00\x00\x85\x01\x10...
4,1615905754.606004,52:54:00:e2:36:87,40:de:ad:d9:ed:d9,172.16.12.2,10.200.11.70,2152,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v@\x02\x01%\x00\x00\x00\x85\x01\x10...
...,...,...,...,...,...,...,...,...,...,...,...
87,1615905754.653013,52:54:00:e2:36:87,40:de:ad:d9:ed:d9,172.16.12.2,10.200.11.70,2152,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v@\x02\x01%\x07\x17\x00\x85\x01\x10...
88,1615905754.653530,40:de:ad:d9:ed:d9,52:54:00:e2:36:87,10.200.11.70,172.16.12.2,61458,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v\x00\x00\x00\x01\x00\x00\x00\x85\x...
89,1615905754.654010,52:54:00:e2:36:87,40:de:ad:d9:ed:d9,172.16.12.2,10.200.11.70,2152,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v@\x02\x01%\x00\x00\x00\x85\x01\x10...
90,1615905754.654535,40:de:ad:d9:ed:d9,52:54:00:e2:36:87,10.200.11.70,172.16.12.2,61458,2152,17,<class 'scapy.layers.l2.Ether'>,936,b'4\xff\x03v\x00\x00\x00\x01\x00\x00\x00\x85\x...


# DDoS

### GPT-3.5

In [None]:
import sys
from scapy.all import *
from collections import defaultdict
import time

pcap_file = '/home/hb/5G_dataset/n3_example.pcap'

def detect_ddos(pcap_file, threshold=100, window_size=10):
    try:
        packets = rdpcap(pcap_file)
    except Exception as e:
        print(f"Failed to read PCAP file: {e}")
        sys.exit(1)

    # Initialize variables
    start_time = None
    packet_count = defaultdict(int)

    # Process packets
    for packet in packets:
        print(packet_count)
        if not start_time:
            start_time = packet.time
        
        current_time = packet.time
        elapsed_time = current_time - start_time

        if elapsed_time > window_size:
            # Check and log potential attacks
            for src_ip, count in packet_count.items():
                pps = count / window_size
                if pps > threshold:
                    print(f"Potential DDoS attack from {src_ip} with {pps:.2f} pps")

            # Reset for next time window
            start_time = current_time
            packet_count.clear()

        if IP in packet:
            packet_count[packet[IP].src] += 1

    # Final check for the last window
    for src_ip, count in packet_count.items():
        pps = count / max(elapsed_time, 1)  # prevent division by zero
        if pps > threshold:
            print(f"Potential DDoS attack from {src_ip} with {pps:.2f} pps")


detect_ddos(pcap_file)


In [2]:
from scapy.all import *
from collections import defaultdict
from datetime import datetime, timedelta

threshold = 100
monitoring_interval = 10

# Dictionary to store packet counts for each source IP
packet_counts = defaultdict(int)

# Function to check for potential DDoS attacks
def check_ddos():
    current_time = datetime.now()
    start_time = current_time - timedelta(seconds=monitoring_interval)
    print(f"Monitoring interval: {start_time} to {current_time}")
    
    # Calculate packets per second for each source IP
    for ip, count in packet_counts.items():
        pps = count / monitoring_interval
        if pps > threshold:
            print(f"Potential DDoS attack detected from {ip}: {pps} packets per second")

    # Reset packet counts for next monitoring interval
    packet_counts.clear()

# Packet sniffing callback function
def packet_callback(packet):
    if 'IP' in packet:
        src_ip = packet['IP'].src
        packet_counts[src_ip] += 1

# Function to read from a PCAP file
def read_pcap(file_path):
    packets = rdpcap(file_path)
    for packet in packets:
        packet_callback(packet)
    check_ddos()

# Example usage
read_pcap('/home/hb/5G_dataset/n3_example.pcap')

Monitoring interval: 2024-04-17 10:27:59.914739 to 2024-04-17 10:28:09.914739
