In [1]:
from torch import cuda, bfloat16
import transformers
from transformers import (
    pipeline,
    logging,
)
import pandas as pd
import os

# Hugging Face Hub repository of the model loaded by this notebook.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Used only for the log message at the end of this cell; the actual weight
# placement is chosen by `device_map='auto'` in the model load below.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Optional 4-bit quantization settings, currently disabled (see the
# commented-out `quantization_config` argument in the model load below).
# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type='nf4',
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=bfloat16
# )

# Access token is read from the environment so it never appears in the notebook.
# `os.environ.get` yields None when the `hf_token` variable is not set.
hf_token = os.environ.get('hf_token')

# Need auth token for these
hf_auth = hf_token
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    # `token` replaces the deprecated `use_auth_token` keyword.
    token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    # NOTE(review): trust_remote_code=True permits running Python code shipped
    # in the model repository; confirm it is actually needed for this model.
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)
# Inference only: switch off training-time behaviour such as dropout.
model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model loaded on cuda:0


In [2]:
# Tokenizer matching `model_id`, fetched with the same access token as the model.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    # `token` replaces the deprecated `use_auth_token` keyword.
    token=hf_auth
)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

In [4]:
# Restrict transformers logging to CRITICAL messages while generating.
logging.set_verbosity(logging.CRITICAL)

# Task description sent verbatim to the base model.
prompt = "Using pybgpstream identify prefixes associated with ASN AS4766 that show high update frequency (announcements and withdrawals) within the period oct 28 13:00 to oct 28 13:15, 2024. Summarize the prefixes, update counts, and provide an assessment of their stability."

# Text-generation pipeline built on the model and tokenizer loaded in earlier cells.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2056,
)
result = pipe(prompt)

# The pipeline returns a list of candidates; show the text of the first one.
print(result[0]['generated_text'])

Using pybgpstream identify prefixes associated with ASN AS4766 that show high update frequency (announcements and withdrawals) within the period oct 28 13:00 to oct 28 13:15, 2024. Summarize the prefixes, update counts, and provide an assessment of their stability. 

```python
import pybgpstream

# Create a BGP Stream reader
reader = pybgpstream.BGPStreamReader(
    'bgpstream://localhost:4739?start_time=2024-10-28T13:00:00&end_time=2024-10-28T13:15:00&as=4766&prefixes=',
    'json'
)

# Initialize a dictionary to store the prefixes and their update counts
prefixes = {}

# Iterate over the BGP stream
for record in reader:
    # Get the prefix and the update type (announcement or withdrawal)
    prefix = record.prefix
    update_type = record.update_type

    # If the prefix is not in the dictionary, add it with an update count of 1
    if prefix not in prefixes:
        prefixes[prefix] = {'announcements': 1, 'withdrawals': 1}
    # If the prefix is already in the dictionary, increment

In [5]:
# Restrict transformers logging to CRITICAL messages while generating.
logging.set_verbosity(logging.CRITICAL)

# Task description sent verbatim to the base model.
prompt = "Using pybgpstream summarize the count of BGP update messages (announcements and withdrawals) for ASN AS4766 between oct 28 13:00 and oct 28 13:15, 2024. Provide total counts as well as breakdowns by 5-minute intervals."

# Text-generation pipeline built on the model and tokenizer loaded in earlier cells.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2056,
)
result = pipe(prompt)

# The pipeline returns a list of candidates; show the text of the first one.
print(result[0]['generated_text'])

Using pybgpstream summarize the count of BGP update messages (announcements and withdrawals) for ASN AS4766 between oct 28 13:00 and oct 28 13:15, 2024. Provide total counts as well as breakdowns by 5-minute intervals. 

```python
import pybgpstream

# Define the time interval
start_time = '2024-10-28 13:00:00'
end_time = '2024-10-28 13:15:00'

# Define the ASN
asn = '4766'

# Create a BGP stream reader
reader = pybgpstream.BGPStreamReader()

# Define the query
query = pybgpstream.Query()
query.set_asn(asn)
query.set_start_time(start_time)
query.set_end_time(end_time)

# Add the query to the reader
reader.add_query(query)

# Read the stream
reader.read_next()

# Initialize counters
total_announcements = 0
total_withdrawals = 0

# Initialize a dictionary to store 5-minute interval counts
interval_counts = {}

# Loop through the stream
while reader.read_next():
    record = reader.get_record()
    if record.get_type() == 'update':
        total_announcements += 1
        interval_counts[

### BGP-LLaMA

In [6]:
from datetime import datetime
import pybgpstream

# Define time range and collectors
from_time = "2024-10-28 13:00:00"
until_time = "2024-10-28 13:15:00"

# ASN that must appear in an update's AS path for the update to be analysed.
# Kept as a string because AS-path hops are compared as whitespace-split tokens.
target_asn = '4766'

stream = pybgpstream.BGPStream(
    from_time=from_time,
    until_time=until_time,
    record_type="updates",
    collectors=["rrc00"]
)

# prefix -> AS paths (each a list of ASN strings) in the order they were observed
announcements = {}

# Iterate over records and elements
for rec in stream.records():
    for elem in rec:
        # Only announcements are kept. Withdrawal elements carry no 'as-path'
        # field, so they could never pass the ASN filter below; skipping them
        # up front makes that explicit instead of leaving an unreachable branch.
        if elem.type != 'A':
            continue

        prefix = elem.fields.get("prefix")
        if prefix is None:
            continue

        as_path = elem.fields.get('as-path', "").split()
        if target_asn not in as_path:
            continue

        # Append in place rather than rebuilding the list on every update.
        announcements.setdefault(prefix, []).append(as_path)

# Summarize AS paths for each prefix associated with the target ASN.
# Every stored path already contains `target_asn`, so no second filter is needed.
for prefix, as_paths in announcements.items():
    path_lengths = sorted(len(path) for path in as_paths)

    print(f"Prefix: {prefix}")
    print(f"Min AS path length: {path_lengths[0]}")
    print(f"Max AS path length: {path_lengths[-1]}")
    # Upper median: for an even number of paths the larger middle value is reported.
    print(f"Median AS path length: {path_lengths[len(path_lengths) // 2]}\n")

    # Highlight path changes between consecutive updates. Updates are numbered
    # from 1, so the first comparable pair ends at update 2.
    for update_no, (previous_path, path) in enumerate(zip(as_paths, as_paths[1:]), start=2):
        if path != previous_path:
            print(f"Significant path change at update {update_no}: {previous_path} -> {path}")

Prefix: 14.55.70.0/23
Min AS path length: 6
Max AS path length: 6
Median AS path length: 6

Significant path change at update 3: ['24482', '141654', '55655', '9002', '4766', '38692'] -> ['202365', '206499', '34549', '174', '4766', '38692']
Significant path change at update 4: ['202365', '206499', '34549', '174', '4766', '38692'] -> ['24482', '141654', '55655', '9002', '4766', '38692']
Prefix: 14.55.72.0/23
Min AS path length: 6
Max AS path length: 6
Median AS path length: 6

Significant path change at update 3: ['24482', '141654', '55655', '9002', '4766', '38692'] -> ['202365', '206499', '34549', '174', '4766', '38692']
Significant path change at update 4: ['202365', '206499', '34549', '174', '4766', '38692'] -> ['24482', '141654', '55655', '9002', '4766', '38692']
Prefix: 1.34.184.0/24
Min AS path length: 7
Max AS path length: 7
Median AS path length: 7

Prefix: 211.52.3.0/24
Min AS path length: 5
Max AS path length: 5
Median AS path length: 5

Prefix: 220.67.208.0/24
Min AS path leng

Comparison of the Two Scripts Based on Syntax and Logic Errors

Llama Script
Syntax Analysis:

Imports and Initialization:

Correctly imports necessary modules: datetime and pybgpstream.
Initializes from_time and until_time with proper datetime strings.
Initializes pybgpstream.BGPStream with appropriate parameters:
from_time, until_time, record_type, and collectors are correctly specified.
Data Structures:

Uses dictionaries announcements and prefix_as_paths for data storage.
Employs standard Python data manipulation techniques without syntax errors.
Iteration and Data Extraction:

Correctly iterates over stream.records() and rec.
Extracts elem.fields, prefix, as-path, and other necessary fields without issues.
Uses fields.get() method safely to handle missing keys.
Overall Syntax:

The script has no syntax errors and adheres to Python's syntax rules.
Proper use of control structures (for loops, if statements).
Logic Analysis:

Filtering AS Paths:

Defines target_asn = '4766' to focus on ASN 4766.
Filters out elements where ASN 4766 is not in the as_path:
```python
if target_asn not in as_path:
    continue
```
This ensures only relevant AS paths are processed.
Data Collection:

Collects announcements and withdrawals, storing AS paths associated with each prefix:
```python
announcements[prefix] = announcements.get(prefix, []) + [as_path]
```
Handles both announcement ('A') and withdrawal ('W') types similarly.
AS Path Analysis:

For each prefix, filters AS paths containing ASN 4766:
```python
as_4766_paths = [path for path in as_paths if '4766' in path]
```
Calculates minimum, maximum, and median AS path lengths.
Detects significant path changes by comparing consecutive AS paths.
Logic Integrity:

The script logically processes BGP updates relevant to ASN 4766.
Accurately calculates statistics and identifies path changes.
Redundancy in filtering could be optimized, but it doesn't affect correctness.
Summary:

Strengths:

Correct syntax and logical flow.
Proper use of pybgpstream for data retrieval and filtering.
Effective data analysis relevant to network engineers.
Weaknesses:

Minor redundancy in filtering AS paths for ASN 4766.
GPT 4o Script
Syntax Analysis:

Imports and Initialization:

Imports datetime, pybgpstream, numpy, and defaultdict.
Issue: numpy is imported as np but not used in the script.
Attempts to initialize pybgpstream.BGPStream with incorrect parameters:
```python
stream = pybgpstream.BGPStream(
    project="routeviews",
    collector="route-views.sg",
    record_type="updates",
    filter=f"peer AS4766",
)
```
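Errors:
project, collector, and filter are not valid parameters for the BGPStream constructor.
Correct parameters should be from_time, until_time, record_type, collectors, and data_interface.
Note (to verify): the pybgpstream v2 API reference lists project, collector, and filter among the BGPStream constructor's keyword arguments, so the claim above should be re-checked against the installed pybgpstream version before it is counted as an error. Passing an unsupported keyword would in any case raise a runtime TypeError rather than a syntax error.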
Adding Filters:

Uses stream.add_interval_filter(start_time, end_time) correctly.
Adds a filter for peer ASN:
```python
stream.add_filter("peer-asn", "4766")
```
Issue: May not filter AS paths containing ASN 4766 but filters peers with ASN 4766.
Overall Syntax:

The script contains syntax errors due to incorrect constructor parameters.
Misuse of pybgpstream API leads to syntactical issues.
Logic Analysis:

Filtering Mechanism:

Misuses the peer-asn filter, which filters BGP peers rather than AS paths containing a specific ASN.
The intended filter should target AS paths containing ASN 4766.
Incorrectly initializes filters in the constructor rather than using add_filter.
Data Collection:

Attempts to collect AS paths and path lengths per prefix.
Uses defaultdict for organizing data, which is appropriate.
AS Path Analysis:

Stores unique AS paths and calculates path lengths.
However, due to incorrect filtering, the data may not correspond to AS paths containing ASN 4766.
Logic Integrity:

The incorrect filter leads to collecting irrelevant or incomplete data.
The analysis performed may not reflect the intended focus on ASN 4766.
Summary:

Strengths:

Uses defaultdict effectively for data organization.
Correctly processes records and elements from pybgpstream.
Weaknesses:

Critical Syntax Errors:
Incorrect parameters in the BGPStream constructor.
Logic Errors:
Misapplication of filters, leading to incorrect data being analyzed.
Unused Imports:
numpy is imported but not utilized.
Scoring Based on Coding Criteria
Scoring Criteria:

Syntax Errors: Correct use of programming language syntax and APIs.
Logic Errors: Correct implementation of algorithms and data processing to achieve the intended functionality.
Llama Script
Syntax Score: 10/10
No syntax errors; correct use of Python and pybgpstream APIs.
Logic Score: 9/10
Logic is sound and achieves intended functionality.
Minor redundancy in filtering could be optimized.
Total Score: 9.5/10
GPT 4o Script
Syntax Score: 4/10
Major syntax errors in the initialization of pybgpstream.BGPStream.
Incorrect use of parameters that do not exist in the constructor.
Unused imports (e.g., numpy).
Logic Score: 3/10
Incorrect filtering logic results in analyzing the wrong dataset.
The intended functionality (analyzing AS paths containing ASN 4766) is not achieved.
Total Score: 3.5/10
Conclusion
Llama Script:

Strengths:

Correctly filters and processes BGP updates related to ASN 4766.
Accurate computation of AS path statistics.
Suitable for network engineers needing detailed BGP routing analysis.
Recommendations:

Optimize by removing redundant filtering steps.
Ensure that all data structures are necessary and efficiently used.
GPT 4o Script:

Issues Identified:

Incorrect Initialization:
The BGPStream constructor is used improperly with invalid parameters.
Filter Misapplication:
The peer-asn filter does not achieve the intended filtering of AS paths containing ASN 4766.
Syntax Errors:
The script would fail to run due to incorrect API usage.
Unused Imports:
Unnecessary imports should be removed to clean up the code.
Recommendations:

Correct BGPStream Initialization:
```python
stream = pybgpstream.BGPStream(
    from_time=start_time,
    until_time=end_time,
    record_type="updates",
    collectors=["route-views.sg"]
)
```
Proper Filtering:
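Since pybgpstream does not support filtering AS paths directly via add_filter, filter AS paths within the iteration (note, to verify: the BGPStream filter documentation describes an aspath filter that takes a regular expression, so server-side AS-path filtering may also be available; filtering inside the loop, as below, works either way):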
```python
as_path = elem.fields.get("as-path", "")
if "4766" not in as_path.split():
    continue
```
Remove Unused Imports:
Delete import numpy as np unless it's used later in the code.
Final Remarks:

For Accurate BGP Analysis:

Ensure that the filters applied match the intended data to be analyzed.
Verify that all API calls conform to the library's documentation.
Code Maintenance:

Regularly review and test code to catch syntax and logic errors early.
Keep code clean by removing unnecessary imports and comments.
Overall Assessment:

The Llama script demonstrates a strong understanding of both Python programming and the pybgpstream library, making it a reliable tool for BGP data analysis.
The GPT 4o script needs significant revisions to correct syntax and logic errors before it can be effectively used for BGP analysis related to ASN 4766.

### Anomaly detection

In [8]:
import datetime
from pybgpstream import BGPStream
from collections import defaultdict
import re

def analyze_prefix_flapping(stream, target_asn=4766, min_flaps=2):
    """Report prefixes that were repeatedly announced and then withdrawn.

    A "flap" is an announcement ('A') immediately followed, in the per-prefix
    event sequence, by a withdrawal ('W'). Only elements received from the
    peer whose ASN equals ``target_asn`` are considered.

    NOTE(review): the filter is on the *peer* ASN (the collector's BGP
    neighbour), not on AS-path membership or origin — confirm this is the
    intended meaning of "associated with" the ASN.

    Args:
        stream: A configured stream object whose ``records()`` yields records
            that iterate over elements exposing ``time``, ``type``,
            ``peer_asn`` and ``fields``. The stream passed in is the one that
            is read; it is no longer replaced inside the function.
        target_asn: Peer ASN to keep, as an integer, because it is compared
            directly against ``elem.peer_asn``.
        min_flaps: Minimum number of flaps a prefix needs to be reported.

    Returns:
        A list with one dict per reported prefix, holding:
        ``'prefix'`` (the prefix), ``'flaps'`` (total number of A->W
        transitions) and ``'duration'`` (minutes between the announcement and
        the withdrawal of the most recent flap).
    """
    # prefix -> [(timestamp_seconds, elem_type), ...] in arrival order
    events_by_prefix = defaultdict(list)

    for rec in stream.records():
        for elem in rec:
            prefix = elem.fields.get("prefix")
            if prefix is None:
                # Elements without a prefix cannot contribute to a flap.
                continue

            # Integer comparison: comparing against the string 'AS4766'
            # never matched, which left the result permanently empty.
            if elem.peer_asn == target_asn:
                events_by_prefix[prefix].append((elem.time, elem.type))

    prefixes_to_report = []
    for prefix, events in events_by_prefix.items():
        flaps = 0
        last_flap_minutes = None

        # Walk every pair of consecutive events; stepping by two would skip
        # the withdrawal that directly follows an announcement.
        for (prev_time, prev_type), (curr_time, curr_type) in zip(events, events[1:]):
            if prev_type == 'A' and curr_type == 'W':
                flaps += 1
                last_flap_minutes = (curr_time - prev_time) / 60

        # One entry per prefix, emitted after all of its events were counted.
        if flaps >= min_flaps:
            prefixes_to_report.append({
                'prefix': prefix,
                'flaps': flaps,
                'duration': last_flap_minutes
            })

    return prefixes_to_report


# The caller owns the stream configuration; the function only consumes it.
flap_stream = BGPStream(
    from_time="2024-10-28 13:00:00",
    until_time="2024-10-28 13:15:00",
    record_type="updates",
    collectors=["rrc00"],
)

report = analyze_prefix_flapping(flap_stream)
print(report)

[]
