In [1]:
import transformers
from transformers import (
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# import seaborn as sns
from pylab import rcParams
import os

In [2]:
model_id = 'hyonbokan/BGPStream13-10k-cutoff-1024-max-2048'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_8bit=True,
# )

# Need auth token for these
hf_auth = os.environ.get('hf_token')

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)


model.eval()
print(f"Model loaded on {device}")

Downloading config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00003.bin:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00003.bin:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Model loaded on cuda:0


In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

tokenizer.pad_token = tokenizer.eos_token
# tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

Downloading tokenizer_config.json:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

# Model: BGPStream13-10k-cutoff-1024-max-2048

## Task 1: Analysis of BGP Table Statistics
Use PyBGPStream to analyze the BGP table statistics for AS131072, focusing on the characteristics of advertisements. Print the metrics evaluating the degree of similarity of the AS paths of these specific advertisements to their encompassing aggregate.

In [None]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Use PyBGPStream to analyze the BGP table statistics for AS131072, focusing on the characteristics of advertisements. Print the metrics evaluating the degree of similarity of the AS paths of these specific advertisements to their encompassing aggregate."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=718)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [1]:
from pybgpstream import BGPStream

# Initialize BGPStream
stream = BGPStream(
    from_time="2021-01-01 00:00:00", until_time="2021-01-02 01:00:00",
    # collectors=["route-views"], # specify the collectors
    record_type="updates"
)

# Dictionary to store the AS paths and their corresponding aggregates
as_paths = {}

# Loop through the stream records
for rec in stream.records():
    for elem in rec:
        if elem.type == "A":
            as_path = elem.fields["as-path"].split()
            origin_as = as_path[-1]
            if origin_as == "131072":
                # Store the AS path and its corresponding aggregate
                as_paths[elem.fields["prefix"]] = elem.fields["as-path"]

# Calculate the degree of similarity of the AS paths
for prefix, as_path in as_paths.items():
    # Calculate the degree of similarity
    degree_of_similarity = len(set(as_path.split())) / len(as_path.split())
    print(f"Prefix: {prefix}, Degree of Similarity: {degree_of_similarity}")

## Task 2: Peering Relationships Analysis
Use PyBGPStream to examine the list of AS numbers that AS131072 peers with, including any known policy attributes or tags. Determine the types of peering relationships (transit, peer, customer) present and discuss the implications of these relationships for AS131072's network reachability and performance. Highlight how these peering arrangements influence traffic flow and the strategic importance of each peering type in the broader network ecosystem.

In [6]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Use PyBGPStream to examine the list of AS numbers that AS131072 peers with, including any known policy attributes or tags. Determine the types of peering relationships (transit, peer, customer) present and discuss the implications of these relationships for AS131072's network reachability and performance. Highlight how these peering arrangements influence traffic flow and the strategic importance of each peering type in the broader network ecosystem."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=718)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Use PyBGPStream to examine the list of AS numbers that AS131072 peers with, including any known policy attributes or tags. Determine the types of peering relationships (transit, peer, customer) present and discuss the implications of these relationships for AS131072's network reachability and performance. Highlight how these peering arrangements influence traffic flow and the strategic importance of each peering type in the broader network ecosystem. [/INST]  To examine the list of AS numbers that AS131072 peers with, including any known policy attributes or tags, and to determine the types of peering relationships present, we can use PyBGPStream to collect BGP data from the RouteViews collector. We can then use the BGPStream library to analyze the data and extract the required information.

Here is an example of how to do this:
```
import pybgpstream

# Initialize BGPStream
stream = pybgpstream.BGPStream(
    from_time="2023-03-10 00:00:00", until_time="2023-03-10 23:59:59",

In [None]:
import pybgpstream

# Initialize BGPStream
stream = pybgpstream.BGPStream(
    from_time="2023-03-10 00:00:00", until_time="2023-03-10 23:59:59",
    collectors=["route-views"],
    record_type="updates"
)

# Collect data
for rec in stream.records():
    for elem in rec:
        if elem.type == "A":
            as_path = elem.fields["as-path"].split()
            origin_as = as_path[-1]
            if origin_as == "131072":
                print(f"Peer ASN: {elem.peer_asn}, Policy Attributes: {elem.policy_attributes}, Tag: {elem.tag}")

## Task 3: Identification of Anomalies
Use PyBGPStream to identify and analyze any anomalies in the BGP announcements from AS131072 over a specified period. Look for unusual patterns such as sudden increases in the number of prefixes, withdrawal of significant routes, or shifts in AS_PATHs and alert if such anomalies are detected.

In [7]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Use PyBGPStream to identify and analyze any anomalies in the BGP announcements from AS131072 over a specified period. Look for unusual patterns such as sudden increases in the number of prefixes, withdrawal of significant routes, or shifts in AS_PATHs and alert if such anomalies are detected."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=718)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Use PyBGPStream to identify and analyze any anomalies in the BGP announcements from AS131072 over a specified period. Look for unusual patterns such as sudden increases in the number of prefixes, withdrawal of significant routes, or shifts in AS_PATHs and alert if such anomalies are detected. [/INST]  Sure! Here's an example code that uses PyBGPStream to identify and analyze any anomalies in the BGP announcements from AS131072 over a specified period:
```
import pybgpstream

# Initialize the BGPStream
stream = pybgpstream.BGPStream(
    from_time="2021-01-01 00:00:00", until_time="2021-01-01 01:00:00 UTC",
    collectors=["route-views.jinx"],
    record_type="updates",
    filter="peer 131072"
)

# Loop through the stream records
for rec in stream.records():
    for elem in rec:
        # Check for unusual patterns
        if elem.type == "A":
            as_path = elem.fields["as-path"].split()
            prefix = elem.fields["prefix"]
            if len(as_path) > 10 or le

In [None]:
import pybgpstream

# Initialize the BGPStream
stream = pybgpstream.BGPStream(
    from_time="2021-01-01 00:00:00", until_time="2021-01-02 01:00:00 UTC",
    collectors=["route-views.jinx"],
    record_type="updates",
    filter="peer 131072"
)

# Loop through the stream records
for rec in stream.records():
    for elem in rec:
        # Check for unusual patterns
        if elem.type == "A":
            as_path = elem.fields["as-path"].split()
            prefix = elem.fields["prefix"]
            if len(as_path) > 10 or len(as_path) < 2:
                print(f"Anomaly detected! AS Path length for prefix {prefix} is {len(as_path)}")
            elif elem.fields["withdrawn"]:
                print(f"Anomaly detected! Prefix {prefix} withdrawn")
            elif elem.fields["as-path"]!= elem.fields["prev-as-path"]:
                print(f"Anomaly detected! AS Path shift for prefix {prefix}")

# BGP Path Changes
Use PyBGPStream to review a series of BGP update messages that show alterations in the AS_PATH attribute for key routes managed by AS131072.

# Knowledge Test

In [6]:
import json
import re

input_file_path = "/home/hb/LLM-research/evaluation/BGP/bgp_test_multiple_choice_v2.json" 

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

generated_answers = []
ground_truth = [q_dict["answer"] for q_dict in questions_data]
n_question = 0

for q_dict in questions_data:
    question = q_dict["question"]
    options = q_dict["options"]
    
    # Construct prompt with question and options
    prompt = f"Choose the correct answer: {question}\n" + "\n".join(options)
    # print(prompt)
    
    # Generate answer using the language model
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Generated Answer: {generated_answer}")
    
    # Parse the generated answer
    answer_lines = generated_answer.splitlines()
    parsed_answer = ""
    for line in answer_lines:
            if line.startswith("Answer: ") or line.startswith("Correct answer: "):
                parsed_answer = re.search(r'\b(\w)\)', line).group(1)
                print(f"Parsed_answer: {parsed_answer}")
                break
    generated_answers.append(parsed_answer)
    n_question += 1
    print(f"----------------{n_question}-----------------")
print(generated_answers)


total_questions = len(questions_data)
correct_answers = 0

for ground_answer, generated_answer in zip(ground_truth, generated_answers):
    if generated_answer.lower() == ground_answer.lower():
        print("Correct")
        correct_answers += 1
    else:
        print(f"GPT answer: {generated_answer}. Correct answer: {ground_answer}")
        print("Incorrect")

accuracy = (correct_answers / total_questions) * 100

print(f"Accuracy: {accuracy:.2f}%")
print(f"Correct answers: {correct_answers}")
print(f"Incorrect answers: {total_questions - correct_answers}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: When analyzing BGP route propagation, which attribute provides information about the path that the BGP update has traversed?
a) AS-Path
b) MED (Multi-Exit Discriminator)
c) Local Preference
d) Next Hop

Answer: a) AS-Path

Explanation: The AS-Path attribute is used to track the path that a BGP update has traversed. It is a list of Autonomous System (AS) numbers that the update has passed through. This attribute helps in identifying the path that the update has
Parsed_answer: a
----------------1-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: Which of the following is a primary purpose of BGP?
a) Establishing communication between devices within a LAN
b) Exchanging routing information between autonomous systems
c) Optimizing local network traffic
d) Assigning IP addresses to devices

Answer: b) Exchanging routing information between autonomous systems.

Explanation: BGP (Border Gateway Protocol) is a protocol used to exchange routing information between autonomous systems. It is used to establish communication between devices within a LAN, optimize local network traffic, and assign IP
Parsed_answer: b
----------------2-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the main function of the AS-Path attribute in BGP?
a) Identifying the IP address of the next hop
b) Preventing route hijacking
c) Specifying the exit point from an AS
d) Tracking the ASes a route has traversed

Answer: d) Tracking the ASes a route has traversed. The AS-Path attribute is used to track the ASes a route has traversed, allowing BGP routers to determine the best path to a destination. It is not used to identify the IP address of the next
Parsed_answer: d
----------------3-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: Which BGP attribute indicates the preferred exit point from an AS?
a) ORIGIN
b) LOCAL-PREF
c) NEXT-HOP
d) AS-PATH

Answer: c) NEXT-HOP

Explanation: The NEXT-HOP attribute indicates the IP address of the next-hop router for a particular route. This attribute is used to determine the preferred exit point from an Autonomous System (AS). The NEXT-HOP attribute is used by BGP routers to determine the best path to a
Parsed_answer: c
----------------4-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: Which BGP message is used to establish a BGP session?
a) KEEPALIVE
b) OPEN
c) UPDATE
d) NOTIFICATION

Answer: b) OPEN

Explanation: The OPEN message is used to establish a BGP session between two BGP peers. It is sent by the initiator of the session and contains information about the BGP version, the BGP identifier, and the BGP hold timer. The OPEN message is used to establish a BGP session and is the first message exchanged between two B
Parsed_answer: b
----------------5-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the primary purpose of the BGP NEXT-HOP attribute?
a) Specifies the IP address of the sender
b) Indicates the preferred exit point from an AS
c) Defines the next router in the path
d) Specifies the route's origin

Answer: c) Defines the next router in the path

Explanation: The BGP NEXT-HOP attribute is used to specify the next router in the path of a BGP route. It is used to indicate the IP address of the next router in the path,
Parsed_answer: c
----------------6-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: Which BGP message is used to advertise new or withdrawn routes?
a) KEEPALIVE
b) OPEN
c) UPDATE
d) NOTIFICATION

Answer: c) UPDATE

Explanation: The UPDATE message is used to advertise new or withdrawn routes. It is sent by a router to its peers to inform them of changes in the network. The KEEPALIVE message is used to maintain the connection between BGP peers, the OPEN message is used to establish a BGP session, and the NOTIF
Parsed_answer: c
----------------7-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the purpose of BGP route aggregation?
a) Breaking down large routes into smaller subnets
b) Combining multiple routes into a single summarized route
c) Assigning priority to specific routes
d) Applying policies to routes

Answer: b) Combining multiple routes into a single summarized route.

Explanation: BGP route aggregation is the process of combining multiple routes into a single summarized route. This helps reduce the size of the routing table and improve network performance. By combining multiple routes into a single summarized
Parsed_answer: b
----------------8-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the term for maliciously advertising IP prefixes an AS doesn't own?
a) BGP Route Hijacking
b) BGP Anomaly Detection
c) BGP Route Flap
d) BGP Route Aggregation

Answer: a) BGP Route Hijacking.

Explanation: BGP Route Hijacking is the term used to describe the malicious advertisement of IP prefixes an AS does not own. This can be done by an attacker to redirect traffic away from its intended destination, or
Parsed_answer: a
----------------9-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: BGP Prefix Origin Validation (ROV) is primarily aimed at preventing:
a) BGP route damping
b) BGP session establishment
c) BGP route leaks and hijacks
d) BGP route aggregation

Answer: c) BGP route leaks and hijacks.

Explanation: BGP Prefix Origin Validation (ROV) is a security feature that helps prevent route leaks and hijacks by verifying the origin of BGP routes. It ensures that routes are not propagated from an unexpected source
Parsed_answer: c
----------------10-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: Which BGP attribute is used to group routes into categories?
a) LOCAL-PREF
b) MED
c) AS-PATH
d) COMMUNITY

Answer: d) COMMUNITY

Explanation: The COMMUNITY attribute is used to group routes into categories. It is used to identify routes that are part of a specific community, such as a specific AS or a specific group of ASes. This attribute is used to influence route selection and to control the propagation of routes within a network.

Note: The other
Parsed_answer: d
----------------11-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: BGP confederations are used to:
a) Optimize local network traffic
b) Divide an AS into sub-ASes for scalability
c) Group routes into categories
d) Handle BGP session establishment

Answer: b) Divide an AS into sub-ASes for scalability. BGP confederations are used to divide an Autonomous System (AS) into sub-ASes, allowing for better scalability and improved performance. This is done by grouping multiple ASes into a single confederation, allowing them to share a single BGP
Parsed_answer: b
----------------12-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: Which BGP attribute is used to influence route selection by assigning a value to the route?
a) ORIGIN
b) MED
c) AS-PATH
d) LOCAL-PREF

Answer: d) LOCAL-PREF

Explanation: The LOCAL-PREF attribute is used to influence route selection by assigning a value to the route. This attribute is used to indicate the preference of a route within a single autonomous system. The higher the value, the more preferred the route is. This attribute is used to influence route selection
Parsed_answer: d
----------------13-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the purpose of the BGP NOTIFICATION message?
a) Establish a BGP session
b) Advertise new routes
c) Signal an error or event in the BGP session
d) Request a route refresh

Answer: c) Signal an error or event in the BGP session.

Explanation: The BGP NOTIFICATION message is used to signal an error or event in the BGP session. It is used to inform the other BGP peer of an error or event that has occurred in the session, such as a
Parsed_answer: c
----------------14-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the primary purpose of the BGP Community attribute?
a) Indicate the preferred exit point
b) Prevent route flapping
c) Define route policies based on grouping
d) Determine the AS-Path length

Answer: c) Define route policies based on grouping. The BGP Community attribute is used to group routes and define policies based on those groups. It allows for more granular control over route selection and can be used to influence the path taken by traffic. The other options are incorrect because the primary purpose of the BGP Community attribute is not to indicate the
Parsed_answer: c
----------------15-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: BGP Route Reflectors are used to address the scalability issues associated with:
a) Excessive route dampening
b) Route summarization
c) Full mesh BGP peering
d) BGP prefix hijacking

Answer: b) Route summarization. BGP Route Reflectors are used to address the scalability issues associated with route summarization. They reduce the number of BGP updates by summarizing routes and propagating them to other BGP peers. This helps to reduce the number of BGP updates and improve scalability.

Question
Parsed_answer: b
----------------16-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the primary purpose of the BGP Path Attribute?
a) Identify the exit point from an AS
b) Indicate the origin of a route
c) Determine the optimal route
d) Filter BGP routes

Answer: c) Determine the optimal route

Explanation: The primary purpose of the BGP Path Attribute is to determine the optimal route for a given destination. It contains information about the route, such as the origin AS, the AS path, the next hop, and the local preference. This information is used by BGP routers
Parsed_answer: c
----------------17-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: BGP Prefix Filtering is used to:
a) Prevent route hijacking
b) Optimize route propagation
c) Establish BGP sessions
d) Calculate BGP AS-Paths

Answer: a) Prevent route hijacking

Explanation: BGP Prefix Filtering is used to prevent route hijacking by allowing an administrator to filter out routes that are not desired. This is done by configuring a prefix list that specifies which routes should be accepted or rejected. By using prefix filtering, an administrator can prevent
Parsed_answer: a
----------------18-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: In BGP, what is the primary purpose of the ORIGIN attribute?
a) Identify the exit point from an AS
b) Determine the preferred route within an AS
c) Indicate the origin of a route
d) Assign values for path selection

Answer: c) Indicate the origin of a route. The ORIGIN attribute is used to indicate the origin of a route, such as an IGP or an EGP. It is used to identify the source of a route and to determine the preferred route within an AS. It is not used to
Parsed_answer: c
----------------19-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What does the BGP Maximum Prefix Limit control?
a) The number of BGP routes that can be aggregated
b) The maximum length of the AS-Path attribute
c) The number of prefixes a BGP router will accept
d) The BGP route redistribution process

Answer: c) The number of prefixes a BGP router will accept. The BGP Maximum Prefix Limit controls the number of prefixes a BGP router will accept from its peers. This limit is used to prevent the router from accepting too many prefixes
Parsed_answer: c
----------------20-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the purpose of the BGP Large Communities attribute?
a) Determine the optimal route path
b) Prevent BGP session timeouts
c) Provide more efficient route summarization
d) Group routes into categories based on policies

Answer: c) Provide more efficient route summarization. The BGP Large Communities attribute is used to group routes into categories based on policies, allowing for more efficient route summarization. It is not used to determine the optimal route path, prevent BGP session timeouts, or group routes into categories based on policies.
Parsed_answer: c
----------------21-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: In BGP, what is the significance of the BGP Weight attribute?
a) It determines the exit point from an AS
b) It influences the route selection within an AS
c) It specifies the BGP session timeout value
d) It defines the route origin

Answer: b) It influences the route selection within an AS

Explanation: The BGP Weight attribute is used to influence the route selection within an Autonomous System (AS). It is used to prioritize routes within an AS, allowing the network administrator to control the
Parsed_answer: b
----------------22-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: How does the BGP AS Confederation help address the full mesh peering challenge?
a) It reduces the number of AS-Path segments
b) It introduces hierarchical routing within a single AS
c) It increases the number of BGP sessions between routers
d) It optimizes the BGP Weight attribute

Answer: b) It introduces hierarchical routing within a single AS.

Explanation: BGP AS Confederation is a technique used to address the full mesh peering challenge by introducing hierarchical routing within a single AS.
Parsed_answer: b
----------------23-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is BGPsec and how does it enhance BGP security?
a) BGPsec is a protocol for secure BGP session establishment
b) BGPsec is an extension to BGP that adds cryptographic verification to BGP route announcements
c) BGPsec is a mechanism for prefix aggregation in BGP
d) BGPsec is used to prevent BGP route leaks

Answer: b) BGPsec is an extension to BGP that adds cryptographic verification to BGP route announcements. BGPsec is a protocol that
Parsed_answer: b
----------------24-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: When is the BGP Extended Community attribute used, and what information does it convey?
a) It is used for route summarization within a single AS
b) It conveys policy information and inter-domain actions associated with a route
c) It determines the local preference of a BGP route
d) It specifies the AS-Path segment of a route

Answer: b) It conveys policy information and inter-domain actions associated with a route.

Explanation: The BGP Extended Community attribute is used to convey policy information and inter-
Parsed_answer: b
----------------25-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Input length of input_ids is 141, but `max_length` is set to 128. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


Generated Answer: Choose the correct answer: What is the concept of BGP Flowspec, and how does it enhance BGP routing control?
a) BGP Flowspec is a mechanism to advertise real-time network traffic statistics
b) BGP Flowspec is used for route aggregation in large networks
c) BGP Flowspec allows the dissemination of traffic filtering rules using BGP updates
d) BGP Flowspec determines the AS-Path of a route

Answer: c) BGP Flowspec allows the dissemination of traffic filtering rules using BGP updates. BGP Flowspec is a
Parsed_answer: c
----------------26-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: Explain the difference between BGP Route Reflection and BGP Confederations.
a) BGP Route Reflection optimizes BGP sessions within an AS, while BGP Confederations divide an AS into smaller sub-ASes for scalability
b) BGP Route Reflection is used to group BGP routes, while BGP Confederations enhance BGP security
c) BGP Route Reflection is a method to prevent BGP route hijacking, while BGP Confederations manage BGP community attributes
d) BGP Route Reflection establishes BGP sessions between ASes, while BGP Confederations control the BGP Weight attribute

----------------27-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is BGP Route Damping, and how does it impact BGP routing stability?
a) BGP Route Damping is a mechanism to prevent BGP prefix aggregation, ensuring route granularity
b) BGP Route Damping is a technique to mitigate the impact of BGP prefix hijacking
c) BGP Route Damping is used to reduce the rate of BGP route updates in response to route flapping
d) BGP Route Damping is a security feature that encrypts BGP updates to prevent eavesdropping

----------------28-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Input length of input_ids is 133, but `max_length` is set to 128. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


Generated Answer: Choose the correct answer: How does BGP Prefix Hijacking occur, and what are the potential consequences?
a) BGP Prefix Hijacking occurs when BGP sessions are established without proper authentication, leading to data leaks
b) BGP Prefix Hijacking involves advertising IP prefixes an AS doesn't own, potentially redirecting traffic to unauthorized paths
c) BGP Prefix Hijacking is a result of BGP session timeouts causing route instability
d) BGP Prefix Hijacking is a technique to achieve BGP route summarization


----------------29-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the purpose of the BGP RPKI (Resource Public Key Infrastructure) framework, and how does it enhance BGP security?
a) BGP RPKI is used for route reflection in BGP Confederations, enhancing scalability
b) BGP RPKI provides cryptographic verification of BGP route announcements, preventing prefix hijacking
c) BGP RPKI optimizes the BGP Local Preference attribute for route selection
d) BGP RPKI is a mechanism for BGP prefix aggregation, reducing the number of routes exchanged

----------------30-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Input length of input_ids is 140, but `max_length` is set to 128. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


Generated Answer: Choose the correct answer: Describe the purpose of the BGP FlowSpec Redirect action and how it affects network traffic.
a) BGP FlowSpec Redirect is used to filter incoming BGP routes based on their AS-Path
b) BGP FlowSpec Redirect redirects incoming network traffic matching specified criteria to a designated next-hop
c) BGP FlowSpec Redirect increases the BGP session timeout value, reducing BGP convergence time
d) BGP FlowSpec Redirect optimizes the BGP Weight attribute for route selection

Answer: b) BGP FlowSpec Redirect redirects
Parsed_answer: b
----------------31-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What is the difference between BGP Route Refresh and BGP Full Withdraw?
a) BGP Route Refresh is used to advertise new routes, while BGP Full Withdraw indicates route errors
b) BGP Route Refresh is a mechanism for re-advertising BGP routes, while BGP Full Withdraw removes all prefixes associated with a peer
c) BGP Route Refresh is used for BGP prefix aggregation, while BGP Full Withdraw triggers route convergence
d) BGP Route Refresh optimizes the BGP LOCAL-PREF attribute, while BGP Full Withdraw prevents route flapping

----------------32-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Input length of input_ids is 128, but `max_length` is set to 128. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


Generated Answer: Choose the correct answer: How does the BGP Add-Path feature enhance BGP route selection and convergence?
a) BGP Add-Path enables BGP routers to advertise multiple alternate paths for a given prefix
b) BGP Add-Path decreases the BGP session establishment time, enhancing route convergence
c) BGP Add-Path optimizes the BGP Weight attribute for route selection
d) BGP Add-Path prevents BGP prefix hijacking by adding cryptographic signatures to BGP updates

Answer: a) BGP Add-Path enables BGP rou
Parsed_answer: a
----------------33-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer: What are BGP Route Reflection Clusters, and how do they contribute to BGP scalability?
a) BGP Route Reflection Clusters are used to enhance BGP security by encrypting BGP updates
b) BGP Route Reflection Clusters group routers within an AS, reducing the number of BGP sessions needed for route reflection
c) BGP Route Reflection Clusters prevent BGP prefix hijacking by using cryptographic verification
d) BGP Route Reflection Clusters optimize the BGP MED attribute for efficient route selection

----------------34-----------------
Generated Answer: Choose the correct answer: Explain the concept of BGP PIC (Prefix Independent Convergence) and its significance in enhancing network reliability.
a) BGP PIC ensures that BGP routers converge to the same routing table in the event of network failures
b) BGP PIC introduces route summarization to improve BGP scalability
c) BGP PIC reduces the BGP session establishment time, enhancing route convergence
d)

In [None]:
import json
import re

input_file_path = "/home/hb/LLM-research/evaluation/bgp_test_fill_the_blank.json" 

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

generated_answers_fb = []
filled_answers = []

n_question = 0
for q_dict in questions_data:
    question = q_dict["question"]
    answer = q_dict["answer"]

    # Generate answer using the language model
    prompt = f"[INST]{question} \n Fill the blank: \n [/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Output: {generated_answer}")
    generated_answers_fb.append(generated_answer)
    
    generated_answer = re.search(r'\[/INST\]\s+(.*)', generated_answer).group(1).strip()
    if "Sure" in generated_answer:
        generated_lines = generated_answer.split('\n')
        if len(generated_lines) > 1:
            generated_answer = '\n'.join(generated_lines[1:])  # Skip the first line
    # generated_answers_fb.append(generated_answer)
    # print(f"Generated answer: {generated_answer}")
    # Find the position of the placeholder in the first string
    placeholder_position = question.find("________")

    # Extract the content filled in the placeholder
    filled_content = generated_answer[placeholder_position:placeholder_position + len("autonomous systems (ASes)")]
    filled_answers.append(filled_content)
    n_question += 1
    print(f"Filled content: {filled_content}")
    print(f"---------------------{n_question}-----------------------------")

In [None]:
input_file_path = "/home/hb/LLM-research/evaluation/bgp_test_QA_val.json" 

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=512)

generated_answers_qa = []

n_question = 0
for q_dict in questions_data:
    n_question += 1
    question = q_dict["question"]
    # Generate answer using the language model
    prompt = f"[INST]{question}[/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Output: {generated_answer}")
    generated_answers_qa.append(generated_answer)
    print(f"---------------------{n_question}-----------------------------")

### LLaMA2-7B:
`Multiple Choice v2`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18

`Multiple Choice v1`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

### LLaMA2-13B:
`Multiple Choice v2`:
    Accuracy: 60.00%
    Correct answers: 21
    Incorrect answers: 14

`Multiple Choice v1`:
    Accuracy: 71.43%
    Correct answers: 25
    Incorrect answers: 10

### BGPtest9- cosine - `Best`:
`Multiple Choice v2`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v1`:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6

### BGPtest10- constant:
`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13

`Multiple Choice v1`:
    - Accuracy: 71.43%
    - Correct answers: 25
    - Incorrect answers: 10

`Fill the blank v1`:
    - Accuracy: 88.57%
    - Correct answers: 31
    - Incorrect answers: 4

`Fill the blank v2`:
    - Accuracy: 91.42%
    - Correct answers: 33
    - Incorrect answers: 2


### BGP-LLaMA-1-cosine-2k-alpha64:
`Multiple Choice v2`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

### BGP-LLaMA-2-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v2`:
    Accuracy: 54.29%
    Correct answers: 19
    Incorrect answers: 16


### BGP-LLaMA-2(combined)-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 80.00%
    Correct answers: 28
    Incorrect answers: 7

`Multiple Choice v2`:

    Accuracy: 65.71%
    Correct answers: 23
    Incorrect answers: 12

### BGP-LLaMA-13b-2iter-40k-cutoff-max-2048: ??
`Multiple Choice v1`:
    Accuracy: 77.14%
    Correct answers: 27
    Incorrect answers: 8

    
`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13

`PyBGPStream`: not pass

### BGP-LLaMA-13b-3-30k-cutoff-max-2048:
`Multiple Choice v1`:
    Accuracy: 57.14%
    Correct answers: 20
    Incorrect answers: 15

`Multiple Choice v2`:
    Accuracy: 54%

`PyBGPStream`: pass

### BGP-LLaMA-13b-50k-cutoff-max-2048: 
`Multiple Choice v1`:
    Accuracy: 60.00%
    Correct answers: 21
    Incorrect answers: 14
`PyBGPStream`: pass

### BGP-LLaMA-13b-20k-cutoff-max-2048: 
`Multiple Choice v1`
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18
`PyBGPStream`: pass


### BGP-LLaMA-13b-20k-cutoff-1024-max-none:
`Multiple Choice v1`
    Accuracy: 45.71%
    Correct answers: 16
    Incorrect answers: 19
`PyBGPStream`: pass


### BGP-LLaMA-13b-30k-cutoff-1024-max-none:
`Multiple Choice v1`
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13

`PyBGPStream`: pass


### BGP-LLaMA-13b-40k-cutoff-1024-max-none:
`Multiple Choice v1`
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18
`PyBGPStream`: not pass

### BGP-LLaMA-13b-50k-cutoff-1024-max-none:
`Multiple Choice v1`
    Accuracy: 40.00%
    Correct answers: 14
    Incorrect answers: 21
`PyBGPStream`: not pass


### BGP-LLaMA26k-13b-30k-cutoff-1024-max-None:
`Multiple Choice v1`: not pass

`PyBGPStream`: pass

### BGP-LLaMA26k-13b-20k-cutoff-1024-max-None:
`Multiple Choice v1`:
Incorrect
Accuracy: 37.14%
Correct answers: 13
Incorrect answers: 22
`PyBGPStream`: pass

### BGP-LLaMA26k-13b-10k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13
`PyBGPStream`: pass (better than previous)


### BGP-LLaMA26k-13b-5k-cutoff-1024-max-2048 - `Best Combined`
`Multiple Choice v1`:
Accuracy: 82.86%
Correct answers: 29
Incorrect answers: 6

`Multiple Choice v2`:
Accuracy: 71.43%
Correct answers: 25
Incorrect answers: 10

### BGP-LLaMA-knowledge-20k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13

`Multiple Choice v2`:
Accuracy: 48.57%
Correct answers: 17
Incorrect answers: 18

### BGP-LLaMA-knowledge-5k-cutoff-1024-max-2048 `best knowledge`
`Multiple Choice v1`:
Accuracy: 85.71%
Correct answers: 30
Incorrect answers: 5

`Multiple Choice v2`:
Accuracy: 71.43%
Correct answers: 25
Incorrect answers: 10

### BGP-LLaMA-knowledge-3k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 82.86%
Correct answers: 29
Incorrect answers: 6

`Multiple Choice v2`:
Accuracy: 74.29%
Correct answers: 26
Incorrect answers: 9

### BGP-LLaMA-knowledge-2k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 82.86%
Correct answers: 29
Incorrect answers: 6

`Multiple Choice v2`:
Accuracy: 71.43%
Correct answers: 25
Incorrect answers: 10