In [1]:
import transformers
from transformers import (
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# import seaborn as sns
from pylab import rcParams
import os

In [2]:
# Hugging Face Hub checkpoint evaluated in this notebook.
model_id = 'hyonbokan/BGP-LLaMA26k-13b-10k-cutoff-1024-max-2048'

# Label for the status message printed at the end of this cell. In the visible
# cells it is not used to place the model; placement is delegated to
# device_map='auto' below.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# Optional 8-bit quantisation, currently disabled:
# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_8bit=True,
# )

# Access token read from the environment; None when `hf_token` is unset.
hf_auth = os.environ.get('hf_token')

model_config = transformers.AutoConfig.from_pretrained(model_id, use_auth_token=hf_auth)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    trust_remote_code=True,
    device_map='auto',
    use_auth_token=hf_auth,
    # quantization_config=bnb_config,
)

# Switch to evaluation mode; this notebook only runs generation.
model.eval()
print(f"Model loaded on {device}")

Downloading (…)lve/main/config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00003.bin:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00003.bin:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Model loaded on cuda:0


In [3]:
# Tokenizer is loaded from the same checkpoint (and with the same token) as the model.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)

# Padding reuses the end-of-sequence token and is applied on the right.
# (Alternative kept for reference: tokenizer.pad_token_id = tokenizer.eos_token_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

In [4]:
# Restrict transformers logging to CRITICAL messages for the rest of the notebook.
logging.set_verbosity(logging.CRITICAL)

# Smoke test: ask the fine-tuned model for a PyBGPStream script.
prompt = "Write a Python script using the PyBGPStream library to detect potential prefix hijacks for a given target prefix. The target prefix should be '10.10.0.0/16'."

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=718,
)

# The instruction is wrapped in the [INST] ... [/INST] chat markers.
# NOTE(review): the tokenizer may prepend its own BOS token as well -- confirm
# the literal "<s>" here does not end up duplicated.
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Write a Python script using the PyBGPStream library to detect potential prefix hijacks for a given target prefix. The target prefix should be '10.10.0.0/16'. [/INST]  Sure! Here is an example Python script using the PyBGPStream library to detect potential prefix hijacks for the target prefix '10.10.0.0/16':
```
import pybgpstream

# Initialize the BGPStream
stream = pybgpstream.BGPStream(
    from_time="2021-01-01 00:00:00", until_time="2021-01-01 01:00:00",
    project="ris",
    collectors="rrc00",
    record_type="updates",
    filter="prefix exact 10.10.0.0/16"
)

# Loop through the stream records
for rec in stream.records():
    for elem in rec:
        # Check if the prefix is announced by a different AS
        if elem.fields['as-path'].split()[0]!= elem.fields['as-path'].split()[-1]:
            print(f"Possible prefix hijack detected for {elem.fields['prefix']}")
```
This script will detect any announcements of the target prefix '10.10.0.0/16' by a different AS than 

In [5]:
import json
import re

# Multiple-choice benchmark: each record provides "question", "options" and "answer".
input_file_path = "/home/hb/LLM-research/evaluation/BGP/bgp_test_multiple_choice.json"

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

# NOTE(review): several recorded outputs stop mid-sentence or before any
# "Answer:" line. max_length presumably bounds prompt + completion together;
# consider max_new_tokens instead -- confirm against the pipeline docs.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

# Option letter directly in front of a closing parenthesis, e.g. the "a" in "a) AS-Path".
option_letter_re = re.compile(r'\b(\w)\)')

generated_answers = []
ground_truth = [q_dict["answer"] for q_dict in questions_data]
n_question = 0

for q_dict in questions_data:
    question = q_dict["question"]
    options = q_dict["options"]

    # Construct prompt with question and options
    prompt = f"Choose the correct answer: {question}\n" + "\n".join(options)

    # Generate answer using the language model (the returned text echoes the prompt).
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Generated Answer: {generated_answer}")

    # Parse the generated answer: take the option letter from the first
    # "Answer:" / "Correct answer:" line that actually contains one.
    parsed_answer = ""
    for line in generated_answer.splitlines():
        if not line.startswith(("Answer: ", "Correct answer: ")):
            continue
        letter_match = option_letter_re.search(line)
        if letter_match is None:
            # An answer line without an "x)" token used to raise AttributeError
            # on .group(); keep scanning the remaining lines instead.
            continue
        parsed_answer = letter_match.group(1)
        print(f"Parsed_answer: {parsed_answer}")
        break

    # An empty string marks "no answer could be parsed" and is scored as incorrect.
    generated_answers.append(parsed_answer)
    n_question += 1
    print(f"----------------{n_question}-----------------")
print(generated_answers)


total_questions = len(questions_data)
correct_answers = 0

for ground_answer, generated_answer in zip(ground_truth, generated_answers):
    if generated_answer.lower() == ground_answer.lower():
        print("Correct")
        correct_answers += 1
    else:
        print(f"GPT answer: {generated_answer}. Correct answer: {ground_answer}")
        print("Incorrect")

# Guard against an empty question file (previously a ZeroDivisionError).
accuracy = (correct_answers / total_questions) * 100 if total_questions else 0.0

print(f"Accuracy: {accuracy:.2f}%")
print(f"Correct answers: {correct_answers}")
print(f"Incorrect answers: {total_questions - correct_answers}")

Generated Answer: Choose the correct answer: When analyzing BGP route propagation, which attribute provides information about the path that the BGP update has traversed?
a) AS-Path
b) MED (Multi-Exit Discriminator)
c) Local Preference
d) Next Hop

Answer: a) AS-Path.

Explanation: The AS-Path attribute is used to track the path that a BGP update has traversed. It is a list of autonomous system numbers that the update has passed through. This attribute is used to determine the best path to a destination, as the
Parsed_answer: a
----------------1-----------------
Generated Answer: Choose the correct answer: Which of the following is a primary purpose of BGP?
a) Establishing communication between devices within a LAN
b) Exchanging routing information between autonomous systems
c) Optimizing local network traffic
d) Assigning IP addresses to devices

Answer: b) Exchanging routing information between autonomous systems.

Explanation: BGP is a protocol used to exchange routing information be



Generated Answer: Choose the correct answer: Which BGP attribute is used to group routes into categories?
a) LOCAL-PREF
b) MED
c) AS-PATH
d) COMMUNITY

Answer: d) COMMUNITY.

Explanation: The COMMUNITY attribute is used to group routes into categories. It is used to identify routes that are part of the same community, allowing for more efficient route selection and traffic engineering. The LOCAL-PREF attribute is used to influence the route selection process, while the MED attribute is used to influence the route
Parsed_answer: d
----------------11-----------------
Generated Answer: Choose the correct answer: BGP confederations are used to:
a) Optimize local network traffic
b) Divide an AS into sub-ASes for scalability
c) Group routes into categories
d) Handle BGP session establishment
e) Reduce the number of BGP sessions
f) Improve route aggregation
g) Enhance route stability
h) Increase the number of BGP sessions
i) Improve route convergence
j) Enhance route selection
k) Improve rout

In [None]:
import json
import re

# Fill-the-blank benchmark: each record provides "question" (containing a
# "________" placeholder) and "answer".
input_file_path = "/home/hb/LLM-research/evaluation/bgp_test_fill_the_blank.json"

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

blank_marker = "________"
# Upper bound on the extracted span; same value as the previously inlined literal.
max_fill_length = len("autonomous systems (ASes)")

generated_answers_fb = []
filled_answers = []

n_question = 0
for q_dict in questions_data:
    question = q_dict["question"]
    answer = q_dict["answer"]  # reference answer; not consumed in this cell

    # Generate answer using the language model
    prompt = f"[INST]{question} \n Fill the blank: \n [/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Output: {generated_answer}")
    generated_answers_fb.append(generated_answer)

    # Keep only the text after the [/INST] marker. DOTALL lets the capture span
    # several lines (without it only the first line was kept, so the "Sure"
    # handling below could never trigger), and a missing marker no longer
    # raises AttributeError.
    completion_match = re.search(r'\[/INST\]\s*(.*)', generated_answer, flags=re.DOTALL)
    generated_answer = completion_match.group(1).strip() if completion_match else ""

    # Drop a leading "Sure, ..." preamble line when more text follows it.
    generated_lines = generated_answer.split('\n')
    if len(generated_lines) > 1 and "Sure" in generated_lines[0]:
        generated_answer = '\n'.join(generated_lines[1:]).strip()

    # Find the position of the placeholder in the question. The extraction
    # below assumes the model restates the sentence with the blank filled in,
    # so the same offset is applied to the completion.
    placeholder_position = question.find(blank_marker)

    if placeholder_position == -1:
        # No placeholder in the question: nothing meaningful to slice out.
        filled_content = ""
    else:
        # Extract the content filled in the placeholder
        filled_content = generated_answer[placeholder_position:placeholder_position + max_fill_length]
    filled_answers.append(filled_content)
    n_question += 1
    print(f"Filled content: {filled_content}")
    print(f"---------------------{n_question}-----------------------------")

In [None]:
# Open-ended QA benchmark: every record supplies a "question"; the raw model
# output is collected for later inspection.
input_file_path = "/home/hb/LLM-research/evaluation/bgp_test_QA_val.json"

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
)

generated_answers_qa = []

# Pre-set so the counter exists even when the question file is empty.
n_question = 0
for n_question, q_dict in enumerate(questions_data, start=1):
    question = q_dict["question"]

    # Wrap the question in the instruction markers and generate.
    prompt = f"[INST]{question}[/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']

    print(f"Output: {generated_answer}")
    generated_answers_qa.append(generated_answer)
    print(f"---------------------{n_question}-----------------------------")

### LLaMA2-7B:
`Multiple Choice v2`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18

`Multiple Choice v1`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

### LLaMA2-13B:
`Multiple Choice v2`:
    Accuracy: 60.00%
    Correct answers: 21
    Incorrect answers: 14

`Multiple Choice v1`:
    Accuracy: 71.43%
    Correct answers: 25
    Incorrect answers: 10

### BGPtest9- cosine - `Best`:
`Multiple Choice v2`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v1`:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6

### BGPtest10- constant:
`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13

`Multiple Choice v1`:
    - Accuracy: 71.43%
    - Correct answers: 25
    - Incorrect answers: 10

`Fill the blank v1`:
    - Accuracy: 88.57%
    - Correct answers: 31
    - Incorrect answers: 4

`Fill the blank v2`:
    - Accuracy: 91.42%
    - Correct answers: 33
    - Incorrect answers: 2


### BGP-LLaMA-1-cosine-2k-alpha64:
`Multiple Choice v2`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

### BGP-LLaMA-2-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v2`:
    Accuracy: 54.29%
    Correct answers: 19
    Incorrect answers: 16


### BGP-LLaMA-2(combined)-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 80.00%
    Correct answers: 28
    Incorrect answers: 7

`Multiple Choice v2`:

    Accuracy: 65.71%
    Correct answers: 23
    Incorrect answers: 12

### BGP-LLaMA-13b-2iter-40k-cutoff-max-2048: ??
`Multiple Choice v1`:
    Accuracy: 77.14%
    Correct answers: 27
    Incorrect answers: 8

    
`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13

`PyBGPStream`: did not pass

### BGP-LLaMA-13b-3-30k-cutoff-max-2048:
`Multiple Choice v1`:
    Accuracy: 57.14%
    Correct answers: 20
    Incorrect answers: 15

`Multiple Choice v2`:
    Accuracy: 54%

`PyBGPStream`: pass

### BGP-LLaMA-13b-50k-cutoff-max-2048: 
`Multiple Choice v1`:
    Accuracy: 60.00%
    Correct answers: 21
    Incorrect answers: 14
`PyBGPStream`: pass

### BGP-LLaMA-13b-20k-cutoff-max-2048: 
`Multiple Choice v1`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18
`PyBGPStream`: pass


### BGP-LLaMA-13b-20k-cutoff-1024-max-none:
`Multiple Choice v1`:
    Accuracy: 45.71%
    Correct answers: 16
    Incorrect answers: 19
`PyBGPStream`: pass


### BGP-LLaMA-13b-30k-cutoff-1024-max-none:
`Multiple Choice v1`:
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13

`PyBGPStream`: pass


### BGP-LLaMA-13b-40k-cutoff-1024-max-none:
`Multiple Choice v1`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18
`PyBGPStream`: did not pass

### BGP-LLaMA-13b-50k-cutoff-1024-max-none:
`Multiple Choice v1`:
    Accuracy: 40.00%
    Correct answers: 14
    Incorrect answers: 21
`PyBGPStream`: did not pass


### BGP-LLaMA26k-13b-30k-cutoff-1024-max-None:
`Multiple Choice v1`: did not pass

`PyBGPStream`: pass

### BGP-LLaMA26k-13b-20k-cutoff-1024-max-None:
`Multiple Choice v1`:
Accuracy: 37.14%
Correct answers: 13
Incorrect answers: 22
`PyBGPStream`: pass

### BGP-LLaMA26k-13b-10k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13
`PyBGPStream`: pass (better than previous)