In [1]:
import transformers
from transformers import (
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# import seaborn as sns
from pylab import rcParams
import os

In [2]:
# Hugging Face Hub checkpoint of the fine-tuned BGP-LLaMA model under evaluation.
model_id = 'hyonbokan/BGP-LLaMA-13b-3-30k-cutoff-max-2048'

# Device label used only for the status message printed at the end of this cell;
# weight placement itself is delegated to `device_map='auto'` below.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# Optional 8-bit quantization (currently disabled):
# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_8bit=True,
# )

# Hub access token, read from the `hf_token` environment variable (None when unset).
hf_auth = os.getenv('hf_token')

model_config = transformers.AutoConfig.from_pretrained(model_id, use_auth_token=hf_auth)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    device_map='auto',
    trust_remote_code=True,
    use_auth_token=hf_auth,
    # quantization_config=bnb_config,
)

# Put the model in evaluation mode before generating.
model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on cuda:0


In [3]:
# Load the tokenizer that belongs to the same checkpoint as the model.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)

# Pad on the right and reuse the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token
# Alternative (disabled): set the id directly.
# tokenizer.pad_token_id = tokenizer.eos_token_id

In [5]:
# Only show critical messages from transformers while generating.
logging.set_verbosity(logging.CRITICAL)

# Smoke test: ask the fine-tuned model for a PyBGPStream script.
prompt = (
    "Use PyBGPStream to capture BGP updates for IPv6 prefixes using collector RRC07 "
    "from August 10, 2020, 05:00 to August 10, 2020, 07:00. "
    "Store and categorize this data for future reference."
)

# max_length caps prompt + completion at 1024 tokens.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1024)

# Wrap the request in the [INST] ... [/INST] instruction markers before generating.
instruction = f"<s>[INST] {prompt} [/INST]"
result = pipe(instruction)
print(result[0]['generated_text'])

<s>[INST] Use PyBGPStream to capture BGP updates for IPv6 prefixes using collector RRC07 from August 10, 2020, 05:00 to August 10, 2020, 07:00. Store and categorize this data for future reference. [/INST]  Here is an example of how to use PyBGPStream to capture BGP updates for IPv6 prefixes using collector RRC07 from August 10, 2020, 05:00 to August 10, 2020, 07:00 and store and categorize the data for future reference:

import pybgpstream

# Initialize BGPStream
stream = pybgpstream.BGPStream(
    from_time="2020-08-10 05:00:00", until_time="2020-08-10 07:00:00",
    collectors=["rrc07"],
    record_type="updates",
    filter="ipversion 6"
)

# Store and categorize the data
prefix_counts = {}
for rec in stream.records():
    for elem in rec:
        if elem.type == "A":
            prefix = elem.fields['prefix']
            if prefix not in prefix_counts:
                prefix_counts[prefix] = 1
            else:
                prefix_counts[prefix] += 1

# Print the results
for pre

In [None]:
import json
import re

# Multiple-choice evaluation: prompt the model with each question and its
# options, extract the chosen option letter from the completion, and score it
# against the ground-truth answers stored in the dataset.
input_file_path = "/home/hb/LLM-research/evaluation/bgp_test_multiple_choice.json"

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

# Line prefixes that mark the model's final answer, and the "x)" option token.
_ANSWER_PREFIXES = ("Answer: ", "Correct answer: ")
_CHOICE_PATTERN = re.compile(r'\b(\w)\)')


def _parse_choice(generated_text):
    """Return the option letter from the first parsable answer line, or "".

    A line is considered only if it starts with one of `_ANSWER_PREFIXES`.
    Answer lines without an "x)" token are skipped instead of raising, so a
    single malformed completion cannot abort the whole evaluation run.
    """
    for line in generated_text.splitlines():
        if line.startswith(_ANSWER_PREFIXES):
            match = _CHOICE_PATTERN.search(line)
            if match is not None:
                return match.group(1)
    return ""


generated_answers = []
ground_truth = [q_dict["answer"] for q_dict in questions_data]
n_question = 0

for n_question, q_dict in enumerate(questions_data, start=1):
    question = q_dict["question"]
    options = q_dict["options"]

    # Construct prompt with question and options
    prompt = f"Choose the correct answer: \n{question}\n" + "\n".join(options)

    # Generate answer using the language model (the returned text echoes the prompt)
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Generated Answer: {generated_answer}")

    # Parse the generated answer; an empty string means nothing could be
    # extracted and is scored as incorrect below.
    parsed_answer = _parse_choice(generated_answer)
    if parsed_answer:
        print(f"Parsed_answer: {parsed_answer}")
    generated_answers.append(parsed_answer)
    print(f"----------------{n_question}-----------------")
print(generated_answers)


total_questions = len(questions_data)
correct_answers = 0

# Case-insensitive comparison of the parsed letter with the ground truth.
for ground_answer, generated_answer in zip(ground_truth, generated_answers):
    if generated_answer.lower() == ground_answer.lower():
        print("Correct")
        correct_answers += 1
    else:
        print(f"GPT answer: {generated_answer}. Correct answer: {ground_answer}")
        print("Incorrect")

# Guard against an empty dataset so the summary never divides by zero.
accuracy = (correct_answers / total_questions) * 100 if total_questions else 0.0

print(f"Accuracy: {accuracy:.2f}%")
print(f"Correct answers: {correct_answers}")
print(f"Incorrect answers: {total_questions - correct_answers}")

In [None]:
import json
import re

# Fill-the-blank evaluation: prompt the model with a sentence containing a
# "________" placeholder and cut out the text the model produced at the
# placeholder's position for later comparison with the expected answer.
input_file_path = "/home/hb/LLM-research/evaluation/bgp_test_fill_the_blank.json"

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

_PLACEHOLDER = "________"
# DOTALL lets the capture span several lines; without it only the first line
# after [/INST] was captured and the preamble-skipping step below could never
# run. `\s*` also accepts a completion that starts right after the marker.
_RESPONSE_PATTERN = re.compile(r'\[/INST\]\s*(.*)', re.DOTALL)

generated_answers_fb = []
filled_answers = []

n_question = 0
for n_question, q_dict in enumerate(questions_data, start=1):
    question = q_dict["question"]
    answer = q_dict["answer"]

    # Generate answer using the language model (the returned text echoes the prompt)
    prompt = f"[INST]{question} \n Fill the blank: \n [/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Output: {generated_answer}")
    generated_answers_fb.append(generated_answer)

    # Keep only the text after the instruction marker; fall back to an empty
    # answer instead of raising when the marker is missing from the output.
    response_match = _RESPONSE_PATTERN.search(generated_answer)
    generated_answer = response_match.group(1).strip() if response_match else ""

    # Drop a conversational first line such as "Sure, ..." when more text follows.
    generated_lines = generated_answer.split('\n')
    if "Sure" in generated_lines[0] and len(generated_lines) > 1:
        generated_answer = '\n'.join(generated_lines[1:]).strip()

    # Find the position of the placeholder in the question.
    placeholder_position = question.find(_PLACEHOLDER)

    # Extract the content filled in at the placeholder, using the expected
    # answer's length as the window width rather than a fixed sample length.
    # NOTE(review): this assumes the model repeats the sentence verbatim up to
    # the blank — confirm against real completions.
    if placeholder_position == -1:
        # No placeholder in the question: nothing sensible to slice out.
        filled_content = ""
    else:
        answer_width = len(str(answer))
        filled_content = generated_answer[placeholder_position:placeholder_position + answer_width]
    filled_answers.append(filled_content)
    print(f"Filled content: {filled_content}")
    print(f"---------------------{n_question}-----------------------------")

In [29]:
# Open-ended QA evaluation: collect the model's free-form answer for every
# question in the validation file (no automatic scoring in this cell).
input_file_path = "/home/hb/LLM-research/evaluation/bgp_test_QA_val.json"

with open(input_file_path, "r") as qa_file:
    questions_data = json.load(qa_file)

# max_length caps prompt + completion at 512 tokens.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=512)

generated_answers_qa = []

# Pre-initialized so the counter is still 0 when the dataset is empty.
n_question = 0
for n_question, q_dict in enumerate(questions_data, start=1):
    question = q_dict["question"]
    # Wrap the question in instruction markers and generate the answer.
    prompt = f"[INST]{question}[/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    generated_answers_qa.append(generated_answer)
    print(f"Output: {generated_answer}")
    print(f"---------------------{n_question}-----------------------------")

Output: [INST]What is the purpose of BGP Update messages, and how do they carry route information?[/INST]  BGP Update messages are used to exchange routing information between BGP speakers. They contain information about the routes that are available, as well as the attributes associated with each route. This information is used to determine the best path to a destination, and to update the routing tables of the BGP speakers. The Update message contains the following information:

1. Network Layer Reachability Information (NLRI): This contains the IP prefix and the associated subnet mask.
2. Path Attributes: This contains information about the route, such as the AS path, the origin, the next hop, and the MED.
3. Withdrawn Routes: This contains information about routes that are no longer valid.
4. Path Identifiers: This contains the BGP identifier of the sender.
5. Optional Parameters: This contains additional information that can be used to modify the route selection process.

The Upda

### LLaMA2-7B:
`Multiple Choice v2`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18

`Multiple Choice v1`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

### LLaMA2-13B:
`Multiple Choice v2`:
    Accuracy: 60.00%
    Correct answers: 21
    Incorrect answers: 14

`Multiple Choice v1`:
    Accuracy: 71.43%
    Correct answers: 25
    Incorrect answers: 10

### BGPtest1:
`Multiple Choice v2`:
    Accuracy: 65.71%
    Correct answers: 23
    Incorrect answers: 12
    
`Multiple Choice v1`:
    Accuracy: 77.14%
    Correct answers: 27
    Incorrect answers: 8

### BGPtest8- cosine:

`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13
    
`Multiple Choice v1`:
    Accuracy: 71.43%
    Correct answers: 25
    Incorrect answers: 10

### BGPtest9- cosine - `Best`:
`Multiple Choice v2`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v1`:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6

### BGPtest10- constant:
`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13

`Multiple Choice v1`:
    - Accuracy: 71.43%
    - Correct answers: 25
    - Incorrect answers: 10

`Fill the blank v1`:
    - Accuracy: 88.57%
    - Correct answers: 31
    - Incorrect answers: 4

`Fill the blank v2`:
    - Accuracy: 91.42%
    - Correct answers: 33
    - Incorrect answers: 2


### BGP-LLaMA-1-cosine-2k-alpha64:
`Multiple Choice v2`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

### BGP-LLaMA-2-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v2`:
    Accuracy: 54.29%
    Correct answers: 19
    Incorrect answers: 16


### BGP-LLaMA-2(combined)-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 80.00%
    Correct answers: 28
    Incorrect answers: 7

`Multiple Choice v2`:
    Accuracy: 71.43%
    Correct answers: 25
    Incorrect answers: 10

    Accuracy: 65.71%
    Correct answers: 23
    Incorrect answers: 12

# Eval - old

BGP-llama2-13b - test 1:
* True/False: 
    - Accuracy: 93.33%
    - Correct answers: 28
    - Incorrect answers: 2
* Multiple choice:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6
* Fill the blank: 74.29%(26/35)

BGP-llama2-13b - test 3:

* True/False: 
    - Accuracy: 93%
    - Correct answers: 28
    - Incorrect answers: 2
* Multiple choice:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6


BGP-llama2-13b - test 4:

* True/False: 
    - Accuracy: 93%
    - Correct answers: 28
    - Incorrect answers: 2
* Multiple choice:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6
* Fill the blank: 80%(28/35)

BGP-llama2-13b - test 7 - 20k - constant (gradient 4):
* True/False: 
    - Accuracy: 86.68%
* Multiple choice:
    - Accuracy: 80%

BGP-llama2-13b - test 7 - 20k - constant (gradient 1):
* Multiple choice:
    - Accuracy: N/A (not recorded)

BGP-llama2-13b - test 8 - 20k - cosine:
* Multiple choice:
    Accuracy: 74.29%
    Correct answers: 26
    Incorrect answers: 9

BGP-llama2-13b - test 8 - 20k - cosine - alpha 16:
* Multiple choice:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6