In [1]:
# %pip install -qU transformers accelerate einops langchain xformers bitsandbytes
# %pip install scipy

In [2]:
from peft import LoraConfig, get_peft_model

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

In [1]:
from torch import cuda, bfloat16
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
import pandas as pd
import os

model_id = 'meta-llama/Llama-2-70b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)


hf_token = os.environ.get('hf_token')
# Need auth token for these
hf_auth = hf_token
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)
model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

Model loaded on cuda:0


In [2]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

In [5]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "What is the purpose of the BGP community attribute, and how is it used?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] What is the purpose of the BGP community attribute, and how is it used? [/INST]  The BGP community attribute is a feature of the Border Gateway Protocol (BGP) that allows networks to share information about their routing policies and preferences with other networks. The community attribute is a way for a network to communicate its local routing policies to its neighboring networks, and to influence the routing decisions made by those networks.

The community attribute is a optional attribute that can be included in BGP updates, and it contains a list of communities that the originating network belongs to.


In [None]:
import json
import re
from transformers import pipeline

input_file_path = "/home/hb/fine-tuning-alpaca/evaluation/bgp_test_true_false.json" 

# Load questions and answers from JSON file
with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

# Initialize text generation pipeline
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

llama2_answers = []

# Evaluate generated answers
correct_answers = 0
total_questions = len(questions_data)

for q_dict in questions_data:
    question = q_dict["question"]
    answer = q_dict["answer"]

    # Generate answer using the language model
    prompt = f"[INST] {question} [/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    # print(generated_answer)
    llama2_answers.append(generated_answer)

# Extract ground truth answers
ground_truth = [q_dict["answer"] for q_dict in questions_data]

print(ground_truth)

for ground_answer, generated_answer in zip(ground_truth, llama2_answers):
    generated_answer = re.search(r'\[/INST\]\s+(.*?)\.', generated_answer).group(1).strip()
    print(f"Generated Answer: {generated_answer}")
    
    if generated_answer.lower() == ground_answer.lower():
        print("Correct")
        correct_answers += 1
    else:
        print("Incorrect")

accuracy = (correct_answers / total_questions) * 100

print(f"Accuracy: {accuracy:.2f}%")
print(f"Correct answers: {correct_answers}")
print(f"Incorrect answers: {total_questions - correct_answers}")


In [None]:
import json

input_file_path = "/home/hb/fine-tuning-alpaca/evaluation/bgp_test_multiple_choice.json" 

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

generated_answers = []
ground_truth = [q_dict["answer"] for q_dict in questions_data]

n_question = 0
for q_dict in questions_data:
    question = q_dict["question"]
    options = q_dict["options"]
    
    # Construct prompt with question and options
    prompt = f"{question}\nChoose the correct answer:\n" + "\n".join(options)
    # print(prompt)
    
    # Generate answer using the language model
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Generated Answer: {generated_answer}")
    
        # Parse the generated answer
    answer_lines = generated_answer.splitlines()
    parsed_answer = ""
    for line in answer_lines:
        if line.startswith("Answer: "):
            parsed_answer = re.search(r'\b(\w)\)', line).group(1)
            print(f"Parsed_answer: {parsed_answer}")
            break
    generated_answers.append(parsed_answer)
    n_question += 1
    print(f"----------------{n_question}-----------------")
print(generated_answers)


total_questions = len(questions_data)
correct_answers = 0

for ground_answer, generated_answer in zip(ground_truth, generated_answers):
    if generated_answer.lower() == ground_answer.lower():
        print("Correct")
        correct_answers += 1
    else:
        print("Incorrect")

accuracy = (correct_answers / total_questions) * 100

print(f"Accuracy: {accuracy:.2f}%")
print(f"Correct answers: {correct_answers}")
print(f"Incorrect answers: {total_questions - correct_answers}")

Generated Answer: When analyzing BGP route propagation, which attribute provides information about the path that the BGP update has traversed?
Choose the correct answer:
a) AS-Path
b) MED (Multi-Exit Discriminator)
c) Local Preference
d) Next Hop

Answer: a) AS-Path

Explanation:
The AS-Path attribute in BGP provides information about the path that the BGP update has traversed. It is a list of autonomous system numbers (ASNs) that the update has passed through, with the originating ASN at
Parsed_answer: a
----------------1-----------------
Generated Answer: Which of the following is a primary purpose of BGP?
Choose the correct answer:
a) Establishing communication between devices within a LAN
b) Exchanging routing information between autonomous systems
c) Optimizing local network traffic
d) Assigning IP addresses to devices within a network

Answer: b) Exchanging routing information between autonomous systems

Explanation: BGP (Border Gateway Protocol) is a protocol used for exchanging

In [None]:
import json
import re

input_file_path = "/home/hb/fine-tuning-alpaca/evaluation/bgp_test_fill_the_blank.json" 

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

generated_answers = []
filled_answers = []

n_question = 0
for q_dict in questions_data:
    question = q_dict["question"]
    answer = q_dict["answer"]

    # Generate answer using the language model
    prompt = f"[INST]{question} \n Fill the blank: \n [/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    # print(f"Output: {generated_answer}")
    
    generated_answer = re.search(r'\[/INST\]\s+(.*)', generated_answer, re.DOTALL).group(1).strip()
    if "Sure" in generated_answer:
        generated_answer = generated_answer.split('\n', 1)[1]  # Skip the first line
    print(f"Generated Answer: {generated_answer}")
    generated_answers.append(generated_answer)
    
    # Find the position of the placeholder in the first string
    placeholder_position = question.find("________")

    # Extract the content filled in the placeholder
    filled_content = generated_answer[placeholder_position:placeholder_position + len("autonomous systems (ASes)")]
    filled_answers.append(filled_content)
    n_question += 1
    print(f"Filled content: {filled_content}")
    print(f"---------------------{n_question}-----------------------------")


Generated Answer: Autonomous Systems (AS).

BGP is used to exchange routing and reachability information between different Autonomous Systems (AS) on the Internet. An Autonomous System is a group of networks that are managed by a single entity, such as an Internet Service Provider (ISP) or
Filled content: Internet. An Autonomous S
---------------------1-----------------------------
Generated Answer: In BGP, the AS-Path attribute represents the sequence of autonomous systems (ASes) that a route has traversed.
Filled content: sequence of autonomous sy
---------------------2-----------------------------
Generated Answer: BGP routers exchange routing information using UPDATE messages, which contain information about reachable destinations.
Filled content: UPDATE messages, which co
---------------------3-----------------------------
Generated Answer: BGP route aggregation is the process of combining multiple smaller IP prefixes into a single aggregated prefix.
Filled content: aggregation is