In [1]:
from torch import cuda, bfloat16
import transformers
from transformers import (
    pipeline,
    logging,
)
import pandas as pd
import os

model_id = 'meta-llama/Llama-2-13b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type='nf4',
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=bfloat16
# )

hf_token = os.environ.get('hf_token')

# Need auth token for these
hf_auth = hf_token
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)
model.eval()
print(f"Model loaded on {device}")



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on cuda:0


In [2]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

tokenizer.pad_token = tokenizer.eos_token
# tokenizer.pad_token_id = tokenizer.eos_token_id
# tokenizer.padding_side = "right"

# Lift test

In [3]:
import pandas as pd
from datetime import datetime, timedelta

# Generate timestamps
start_time = datetime(2017, 8, 25, 3, 15, 0)
timestamps = [start_time + timedelta(minutes=5 * i) for i in range(5)]

# Define normal data and one anomaly
data = [
    {
        "timestamp": "2017-08-25T03:25:00.000",
        "nb_A": 3080,
        "nb_W": 309,
        "nb_implicit_W": 2199,
        "nb_dup_A": 320,
        "nb_dup_W": 0,
        "nb_A_prefix": 686,
        "nb_W_prefix": 181,
        "max_A_prefix": 55,
        "avg_A_prefix": 4,
        "max_A_AS": 314,
        "avg_A_AS": 16,
        "nb_orign_change": 210,
        "nb_new_A": 391,
        "nb_new_A_afterW": 170,
        "max_path_len": 15,
        "avg_path_len": 6,
        "max_editdist": 11,
        "avg_editdist": 3,
        "editdist_7": 250,
        "editdist_8": 25,
        "editdist_9": 44,
        "editdist_10": 1,
        "editdist_11": 1,
        "editdist_12": 0,
        "editdist_13": 0,
        "editdist_14": 0,
        "editdist_15": 0,
        "editdist_16": 0,
        "editdist_17": 0,
        "nb_tolonger": 889,
        "nb_toshorter": 1310,
        "avg_interarrival": 1116,
    },
    {
        "timestamp": "2017-08-25T03:35:00.000",
        "nb_A": 86458,
        "nb_W": 356,
        "nb_implicit_W": 3790,
        "nb_dup_A": 7500,
        "nb_dup_W": 0,
        "nb_A_prefix": 75578,
        "nb_W_prefix": 203,
        "max_A_prefix": 46,
        "avg_A_prefix": 1,
        "max_A_AS": 19182,
        "avg_A_AS": 38,
        "nb_orign_change": 182,
        "nb_new_A": 75028,
        "nb_new_A_afterW": 140,
        "max_path_len": 21,
        "avg_path_len": 4,
        "max_editdist": 14,
        "avg_editdist": 3,
        "editdist_7": 243,
        "editdist_8": 58,
        "editdist_9": 75,
        "editdist_10": 69,
        "editdist_11": 9,
        "editdist_12": 13,
        "editdist_13": 0,
        "editdist_14": 1,
        "editdist_15": 0,
        "editdist_16": 0,
        "editdist_17": 0,
        "nb_tolonger": 1180,
        "nb_toshorter": 2610,
        "avg_interarrival": 1087,
    },
    {
        "timestamp": "2017-08-25T04:00:00.000",
        "nb_A": 4311,
        "nb_W": 401,
        "nb_implicit_W": 3069,
        "nb_dup_A": 625,
        "nb_dup_W": 0,
        "nb_A_prefix": 1030,
        "nb_W_prefix": 165,
        "max_A_prefix": 54,
        "avg_A_prefix": 4,
        "max_A_AS": 305,
        "avg_A_AS": 20,
        "nb_orign_change": 206,
        "nb_new_A": 367,
        "nb_new_A_afterW": 250,
        "max_path_len": 19,
        "avg_path_len": 6,
        "max_editdist": 17,
        "avg_editdist": 3,
        "editdist_7": 202,
        "editdist_8": 23,
        "editdist_9": 36,
        "editdist_10": 2,
        "editdist_11": 5,
        "editdist_12": 7,
        "editdist_13": 1,
        "editdist_14": 1,
        "editdist_15": 0,
        "editdist_16": 0,
        "editdist_17": 56,
        "nb_tolonger": 1318,
        "nb_toshorter": 1751,
        "avg_interarrival": 1112,
    },
        {
        "timestamp": "2017-08-25T03:20:00.000",
        "nb_A": 2462,
        "nb_W": 148,
        "nb_implicit_W": 1721,
        "nb_dup_A": 347,
        "nb_dup_W": 0,
        "nb_A_prefix": 434,
        "nb_W_prefix": 66,
        "max_A_prefix": 51,
        "avg_A_prefix": 6,
        "max_A_AS": 188,
        "avg_A_AS": 16,
        "nb_orign_change": 172,
        "nb_new_A": 285,
        "nb_new_A_afterW": 109,
        "max_path_len": 13,
        "avg_path_len": 6,
        "max_editdist": 11,
        "avg_editdist": 3,
        "editdist_7": 165,
        "editdist_8": 36,
        "editdist_9": 37,
        "editdist_10": 0,
        "editdist_11": 1,
        "editdist_12": 0,
        "editdist_13": 0,
        "editdist_14": 0,
        "editdist_15": 0,
        "editdist_16": 0,
        "editdist_17": 0,
        "nb_tolonger": 738,
        "nb_toshorter": 983,
        "avg_interarrival": 1133,
    },
    {
        "timestamp": "2017-08-25T03:25:00.000",
        "nb_A": 3080,
        "nb_W": 309,
        "nb_implicit_W": 2199,
        "nb_dup_A": 320,
        "nb_dup_W": 0,
        "nb_A_prefix": 686,
        "nb_W_prefix": 181,
        "max_A_prefix": 55,
        "avg_A_prefix": 4,
        "max_A_AS": 314,
        "avg_A_AS": 16,
        "nb_orign_change": 210,
        "nb_new_A": 391,
        "nb_new_A_afterW": 170,
        "max_path_len": 15,
        "avg_path_len": 6,
        "max_editdist": 11,
        "avg_editdist": 3,
        "editdist_7": 250,
        "editdist_8": 25,
        "editdist_9": 44,
        "editdist_10": 1,
        "editdist_11": 1,
        "editdist_12": 0,
        "editdist_13": 0,
        "editdist_14": 0,
        "editdist_15": 0,
        "editdist_16": 0,
        "editdist_17": 0,
        "nb_tolonger": 889,
        "nb_toshorter": 1310,
        "avg_interarrival": 1116,
    },
]

anomaly_status_list = [
    "",
    "anomaly due to high value of nb_A, nb_A_prefix, avg_A_prefix, max_A_AS, avg_A_AS, nb_new_A, editdist_9",
    "",
    "",
    ""
]

In [4]:
# Initialize the text generation pipeline
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1024)

# Loop through each instance in the dataset
for i, instance in enumerate(data):
    prompt = f"""
   Below is BGP update message data. Your task is to determine the BGP anomaly status based on the data provided. If an anomaly is detected, specify the reason. If there are no amomalies then state that no anomalies detected. Keep your answers short.
    {instance}"""
    
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    print(f"Result for instance {i+1}:")
    print(result[0]['generated_text'])
    print("\n")

Result for instance 1:
<s>[INST] 
   Below is BGP update message data. Your task is to determine the BGP anomaly status based on the data provided. If an anomaly is detected, specify the reason. If there are no amomalies then state that no anomalies detected. Keep your answers short.
    {'timestamp': '2017-08-25T03:25:00.000', 'nb_A': 3080, 'nb_W': 309, 'nb_implicit_W': 2199, 'nb_dup_A': 320, 'nb_dup_W': 0, 'nb_A_prefix': 686, 'nb_W_prefix': 181, 'max_A_prefix': 55, 'avg_A_prefix': 4, 'max_A_AS': 314, 'avg_A_AS': 16, 'nb_orign_change': 210, 'nb_new_A': 391, 'nb_new_A_afterW': 170, 'max_path_len': 15, 'avg_path_len': 6, 'max_editdist': 11, 'avg_editdist': 3, 'editdist_7': 250, 'editdist_8': 25, 'editdist_9': 44, 'editdist_10': 1, 'editdist_11': 1, 'editdist_12': 0, 'editdist_13': 0, 'editdist_14': 0, 'editdist_15': 0, 'editdist_16': 0, 'editdist_17': 0, 'nb_tolonger': 889, 'nb_toshorter': 1310, 'avg_interarrival': 1116} [/INST]  No anomalies detected.


Result for instance 2:
<s>[INST]

## Lift results

# Pcap

In [3]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Given a n3.pcap file containing network traffic data, create a Python script using scapy library to process the PCAP. The script should extract the source IP, destination IP, source port, destination port, and the raw payload data for each packet in the PCAP. Store this data in a pandas DataFrame and display it."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1012)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Given a n3.pcap file containing network traffic data, create a Python script using scapy library to process the PCAP. The script should extract the source IP, destination IP, source port, destination port, and the raw payload data for each packet in the PCAP. Store this data in a pandas DataFrame and display it. [/INST]  Sure! Here is a Python script using the Scapy library to process a PCAP file and extract the source IP, destination IP, source port, destination port, and raw payload data for each packet. The data is then stored in a pandas DataFrame and displayed.
```
import scapy
import pandas as pd

# Open the PCAP file
pcap = scapy.open("n3.pcap")

# Create a list to store the packet data
packet_data = []

# Iterate over each packet in the PCAP file
for packet in pcap:
    # Extract the source IP, destination IP, source port, and destination port
    src_ip = packet.src_ip
    dst_ip = packet.dst_ip
    src_port = packet.sport
    dst_port = packet.dport
    
    # Extra

In [4]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Create a Python script using PyBGPStream to monitor real-time (project='ris-live') BGP updates, focusing on announcements from AS6830. The script utilizes a dictionary to track the timestamp and count of announcements for each prefix associated with AS6830. Implement a function that checks daily if the data needs to be reset by comparing the current date with the date of the last reset, and if they differ, it clears the dictionary, ensuring data freshness each new day. The script should also alert if any prefix's count exceeds 10,000 to flag significant anomalies in real time. For extracting AS paths from BGP updates, use the code as_path = elem.fields.get('as-path', '').split() to accurately parse the path and identify announcements directly associated with AS6830."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=718)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Create a Python script using PyBGPStream to monitor real-time (project='ris-live') BGP updates, focusing on announcements from AS6830. The script utilizes a dictionary to track the timestamp and count of announcements for each prefix associated with AS6830. Implement a function that checks daily if the data needs to be reset by comparing the current date with the date of the last reset, and if they differ, it clears the dictionary, ensuring data freshness each new day. The script should also alert if any prefix's count exceeds 10,000 to flag significant anomalies in real time. For extracting AS paths from BGP updates, use the code as_path = elem.fields.get('as-path', '').split() to accurately parse the path and identify announcements directly associated with AS6830. [/INST]  Sure, here is an example Python script using PyBGPStream to monitor real-time BGP updates for announcements from AS6830 and track the count of prefixes associated with this autonomous system:
```
import p

# 5G General Knowledge

In [3]:
import json
import re

input_file_path = "/home/hb/LLM-research/evaluation/5G/5G_analysis_multi_choice_v2.json" 

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)

generated_answers = []
ground_truth = [q_dict["answer"] for q_dict in questions_data]
n_question = 0

for q_dict in questions_data:
    question = q_dict["question"]
    options = q_dict["options"]
    
    # Construct prompt with question and options
    prompt = f"Choose the correct answer. {question}\n" + "\n".join(options)
    # print(prompt)
    
    # Generate answer using the language model
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Generated Answer: {generated_answer}")
    
    # Parse the generated answer
    answer_lines = generated_answer.splitlines()
    parsed_answer = ""
    for line in answer_lines:
            if line.startswith("Answer: ") or line.startswith("Correct answer: "):
                match = re.search(r'\b(\w)[\)-]', line)
                if match is not None:
                    parsed_answer = match.group(1)
                    print(f"Parsed_answer: {parsed_answer}")
                    break
                else:
                    parsed_answer = "Couldn't parse the answer"
                    print(f"Parsed_answer: {parsed_answer}")
    generated_answers.append(parsed_answer)
    n_question += 1
    print(f"----------------{n_question}-----------------")
print(generated_answers)


total_questions = len(questions_data)
correct_answers = 0

for ground_answer, generated_answer in zip(ground_truth, generated_answers):
    if generated_answer.lower() == ground_answer.lower():
        print("Correct")
        correct_answers += 1
    else:
        print(f"GPT answer: {generated_answer}. Correct answer: {ground_answer}")
        print("Incorrect")

accuracy = (correct_answers / total_questions) * 100

print(f"Accuracy: {accuracy:.2f}%")
print(f"Correct answers: {correct_answers}")
print(f"Incorrect answers: {total_questions - correct_answers}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What happens to packets larger than the Maximum SDU size in the network?
a) Always dropped
b) Forwarded with increased QoS
c) Implementation specific
d) Always forwarded with no changes

Answer: c) Implementation specific

Explanation: The behavior of packets larger than the Maximum SDU size in the network depends on the specific implementation of the network devices and protocols. Some devices may drop such packets, while others may fragment them or drop them with an error message. Therefore, the correct answer is (c) Implementation specific
Parsed_answer: c
----------------1-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. The Maximum Transfer Unit (MTU) of the IP layer and the Maximum SDU Size:
a) Are directly related
b) Have no relationship
c) Are the same
d) Depend on the GGSN

Answer: b) Have no relationship

Explanation: The Maximum Transfer Unit (MTU) of the IP layer and the Maximum SDU Size (Maximum Service Data Unit) are not directly related. The MTU is a parameter that determines the maximum size of a single IP packet, while the Maximum SDU Size
Parsed_answer: b
----------------2-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. Why does RAN need SDU size information?
a) For IP fragmentation
b) For efficient use of the radio spectrum
c) To operate in transparent RLC protocol mode
d) For GGSN optimization

Answer: b) For efficient use of the radio spectrum

Explanation: RAN (Radio Access Network) needs SDU (Service Data Unit) size information to efficiently use the radio spectrum. This information is used to determine the maximum size of the SDU that can be transmitted over the air interface, taking into account the available bandwidth and the overhead
Parsed_answer: b
----------------3-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What does SDU error ratio indicate?
a) Fraction of SDUs dropped
b) Fraction of SDUs forwarded with decreased QoS
c) Fraction of SDUs lost or detected as erroneous
d) Efficiency of the RLC protocol

Answer: c) Fraction of SDUs lost or detected as erroneous

Explanation: The SDU error ratio is a metric that measures the percentage of SDUs that are lost or detected as erroneous. It is calculated by dividing the number of SDUs lost or detected
Parsed_answer: c
----------------4-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. Which attribute specifies the relative importance of the UMTS bearer compared to other bearers?
a) Maximum bitrate
b) Traffic handling priority
c) Allocation/Retention Priority
d) Guaranteed bitrate

Answer: c) Allocation/Retention Priority

Explanation: The Allocation/Retention Priority attribute specifies the relative importance of the UMTS bearer compared to other bearers. It determines the order in which bearers are allocated or released when there are competing demands for resources. A
Parsed_answer: c
----------------5-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What happens to packets larger than the Maximum SDU size in the network?
a) Always dropped
b) Forwarded with increased QoS
c) Implementation specific
d) Always forwarded with no changes

Answer: c) Implementation specific

Explanation: The behavior of packets larger than the Maximum SDU size in the network depends on the specific implementation of the network devices and protocols. Some devices may drop such packets, while others may fragment them or drop them with an error message. Therefore, the correct answer is (c) Implementation specific
Parsed_answer: c
----------------6-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. The Maximum Transfer Unit (MTU) of the IP layer and the Maximum SDU Size:
a) Are directly related
b) Have no relationship
c) Are the same
d) Depend on the GGSN

Answer: b) Have no relationship

Explanation: The Maximum Transfer Unit (MTU) of the IP layer and the Maximum SDU Size (Maximum Service Data Unit) are not directly related. The MTU is a parameter that determines the maximum size of a single IP packet, while the Maximum SDU Size
Parsed_answer: b
----------------7-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. Why does RAN need SDU size information?
a) For IP fragmentation
b) For efficient use of the radio spectrum
c) To operate in transparent RLC protocol mode
d) For GGSN optimization

Answer: b) For efficient use of the radio spectrum

Explanation: RAN (Radio Access Network) needs SDU (Service Data Unit) size information to efficiently use the radio spectrum. This information is used to determine the maximum size of the SDU that can be transmitted over the air interface, taking into account the available bandwidth and the overhead
Parsed_answer: b
----------------8-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What does SDU error ratio indicate?
a) Fraction of SDUs dropped
b) Fraction of SDUs forwarded with decreased QoS
c) Fraction of SDUs lost or detected as erroneous
d) Efficiency of the RLC protocol

Answer: c) Fraction of SDUs lost or detected as erroneous

Explanation: The SDU error ratio is a metric that measures the percentage of SDUs that are lost or detected as erroneous. It is calculated by dividing the number of SDUs lost or detected
Parsed_answer: c
----------------9-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. Which attribute specifies the relative importance of the UMTS bearer compared to other bearers?
a) Maximum bitrate
b) Traffic handling priority
c) Allocation/Retention Priority
d) Guaranteed bitrate

Answer: c) Allocation/Retention Priority

Explanation: The Allocation/Retention Priority attribute specifies the relative importance of the UMTS bearer compared to other bearers. It determines the order in which bearers are allocated or released when there are competing demands for resources. A
Parsed_answer: c
----------------10-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What is the purpose of including the traffic class itself as an attribute in Radio Access Bearer Service Attributes?
a) To standardize the attributes across different network components
b) RAN can make assumptions about the traffic source and optimise the transport for that traffic type
c) To enhance the security of transmitted data
d) To synchronize traffic flows across different bearers

Answer: b) RAN can make assumptions about the traffic source and optimise the transport for that traffic type.

Explanation: The purpose of including the traffic class itself as an attribute in
Parsed_answer: b
----------------11-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. For what purpose might the Guaranteed bitrate be used?
a) To ensure consistent connection speed across devices
b) To facilitate admission control and for resource allocation within RAN
c) To prioritize certain traffic types over others
d) To maintain the QoS for high-priority services

Answer: d) To maintain the QoS for high-priority services

Explanation: The Guaranteed bitrate is used to maintain the Quality of Service (QoS) for high-priority services, such as voice and video
Parsed_answer: d
----------------12-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What is the use of the SDU format information?
a) To assist in efficient packet fragmentation and reassembly
b) To assist in route optimization for transmitted data
c) To define which bits of the payload belong to each subflow
d) To assist in load balancing across multiple links

Answer: c) To define which bits of the payload belong to each subflow.

Explanation: The SDU format information is used to define which bits of the payload belong to each subflow. This information is necessary for efficient packet fragmentation and reassembly, as
Parsed_answer: c
----------------13-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. The Residual bit error ratio is used to?
a) Configure radio interface protocols, algorithms, and error detection coding
b) Enhance the overall QoS for end users
c) Improve the handover process between different network nodes
d) Adjust the power levels in the transmitting device

Answer: a) Configure radio interface protocols, algorithms, and error detection coding

Explanation: The Residual bit error ratio (RBER) is a metric used to evaluate the performance of wireless communication systems. It is defined as the
Parsed_answer: a
----------------14-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What shall the PDN GW modify if the subscribed ARP parameter has been changed?
a) All dedicated S2a bearers with the new ARP value
b) All dedicated S2a bearers with the previously subscribed ARP value
c) Only the default bearer
d) None of the above

Answer: b) All dedicated S2a bearers with the previously subscribed ARP value

Explanation: When the subscribed ARP parameter has been changed, the PDN GW should modify all dedicated S2a bearers with the
Parsed_answer: b
----------------15-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What message does the trusted non-3GPP IP access send to acknowledge the bearer modification to the PDN GW?
a) Update Bearer Request
b) Update Bearer Response
c) Create Session Request
d) Delete Bearer Request

Answer: b) Update Bearer Response

Explanation: When the trusted non-3GPP IP access sends a bearer modification to the PDN GW, the PDN GW responds with an Update Bearer Response message to acknowledge the modification.

Reference: 3GPP
Parsed_answer: b
----------------16-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What happens if the default bearer belonging to a PDN connection is deactivated?
a) Only the default bearer is deactivated
b) All bearers belonging to the PDN connection are deactivated
c) The PDN GW retains the PDN address
d) The PDN GW releases the PDN address

Answer: b) All bearers belonging to the PDN connection are deactivated

Explanation: When the default bearer belonging to a PDN connection is deactivated, all bearers belonging to the same P
Parsed_answer: b
----------------17-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. In the Local Breakout case, which entities are responsible for forwarding messages?
a) PDN GW and hPCRF
b) vPCRF and hPCRF
c) PDN GW and vPCRF
d) None of the above

Answer: b) vPCRF and hPCRF

Explanation: In the Local Breakout case, the vPCRF and hPCRF entities are responsible for forwarding messages. The vPCRF is responsible for forwarding the message to the PDN G
Parsed_answer: b
----------------18-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What does the RAT type indicate?
a) The 3GPP IP access technology type
b) The non-3GPP IP access technology type
c) Both 3GPP and non-3GPP IP access technology types
d) Neither 3GPP nor non-3GPP IP access technology types

Answer: c) Both 3GPP and non-3GPP IP access technology types

Explanation: The RAT type indicates the type of radio access technology used by the device to access the network. The RAT type can
Parsed_answer: c
----------------19-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. When the WLAN is considered trusted by the operator, how is it interfaced?
a) Via STa to the 3GPP AAA to EPC through S2a
b) Only via S2a
c) Only via 3GPP AAA
d) None of the above

Answer: b) Only via S2a

Explanation: When the WLAN is considered trusted by the operator, it is interfaced only via S2a. This is because the WLAN is not subject to the same level
Parsed_answer: b
----------------20-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What might STa need to be enhanced to support?
a) An indication on whether the Trusted WLAN AN supports S2a
b) The SSID selected by the UE
c) The APN the user is to be associated with for EPC access
d) All of the above

Answer: d) All of the above

Explanation: STa needs to be enhanced to support all of the above information to ensure seamless handover between Wi-Fi and LTE networks. The information includes:

* An indication on
Parsed_answer: d
----------------21-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. Which entity is associated with the SWw reference point?
a) WLAN UE's connection to EPC for data transfer
b) Link between 3GPP AAA Server and WLAN AN for EAP messages
c) Communication interface between UE and HSS
d) Pathway from TNAN to AAA server over STa for authentication

Answer: b) Link between 3GPP AAA Server and WLAN AN for EAP messages.

Explanation: The SWw reference point is used for the authentication of WLAN
Parsed_answer: b
----------------22-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What capability might the TNAN indicate to the AAA server via STa?
a) Ability to support IPv6 addressing exclusively
b) Capacity to handle both S2a and non-seamless offload
c) Bandwidth allocation per UE for data traffic
d) Selection logic for PDN Gateway for routing data traffic
e) Support for both IPv4 and IPv6 addressing

Answer: e) Support for both IPv4 and IPv6 addressing

Explanation: The TNAN (Tracking
Parsed_answer: e
----------------23-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What consequence follows when a UE dispatches a DHCPv4 message for IPv4 address release?
a) UE's migration from IPv4 to IPv6 addressing
b) Initiation of Bearer Modification by the PDN GW
c) Severance of PDN connection when the connection type is IPv4
d) Sustainment of the existing PDN connection without interruption

Answer: c) Severance of PDN connection when the connection type is IPv4

Explanation: When a UE dispatches a D
Parsed_answer: c
----------------24-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What should the source MME/SSGSN do if the target MME/SSGSN doesn't support the SGW restoration procedure?
a) Transfer the PDN connection to a different SGW
b) Continue with the standard SGW relocation procedure
c) Reject the Handover Required/Relocation Required message and release the S1/Iu/radio resources
d) Wait for the target MME/SSGSN to update its SGW restoration support

Answer: b) Continue with the standard SG
Parsed_answer: b
----------------25-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What happens when the T-Release-GBR timer expires in the PGW?
a) The PGW initiates a new SGW restoration process
b) The PGW releases GBR bearers that haven't been restored yet and informs the PCRF
c) The PGW maintains the PDN connections without any change
d) The PGW attempts to restore the PDN connection with the MME/SSGSN

Answer: b) The PGW releases GBR bearers that haven't been restored yet
Parsed_answer: b
----------------26-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. For which reason might the PGW reject an IP-CAN session modification procedure after an SGW failure?
a) If the request includes changing the PDN connection route
b) If the request includes the installation/modification of PCC rules
c) If the PGW has already initiated a new SGW restoration process
d) If the MME/SSGSN has already restored the PDN connection

Answer: b) If the request includes the installation/modification of PCC rules.

Explanation: The PGW might
Parsed_answer: b
----------------27-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What does OMR stand for in the context of the provided text?
Optimized Media Routeing
Operational Management Review
Online Music Recorder
Optical Message Reading
OMR stands for Optimized Media Routeing.
----------------28-----------------


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer: Choose the correct answer. What protocol is mentioned alongside SIP for OMR based signaling flows?
HTTP
SMTP
SDP
UDP

Answer: B. SDP

Explanation: SIP (Session Initiation Protocol) is a signaling protocol used for initiating, modifying, and terminating sessions in an IP network. OMR (Online Media Resource) based signaling flows use SIP along with SDP (Session Description Protocol) for describing and negotiating the media sessions. HTTP and SMTP are not related to OMR based signaling flows. UDP
Parsed_answer: Couldn't parse the answer
----------------29-----------------
Generated Answer: Choose the correct answer. What is the role of IBCF in the provided context?
Allocating MR context
Decoding audio files
Compressing video streams
Routing email traffic
IBCF (Intelligent Buffer Control Function) is responsible for allocating MR (Multi-Rate) contexts in the provided context. Therefore, the correct answer is:
Allocating MR context.
----------------30-----------------
['c'

In [None]:
import json
input_file_path = "/home/hb/LLM-research/evaluation/5G_SPEC/5G_SPEC_QA.json" 

with open(input_file_path, "r") as input_file:
    questions_data = json.load(input_file)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=512)

generated_answers_qa = []

n_question = 0
for q_dict in questions_data:
    n_question += 1
    question = q_dict["question"]
    # Generate answer using the language model
    prompt = f"[INST]{question}[/INST]"
    generated_answer = pipe(prompt)[0]['generated_text']
    print(f"Output: {generated_answer}")
    generated_answers_qa.append(generated_answer)
    print(f"---------------------{n_question}-----------------------------")

# 5G analysis knowledge

## Multiple Choice
`5G configuration analysis v1`:
Accuracy: 90%
Correct answers: 21 + 7 = 28
Incorrect answers: 2

`5G configuration analysis v2`:
Accuracy: 70.00%
Correct answers: 21
Incorrect answers: 12