# Finetuned Model Evaluation

## Anomaly Detection


In [1]:
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
)
import transformers
import os

# Base checkpoint onto which the adapter is loaded; the commented ids are
# alternative bases kept for quick switching.
model_id = 'meta-llama/Llama-2-7b-chat-hf'
# model_id = 'meta-llama/Llama-2-13b-chat-hf'
# model_id = 'codellama/CodeLlama-7b-hf'
# model_id = "meta-llama/Meta-Llama-3-70B-Instruct"
# Path handed to PeftModel.from_pretrained as the adapter location.
new_model = "/home/hb/dataset_bgp/llm_finetuned/llama2-7b-table283New-10split-5k-instruct-1e5rate-loraa64drop01"

# Hugging Face access token from the environment; None when `hf_token` is unset.
hf_auth = os.environ.get('hf_token')

# 8-bit quantization settings. Not applied at the moment: the
# `quantization_config` argument below is commented out.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_8bit=True,
)

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=hf_auth
)

# Attach the adapter to the base model, then fold its weights into the base
# so `model` is a plain (non-PEFT) model afterwards.
model = PeftModel.from_pretrained(model, new_model)
model = model.merge_and_unload()

# Load the tokenizer that belongs to the base checkpoint (nothing is saved
# here). The auth token is passed like for the config and model above, so the
# tokenizer download uses the same credentials.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
    use_auth_token=hf_auth
)
# Reuse EOS as the padding token and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
)
import transformers
import os

# Checkpoint id passed to every from_pretrained call in this cell.
model_id = "hyonbokan/bgp-llama-knowledge-5k"

# Access token comes from the `hf_token` environment variable (None if unset).
hf_auth = os.getenv('hf_token')

model_config = transformers.AutoConfig.from_pretrained(model_id, use_auth_token=hf_auth)

# No quantization_config is passed (the bnb_config option stays disabled).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    device_map='auto',
    trust_remote_code=True,
    use_auth_token=hf_auth,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)

# Pad on the right and reuse the EOS token (and its id) for padding.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [3]:
import json
from tablular_eval_util import combine_csv_files, split_dataframe, preprocess_data, run_llm_inference

directory = '/home/hb/dataset_bgp/bgp_tab_dataset_test'

# Merge the test CSVs and strip the label column (if present) so it is not
# part of the text given to the model.
combined_df = combine_csv_files(directory).drop(columns=['anomaly_status'], errors='ignore')

# Chunk size handed to split_dataframe.
split_size = 20
data_list = split_dataframe(combined_df, split_size)

# Turn every chunk into the record format consumed by run_llm_inference.
formatted_data = list(map(preprocess_data, data_list))

# Persist the formatted records, then read them back from disk.
formatted_data_file = f'llm_table_bgp_data_test_{split_size}.json'
with open(formatted_data_file, 'w') as fh:
    fh.write(json.dumps(formatted_data, indent=4))

with open(formatted_data_file, 'r') as fh:
    formatted_data = json.loads(fh.read())

# Kept as the final expression so the notebook displays the returned value.
output_results_file = f'table135-20split-2k-with-outputs-{split_size}.json'
run_llm_inference(
    formatted_data,
    model,
    tokenizer,
    max_length=2550,
    output_results_file=output_results_file,
)

Processed 1/29
Processed 2/29
Processed 3/29
Processed 4/29
Processed 5/29
Processed 6/29
Processed 7/29
Processed 8/29
Processed 9/29
Processed 10/29




Processed 11/29
Processed 12/29
Processed 13/29
Processed 14/29
Processed 15/29
Processed 16/29
Processed 17/29
Processed 18/29
Processed 19/29
Processed 20/29
Processed 21/29
Processed 22/29
Processed 23/29
Processed 24/29
Processed 25/29
Processed 26/29
Processed 27/29
Processed 28/29
Processed 29/29
[{'instruction': 'The goal for this task is to determine if the data indicates an anomaly. The context, section, and table columns provide important information for identifying the correct anomaly type.', 'input_seg': '[TLE] The section is related to a specific time period of BGP monitoring. [TAB] col: | timestamp | asn | num_routes | num_new_routes | num_withdrawals | num_origin_changes | num_route_changes | max_path_length | avg_path_length | max_edit_distance | avg_edit_distance | num_announcements | num_unique_prefixes_announced | row 1: | 2022-03-28 07:00:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 2: | 2022-03-28 07:05:00 | 8342 | 7 | 7 | 0 | 0 | 0 | 0 | 0

In [4]:
# Disabled variant of the inference cell: same steps, but the chunks come from
# shuffle_and_split_dataframe, the file names carry a `_shuffled` suffix, and
# max_length is 3050. Everything below is commented out, so this cell is a no-op.
# import json
# from tablular_eval_util import combine_csv_files, split_dataframe, preprocess_data, run_llm_inference, shuffle_and_split_dataframe

# directory = '/home/hb/dataset_bgp/bgp_tab_dataset_test'
# combined_df = combine_csv_files(directory)

# if 'anomaly_status' in combined_df.columns:
#     combined_df = combined_df.drop(columns=['anomaly_status'])
    
# # Split the DataFrame into smaller chunks
# split_size = 20
# data_list = shuffle_and_split_dataframe(combined_df, split_size)

# # Preprocess the data into the required format
# formatted_data = [preprocess_data(chunk) for chunk in data_list]

# formatted_data_file = f'llm_table_bgp_data_test_{split_size}_shuffled.json'
# with open(formatted_data_file, 'w') as f:
#     json.dump(formatted_data, f, indent=4)

# with open(formatted_data_file, 'r') as f:
#     formatted_data = json.load(f)

# output_results_file = f'table135-20split-2k-with-outputs-{split_size}_shuffled.json'
# run_llm_inference(formatted_data, model, tokenizer, max_length=3050, output_results_file=output_results_file)

In [8]:
import pandas as pd
import re

# Load the saved inference results, keep only the "output" column, and export
# that column to CSV. `split_size` must already be defined by an earlier cell.
output_file = pd.read_json(f"/home/hb/LLM-research/finetune_main/finetuning_tabular/table_read/table135-20split-2k-with-outputs-{split_size}.json")
output_file = output_file["output"]
output_file.to_csv(f"table135-20split-2k-outputs-{split_size}.csv")

# Matches timestamps written as "YYYY-MM-DD HH:MM:SS".
timestamp_pattern = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')

# Every timestamp found in the outputs, in order of appearance (duplicates
# kept). Non-string entries (e.g. NaN for a missing output) are skipped,
# because findall() raises TypeError on them.
timestamps = []
for output in output_file:
    if not isinstance(output, str):
        continue
    timestamps.extend(timestamp_pattern.findall(output))

# Unique timestamps in first-seen order. dict.fromkeys keeps insertion order,
# whereas list(set(...)) would return them in an arbitrary order.
bgp_llm_output = list(dict.fromkeys(timestamps))

print(timestamps)

['2024-06-10 12:00:00', '2022-03-28 13:25:00', '2022-03-28 16:55:00', '2019-05-08 15:30:00', '2018-11-12 04:15:00', '2018-11-12 07:10:00', '2018-11-12 07:15:00', '2018-11-12 07:35:00', '2018-11-12 08:45:00', '2018-11-12 13:45:00', '2018-11-12 14:05:00', '2018-11-12 14:20:00', '2018-11-12 14:25:00', '2018-11-12 20:45:00', '2018-11-12 20:45:00', '2018-11-13 03:45:00', '2018-11-13 03:40:00', '2018-11-13 03:35:00', '2018-11-13 03:30:00', '2018-11-13 03:25:00', '2017-12-12 07:35:00']


In [9]:
from tablular_eval_util import combine_csv_files

directory = '/home/hb/dataset_bgp/bgp_tab_dataset_test'
combined_df = combine_csv_files(directory)
# Only the label column is needed for building the ground truth.
combined_df = combined_df[['anomaly_status']]

# Filter the rows with "anomaly detected" in the anomaly_status
# (missing values are treated as "no match").
filtered_df = combined_df[combined_df['anomaly_status'].str.contains('anomaly detected', na=False)]
# filtered_df.to_csv('/home/hb/dataset_bgp/bgp_tab_dataset_test/test_true_label.csv', index=False)

# Extract the timestamp from the anomaly_status string
true_label = filtered_df['anomaly_status'].str.extract(r'anomaly detected at (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})')

# Convert the timestamps to a list. The filter above is a plain substring
# test, so a row can pass it without matching the "anomaly detected at <ts>"
# form; str.extract yields NaN for such rows. Drop those so NaN never ends up
# among the ground-truth labels.
true_label = true_label[0].dropna().tolist()

print(true_label)

['2022-03-28 10:25:00', '2022-03-28 10:30:00', '2022-03-28 10:35:00', '2022-03-28 10:40:00', '2022-03-28 10:55:00', '2022-03-28 11:00:00', '2022-03-28 11:35:00', '2022-03-28 11:40:00', '2022-03-28 11:45:00', '2022-03-28 13:25:00', '2022-03-28 13:30:00', '2022-03-28 13:35:00', '2022-03-28 14:00:00', '2022-03-28 14:05:00', '2022-03-28 14:15:00', '2022-03-28 14:20:00', '2022-03-28 14:25:00', '2022-03-28 15:55:00', '2019-05-08 15:05:00', '2019-05-08 15:10:00', '2019-05-08 15:15:00', '2019-05-08 15:30:00', '2018-11-12 01:25:00', '2018-11-12 01:30:00', '2018-11-12 02:05:00', '2018-11-12 02:10:00', '2018-11-12 04:20:00', '2018-11-12 04:25:00', '2018-11-12 07:50:00', '2018-11-12 07:55:00', '2018-11-12 08:05:00', '2018-11-12 08:10:00', '2018-11-12 08:45:00', '2018-11-12 08:50:00', '2018-11-12 08:55:00', '2018-11-12 09:25:00', '2018-11-12 09:50:00', '2018-11-12 10:00:00', '2018-11-12 13:25:00', '2018-11-12 14:20:00', '2018-11-12 14:25:00', '2018-11-12 15:40:00', '2018-11-12 15:45:00', '2018-11-1

In [10]:
import pandas as pd
from tablular_eval_util import evaluate_llm_results

# Score the timestamps reported by the model against the ground-truth labels.
evaluation_result = evaluate_llm_results(llm_results=timestamps, true_anomalies=true_label)
# print(f"Evaluation Results {split_size} split:")

# Ratio metrics are shown with two decimals.
for title, key in (("Precision", "precision"), ("Recall", "recall"), ("F1 Score", "f1_score")):
    print(f"{title}: {evaluation_result[key]:.2f}")

# The remaining entries are printed as returned.
for title, key in (
    ("True Positives", "true_positives"),
    ("False Positives", "false_positives"),
    ("False Negatives", "false_negatives"),
):
    print(f"{title}: {evaluation_result[key]}")

Precision: 0.30
Recall: 0.08
F1 Score: 0.13
True Positives: 6
False Positives: 14
False Negatives: 65


## General BGP Analysis 

In [2]:
import json
from tablular_eval_util import combine_csv_files, split_dataframe, preprocess_data, run_llm_inference

directory = '/home/hb/dataset_bgp/bgp_tab_dataset_test'

# Merge the test CSVs and strip the label column when it is present.
combined_df = combine_csv_files(directory).drop(columns=['anomaly_status'], errors='ignore')

# Chunk size handed to split_dataframe.
split_size = 20
data_list = split_dataframe(combined_df, split_size)

# Format every chunk, then build a free-form analysis prompt from the
# `input_seg` field of the first formatted record.
formatted_data = list(map(preprocess_data, data_list))
input_test = formatted_data[0]['input_seg']
prompt = "Perform BGP analysis with the given data below: \n {}".format(input_test)
prompt

'Perform BGP analysis with the given data below: \n [TLE] The section is related to a specific time period of BGP monitoring. [TAB] col: | timestamp | asn | num_routes | num_new_routes | num_withdrawals | num_origin_changes | num_route_changes | max_path_length | avg_path_length | max_edit_distance | avg_edit_distance | num_announcements | num_unique_prefixes_announced | row 1: | 2022-03-28 07:00:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 2: | 2022-03-28 07:05:00 | 8342 | 7 | 7 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 19 | 7 | [SEP] row 3: | 2022-03-28 07:10:00 | 8342 | 0 | 0 | 7 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 4: | 2022-03-28 07:15:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 5: | 2022-03-28 07:20:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 6: | 2022-03-28 07:25:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 7: | 2022-03-28 07:30:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0

In [3]:
# Only CRITICAL messages from the transformers logger are shown from here on.
logging.set_verbosity(logging.CRITICAL)

# Text-generation pipeline around the model/tokenizer loaded earlier.
# NOTE(review): max_length presumably counts the prompt tokens as well, so a
# long table leaves little room for the answer - confirm 2050 is enough.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=2050)

# Wrap the prompt in the [INST] ... [/INST] markers before generating.
# NOTE(review): the literal "<s>" may duplicate a BOS token the tokenizer
# adds on its own - verify against the format used during fine-tuning.
result = pipe("<s>[INST] {} [/INST]".format(prompt))
print(result[0]['generated_text'])

<s>[INST] Perform BGP analysis with the given data below: 
 [TLE] The section is related to a specific time period of BGP monitoring. [TAB] col: | timestamp | asn | num_routes | num_new_routes | num_withdrawals | num_origin_changes | num_route_changes | max_path_length | avg_path_length | max_edit_distance | avg_edit_distance | num_announcements | num_unique_prefixes_announced | row 1: | 2022-03-28 07:00:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 2: | 2022-03-28 07:05:00 | 8342 | 7 | 7 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 19 | 7 | [SEP] row 3: | 2022-03-28 07:10:00 | 8342 | 0 | 0 | 7 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 4: | 2022-03-28 07:15:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 5: | 2022-03-28 07:20:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 6: | 2022-03-28 07:25:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0 | 0.0 | 0 | 0 | [SEP] row 7: | 2022-03-28 07:30:00 | 8342 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0

# GPT Evaluation

In [None]:
import pandas as pd

# Read the GPT-4o result file and keep its "timestamp" column as a plain list.
gpt4o_test = pd.read_csv(
    "/home/hb/LLM-research/finetune_main/finetuning_tabular/table_read/gpt4o_test_all.csv"
)["timestamp"].tolist()
print(gpt4o_test)

In [None]:
import pandas as pd

# Load the dataset
file_path = '/home/hb/dataset_bgp/test_all.csv'
data = pd.read_csv(file_path)

# Calculate summary statistics
summary_stats = data.describe()

# Columns to screen: everything from the fourth column on that describe()
# produced statistics for.
candidate_cols = [column for column in data.columns[3:] if column in summary_stats.columns]

# Upper 3-sigma threshold (mean + 3 * std) for each screened column.
thresholds = summary_stats.loc['mean', candidate_cols] + 3 * summary_stats.loc['std', candidate_cols]

# A row is anomalous when at least one screened column exceeds its threshold.
# One vectorized mask replaces growing a DataFrame with pd.concat inside a
# loop. It also keeps the rows in file order and still yields a frame with a
# 'timestamp' column when nothing qualifies.
is_anomalous = data[candidate_cols].gt(thresholds, axis=1).any(axis=1)

# Remove duplicates
anomalous_rows = data[is_anomalous].drop_duplicates()

# Collect the anomalous timestamps and print them (nothing is written to disk)
anomalous_timestamps = anomalous_rows['timestamp'].tolist()
print(anomalous_timestamps)