# 1. Loading LLaMA

In [1]:
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import os
 
import matplotlib.pyplot as plt
import matplotlib as mpl
 
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

In [2]:
model_id = 'meta-llama/Llama-2-13b-chat-hf'
# model_id = 'codellama/CodeLlama-7b-hf'
# model_id = 'hyonbokan/bgp-llama7b-6k'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_8bit=True,
)


# Need auth token for these
hf_auth = os.environ.get('hf_token')

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# device_map = {"": 0}

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=hf_auth
)

model.eval()
print(f"Model loaded on {device}")



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on cuda:0


In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)
 
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 2. Train Data Loading & Processing

In [4]:
data = load_dataset("json", data_files="/home/hb/LLM-research/dataset/5G/network_analysis/network_analysis_main_new.json")
data["train"]

Found cached dataset json (/home/hb/.cache/huggingface/datasets/json/default-a868141dc84e87dc/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['instruction', 'input', 'output', 'most_similar_instructions', 'avg_similarity_score'],
    num_rows: 3001
})

In [5]:
CUTOFF_LEN = 2048

def generate_prompt(data_point):
    return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.  # noqa: E501
### Instruction:
{data_point["instruction"]}
### Input:
{data_point["input"]}
### Response:
{data_point["output"]}"""
 
 
def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
        return_tensors=None,
    )
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < CUTOFF_LEN
        and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)
 
    result["labels"] = result["input_ids"].copy()
 
    return result
 
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenize(full_prompt)
    return tokenized_full_prompt

In [6]:
train_val = data["train"].train_test_split(
    test_size=300, shuffle=True, seed=42
)
train_data = (
    train_val["train"].map(generate_and_tokenize_prompt)
)
val_data = (
    train_val["test"].map(generate_and_tokenize_prompt)
)

Loading cached split indices for dataset at /home/hb/.cache/huggingface/datasets/json/default-a868141dc84e87dc/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-fa3204159f5a6a01.arrow and /home/hb/.cache/huggingface/datasets/json/default-a868141dc84e87dc/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-ca8d46125de1787c.arrow


Map:   0%|          | 0/2701 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

# 3. Fine-tuning

In [7]:
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64
 
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

output_dir = "./hyonbo/BGP-LLaMA13-BGPStream6k-cutoff-1024-max-2048_fpFalse"
per_device_train_batch_size = 4
gradient_accumulation_steps = 1
optim = "paged_adamw_32bit"
save_steps = 200
logging_steps = 500
learning_rate = 1e-4
max_grad_norm = 0.3
max_steps = 2000
warmup_ratio = 0.05
lr_scheduler_type = "cosine"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    num_train_epochs=5.0
)

In [8]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 52,428,800 || all params: 13,068,293,120 || trainable%: 0.40119087870597137


In [9]:
data_collator = transformers.DataCollatorForSeq2Seq(
    tokenizer, return_tensors="pt", padding=True
)

In [10]:
from trl import SFTTrainer

max_seq_length = 2048

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    peft_config=peft_config,
    dataset_text_field="output",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/2701 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [11]:
trainer.train()

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[34m[1mwandb[0m: Currently logged in as: [33mmkkanhb[0m ([33mdnlab_2023[0m). Use [1m`wandb login --relogin`[0m to force relogin
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true

OutOfMemoryError: CUDA out of memory. Tried to allocate 430.00 MiB. GPU 

# 4. Saving Finetuned Model

In [None]:
new_model = "LLaMA13-analysis-3k"

trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

('BGP-LLaMA7-BGPStream1k-real-time-increment/tokenizer_config.json',
 'BGP-LLaMA7-BGPStream1k-real-time-increment/special_tokens_map.json',
 'BGP-LLaMA7-BGPStream1k-real-time-increment/tokenizer.model',
 'BGP-LLaMA7-BGPStream1k-real-time-increment/added_tokens.json',
 'BGP-LLaMA7-BGPStream1k-real-time-increment/tokenizer.json')

In [1]:
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
    TextStreamer
)
import torch
from torch import cuda, bfloat16

model_id = 'meta-llama/Llama-2-7b-chat-hf'
# model_id = 'codellama/CodeLlama-7b-hf'
# model_id = 'hyonbokan/bgp-llama7b-6k'
# new_model = "BGP-LLaMA7-BGPStream1k-real-time-increment"
new_model = 'BGP-LLaMA7-BGPStream3k'

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map='auto',
)
model = PeftModel.from_pretrained(base_model, new_model)
# model.load_adapter(new_model2)

model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
streamer = TextStreamer(tokenizer)
prompt = "Create a PyBGPStream script that generates reports on the time trend of the total span of address space advertised by AS7296. The script should collect data from BGP announcements from 'rrc00', calculate the span of address space from 02-03-2024 13:00 to 14:59:59, and aggregate this data into a time-series report. Do not add ASN to 'filter' parameter."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1024)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Create a PyBGPStream script that generates reports on the time trend of the total span of address space advertised by AS7296. The script should collect data from BGP announcements from 'rrc00', calculate the span of address space from 02-03-2024 13:00 to 14:59:59, and aggregate this data into a time-series report. Do not add ASN to 'filter' parameter. [/INST]  Sure, here is a PyBGPStream script that generates reports on the time trend of the total span of address space advertised by AS7296 from 'rrc00' from 02-03-2024 13:00 to 14:59:59:

import pybgpstream
import time

start_time = "02-03-2024 13:00"
end_time = "02-03-2024 14:59:59"

stream = pybgpstream.BGPStream(
    project="ris-live",
    record_type="updates",
    filter="type:rib",
    filter_list=["collector:rrc00"],
    start_time=start_time,
    end_time=end_time
)

total_address_space = 0

for rec in stream.records():
    for elem in rec:
        as_path = elem.fields['as-path'].split()
        prefix = elem.fields[

In [3]:
real_time_prompt = "Create a Python script that leverages the PyBGPStream library for real-time monitoring of BGP updates, focusing specifically on the announcements and withdrawals related to a particular Autonomous System (AS), here marked as '24482'. Initialize the script by setting up a BGPStream instance configured to fetch updates from the 'ris-live' project. Prepare two pandas DataFrames, one each for tracking announcements and withdrawals, with columns for the time of the update, the prefix involved, and a count of occurrences. Design a function called process_bgp_data that operates continuously for a predetermined duration, here 180 seconds, to collect BGP updates. Within this function, filter the updates to identify those related to the AS of interest and update the respective DataFrame by either adding a new row for each unique prefix occurrence or incrementing the count for existing entries. Implement a mechanism to print out the top 5 prefixes by count from each DataFrame every 60 seconds, showcasing the most frequent announcements and withdrawals during that period before resetting the DataFrames for the next cycle of data collection. Initiate this data collection process in a separate thread to allow for concurrent execution without blocking other operations. Ensure that the entire script is designed to provide insights into the most active prefixes related to the specified AS within the live BGP feed, offering valuable information on BGP dynamics over a short, specified timeframe."

In [4]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
streamer = TextStreamer(tokenizer)
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1024)
result = pipe(f"<s>[INST] {real_time_prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Create a Python script that leverages the PyBGPStream library for real-time monitoring of BGP updates, focusing specifically on the announcements and withdrawals related to a particular Autonomous System (AS), here marked as '24482'. Initialize the script by setting up a BGPStream instance configured to fetch updates from the 'ris-live' project. Prepare two pandas DataFrames, one each for tracking announcements and withdrawals, with columns for the time of the update, the prefix involved, and a count of occurrences. Design a function called process_bgp_data that operates continuously for a predetermined duration, here 180 seconds, to collect BGP updates. Within this function, filter the updates to identify those related to the AS of interest and update the respective DataFrame by either adding a new row for each unique prefix occurrence or incrementing the count for existing entries. Implement a mechanism to print out the top 5 prefixes by count from each DataFrame every 60 s

In [8]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [9]:
model.push_to_hub('hyonbokan/bgp-llama7b-6k')
tokenizer.push_to_hub('hyonbokan/bgp-llama7b-6k')

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/hyonbokan/bgp-llama7b-6k/commit/1cccd6959afbc7bca94c445da9f378e0345c371d', commit_message='Upload tokenizer', commit_description='', oid='1cccd6959afbc7bca94c445da9f378e0345c371d', pr_url=None, pr_revision=None, pr_num=None)

## 3k real cases + real time

In [None]:
import pybgpstream
import time

start_time = "02-03-2024 13:00"
end_time = "02-03-2024 14:59:59"

stream = pybgpstream.BGPStream(
    project="ris-live",
    record_type="updates",
    filter="type:rib",
    filter_list=["collector:rrc00"],
    start_time=start_time,
    end_time=end_time
)

total_address_space = 0

for rec in stream.records():
    for elem in rec:
        as_path = elem.fields['as-path'].split()
        prefix = elem.fields['prefix']
        if '7296' in as_path:
            total_address_space += 1

time_series_report = {}
time_series_report['Time'] = []
time_series_report['Address Space'] = []

for i in range(0, int(end_time.timestamp()/1000, 10)):
    time_string = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(i))
    time_series_report['Time'].append(time_string)
    time_series_report['Address Space'].append(total_address_space)
    total_address_space = 0

print(time_series_report)

In [None]:

import pandas as pd
from datetime import datetime
from threading import Thread
import time

# Initialize the DataFrames
announcements = pd.DataFrame(columns=['time', 'prefix', 'count'])
withdrawals = pd.DataFrame(columns=['time', 'prefix', 'count'])

# Set up the BGPStream instance
stream = pybgpstream.BGPStream(
    project="ris-live",
    record_type="updates",
)

# Define the function to process BGP data
def process_bgp_data():
    for rec in stream.records():
        for elem in rec:
            as_path = elem.fields.get('as-path', '')
            if as_path == '24482':
                prefix = elem.fields['prefix']
                if elem.type == 'A':
                    announcements = announcements.append({'time': elem.time, 'prefix': prefix, 'count': 1}, ignore_index=True)
                elif elem.type == 'W':
                    withdrawals = withdrawals.append({'time': elem.time, 'prefix': prefix, 'count': 1}, ignore_index=True)

    # Update the DataFrames
    announcements['time'] = announcements['time'].astype('datetime64[s]')
    withdrawals['time'] = withdrawals['time'].astype('datetime64[s]')

    # Reset the DataFrames
    announcements['count'] = 0
    withdrawals['count'] = 0

    # Print the top 5 prefixes by count every 60 seconds
    print('Announcements:')
    top_announcements = announcements.groupby('prefix')['count'].nlargest(5, 'prefix').reset_index().sort_values(by='prefix', ascending=False).head(5)
    print(top_announcements)
    print('Withdrawals:')
    top_withdrawals = withdrawals.groupby('prefix')['count'].nlargest(5, 'prefix').reset_index().sort_values(by='prefix', ascending=False).head(5)
    print(top_withdrawals)

    # Start the data collection process in a separate thread
    thread = Thread(target=process_bgp_data)
    thread.start()

# Set the duration of the data collection process
end_time = time.time() + 180

while time.time() < end_time:
    # Check if the 180 seconds have passed
    if time.time() > end_time:
        break
    # Update the DataFrames
    process_bgp_data()
    time.sleep(1)

## 6K model outputs: pybgpstream v1

In [None]:
# time series
import pybgpstream

# Initialize the stream
stream = pybgpstream.BGPStream(
    from_time="2024-02-03 13:00:00",
    until_time="2024-02-03 14:59:59",
    collectors=["rrc00"],
    record_type="updates",
)

# Calculate the total span of address space advertised
total_span = 0
for rec in stream.records():
    for elem in rec:
        if 'prefix' in elem.fields:
            total_span += len(elem.fields['prefix'])

# Calculate the time trend of the total span
time_trend = []
for i in range(int(stream.get_next_time() - stream.get_start_time()) / 60):
    time_trend.append(total_span)

# Print the time-series report
print("Time-Series Report:")
for time_point in time_trend:
    print(f"Time: {time_point} | Total Span: {time_point} ASNs")

In [None]:
# Real time
import pandas as pd
import time
import threading
import pybgpstream

# Initialize the BGPStream instance
stream = pybgpstream.BGPStream(
    from_time=time.time(),
    project="ris-live",
    collectors="ris-live",
    record_type="updates"
)

# Initialize the two DataFrames
announce_df = pd.DataFrame(columns=["time", "prefix", "count"])
withdraw_df = pd.DataFrame(columns=["time", "prefix", "count"])

# Set up the thread to run the BGPStream instance
thread = threading.Thread(target=stream.start)

# Define the function to process BGP data
def process_bgp_data():
    while True:
        # Filter the BGP updates to find the relevant prefixes
        relevant_updates = stream.get_updates(
            filter={"peer-asn": "24482"}
        )

        # Create a new DataFrame for each type of update
        announce_df_new = pd.DataFrame(
            columns=["time", "prefix", "count"],
            data=[(relevant_update.time, relevant_update.fields["prefix"], 1) for relevant_update in relevant_updates]
        )
        withdraw_df_new = pd.DataFrame(
            columns=["time", "prefix", "count"],
            data=[(relevant_update.time, relevant_update.fields["prefix"], 1) for relevant_update in relevant_updates]
        )

        # Update the respective DataFrames
        announce_df = announce_df.append(announce_df_new, ignore_index=True)
        withdraw_df = withdraw_df.append(withdraw_df_new, ignore_index=True)

        # Print out the top 5 prefixes every 60 seconds
        if (time.time() - thread.last_active) > 60:
            top_5_announce_prefixes = announce_df.sort_values(by="count", ascending=False).head(5)
            top_5_withdraw_prefixes = withdraw_df.sort_values(by="count", ascending=False).head(5)

            print(f"Top 5 announcements: {top_5_announce_prefixes['prefix']}")
            print(f"Top 5 withdrawals: {top_5_withdraw_prefixes['prefix']}")

        thread.last_active = time.time()

# Start the thread
thread.start()