In [3]:
# %pip show bitsandbytes peft accelerate transformers

Name: bitsandbytes
Version: 0.45.0
Summary: k-bit optimizers and matrix multiplication routines.
Home-page: https://github.com/bitsandbytes-foundation/bitsandbytes
Author: Tim Dettmers
Author-email: dettmers@cs.washington.edu
License: MIT
Location: /home/hb/.conda/envs/unsloth_env/lib/python3.10/site-packages
Requires: numpy, torch, typing_extensions
Required-by: unsloth
---
Name: peft
Version: 0.14.0
Summary: Parameter-Efficient Fine-Tuning (PEFT)
Home-page: https://github.com/huggingface/peft
Author: The HuggingFace team
Author-email: benjamin@huggingface.co
License: Apache
Location: /home/hb/.local/lib/python3.10/site-packages
Requires: accelerate, huggingface-hub, numpy, packaging, psutil, pyyaml, safetensors, torch, tqdm, transformers
Required-by: unsloth, unsloth_zoo
---
Name: accelerate
Version: 1.2.1
Summary: Accelerate
Home-page: https://github.com/huggingface/accelerate
Author: The HuggingFace team
Author-email: zach.mueller@huggingface.co
License: Apache
Location: /home/hb/.

In [1]:
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA version: {torch.version.cuda}")

PyTorch version: 2.2.0
CUDA version: 12.1


In [2]:
from unsloth import FastLanguageModel
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import os

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


Unsloth: OpenAI failed to import - ignoring for now.
🦥 Unsloth Zoo will now patch everything to make training faster!


'cuda'

In [3]:
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# model_id = "unsloth/Meta-Llama-3.1-8B",

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

max_seq_length = 2048
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = max_seq_length,
    dtype = dtype,
    # load_in_4bit = load_in_4bit,
    # token = "meta-llama/Meta-Llama-3.1-8B-Instruct",
)

model.eval()
print(f"Model loaded on {device}")

==((====))==  Unsloth 2024.12.12: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA RTX A6000. Max memory: 47.529 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.2.0. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 2.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.24. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model loaded on cuda:0


In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.12.12 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [5]:
data = load_dataset("json", data_files="/home/hb/LLM-research/openai/generated_instructions/test_3.json")
data["train"]

Dataset({
    features: ['instruction', 'input', 'output', 'most_similar_instructions', 'avg_similarity_score'],
    num_rows: 2264
})

In [6]:
CUTOFF_LEN = 2048

def generate_prompt(data_point):
    """
    Create the text prompt from your instruction, input, and output fields.
    """
    return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Input:
{data_point["input"]}

### Response:
{data_point["output"]}"""

def tokenize(prompt, add_eos_token=True):
    """
    Tokenizes the prompt. Optionally pads to max_length=2048 and appends an EOS token.
    Copies input_ids to labels for causal LM.
    """
    # Here, we use padding="max_length" to get uniform-length sequences of 2048.
    # Alternatively, you can use padding=False and rely on a data collator.
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding="max_length",   # or padding=False + data_collator
        return_tensors=None,    # return raw Python lists
    )

    input_ids = result["input_ids"]
    attention_mask = result["attention_mask"]

    # Optionally place an EOS token at the very end if there's room
    if (
        add_eos_token
        and len(input_ids) == CUTOFF_LEN
        and input_ids[-1] != tokenizer.eos_token_id
    ):
        # Replace last token with EOS if you'd like
        input_ids[-1] = tokenizer.eos_token_id
        attention_mask[-1] = 1

    labels = input_ids.copy()
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

def generate_and_tokenize_prompt(data_point):
    """
    Combines prompt generation with tokenization.
    """
    full_prompt = generate_prompt(data_point)
    return tokenize(full_prompt)

# Example: split the "train" set into train/val
train_val = data["train"].train_test_split(test_size=200, shuffle=True, seed=42)
train_data = train_val["train"].map(generate_and_tokenize_prompt)
val_data   = train_val["test"].map(generate_and_tokenize_prompt)


Map: 100%|██████████| 200/200 [00:01<00:00, 138.10 examples/s]


In [7]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported


output_dir = "/home/hb/dataset_bgp/BGP-LLaMA3-5k"
per_device_train_batch_size = 1
gradient_accumulation_steps = 1
optim = "paged_adamw_32bit"
save_steps = 200
logging_steps = 500
learning_rate = 1e-4
max_grad_norm = 0.3
max_steps = 10000
warmup_ratio = 0.05
lr_scheduler_type = "cosine"


trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_data,
    eval_dataset = val_data,
    dataset_text_field = "output",
    logging_steps = 500,
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 1,
        # warmup_steps = 5,
        warmup_ratio = 0.05,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = max_steps,
        learning_rate = 1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "paged_adamw_32bit",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        seed = 3407,
        output_dir = "/home/hb/dataset_bgp/BGP-LLaMA3-5k/outputs",
        report_to = "none",
    ),
)


In [8]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2,064 | Num Epochs = 20
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 1
\        /    Total batch size = 4 | Total steps = 10,000
 "-____-"     Number of trainable parameters = 167,772,160
  5%|▌         | 500/10000 [40:44<12:55:00,  4.89s/it]

{'loss': 0.319, 'grad_norm': 0.1332283467054367, 'learning_rate': 0.0001, 'epoch': 0.97}


 10%|█         | 1000/10000 [1:21:49<12:14:31,  4.90s/it]

{'loss': 0.1595, 'grad_norm': 0.11934316158294678, 'learning_rate': 9.931806517013612e-05, 'epoch': 1.94}


 15%|█▌        | 1500/10000 [2:05:34<13:08:07,  5.56s/it]

{'loss': 0.1387, 'grad_norm': 0.10617193579673767, 'learning_rate': 9.729086208503174e-05, 'epoch': 2.91}


 20%|██        | 2000/10000 [2:51:06<10:54:36,  4.91s/it]

{'loss': 0.1223, 'grad_norm': 0.11179500818252563, 'learning_rate': 9.397368756032445e-05, 'epoch': 3.88}


 25%|██▌       | 2500/10000 [3:32:14<10:12:30,  4.90s/it]

{'loss': 0.1058, 'grad_norm': 0.1258484572172165, 'learning_rate': 8.945702546981969e-05, 'epoch': 4.84}


 30%|███       | 3000/10000 [4:13:20<9:31:23,  4.90s/it] 

{'loss': 0.0885, 'grad_norm': 0.18343402445316315, 'learning_rate': 8.386407858128706e-05, 'epoch': 5.81}


 35%|███▌      | 3500/10000 [4:54:25<8:50:44,  4.90s/it] 

{'loss': 0.0726, 'grad_norm': 0.13948319852352142, 'learning_rate': 7.734740790612136e-05, 'epoch': 6.78}


 40%|████      | 4000/10000 [5:35:32<8:09:52,  4.90s/it] 

{'loss': 0.0574, 'grad_norm': 0.1467686891555786, 'learning_rate': 7.008477123264848e-05, 'epoch': 7.75}


 45%|████▌     | 4500/10000 [6:19:33<8:08:32,  5.33s/it] 

{'loss': 0.0443, 'grad_norm': 0.22314485907554626, 'learning_rate': 6.227427435703997e-05, 'epoch': 8.72}


 50%|█████     | 5000/10000 [7:05:02<8:24:38,  6.06s/it] 

{'loss': 0.0336, 'grad_norm': 0.18578533828258514, 'learning_rate': 5.4128967273616625e-05, 'epoch': 9.69}


 55%|█████▌    | 5500/10000 [7:50:40<6:41:50,  5.36s/it] 

{'loss': 0.0258, 'grad_norm': 0.16621412336826324, 'learning_rate': 4.5871032726383386e-05, 'epoch': 10.66}


 60%|██████    | 6000/10000 [8:32:32<5:26:13,  4.89s/it]

{'loss': 0.0205, 'grad_norm': 0.08136153221130371, 'learning_rate': 3.772572564296005e-05, 'epoch': 11.63}


 65%|██████▌   | 6500/10000 [9:13:26<4:45:59,  4.90s/it]

{'loss': 0.017, 'grad_norm': 0.2654050886631012, 'learning_rate': 2.991522876735154e-05, 'epoch': 12.6}


 70%|███████   | 7000/10000 [9:55:52<4:28:46,  5.38s/it]

{'loss': 0.015, 'grad_norm': 0.04962505027651787, 'learning_rate': 2.2652592093878666e-05, 'epoch': 13.57}


 75%|███████▌  | 7500/10000 [10:45:06<4:23:18,  6.32s/it]

{'loss': 0.0136, 'grad_norm': 0.057419996708631516, 'learning_rate': 1.6135921418712956e-05, 'epoch': 14.53}


 80%|████████  | 8000/10000 [11:35:51<3:32:05,  6.36s/it]

{'loss': 0.0129, 'grad_norm': 0.048003681004047394, 'learning_rate': 1.0542974530180327e-05, 'epoch': 15.5}


 85%|████████▌ | 8500/10000 [12:23:08<2:08:00,  5.12s/it]

{'loss': 0.0123, 'grad_norm': 0.04844491183757782, 'learning_rate': 6.026312439675552e-06, 'epoch': 16.47}


 90%|█████████ | 9000/10000 [13:07:33<1:27:12,  5.23s/it]

{'loss': 0.012, 'grad_norm': 0.05417900159955025, 'learning_rate': 2.7091379149682685e-06, 'epoch': 17.44}


 95%|█████████▌| 9500/10000 [13:51:41<43:39,  5.24s/it]  

{'loss': 0.0117, 'grad_norm': 0.05017364025115967, 'learning_rate': 6.819348298638839e-07, 'epoch': 18.41}


100%|██████████| 10000/10000 [14:32:57<00:00,  4.89s/it]

{'loss': 0.0114, 'grad_norm': 0.05167119577527046, 'learning_rate': 0.0, 'epoch': 19.38}


100%|██████████| 10000/10000 [14:33:02<00:00,  5.24s/it]

{'train_runtime': 52382.053, 'train_samples_per_second': 0.764, 'train_steps_per_second': 0.191, 'train_loss': 0.06469777879714966, 'epoch': 19.38}





In [3]:
new_model = "/home/hb/dataset_bgp/finetuned_models/LLaMA3-8B-instruct-analysis-10k-adam32"

In [11]:
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

('/home/hb/dataset_bgp/finetuned_models/LLaMA3-8B-instruct-analysis-10k-adam32/tokenizer_config.json',
 '/home/hb/dataset_bgp/finetuned_models/LLaMA3-8B-instruct-analysis-10k-adam32/special_tokens_map.json',
 '/home/hb/dataset_bgp/finetuned_models/LLaMA3-8B-instruct-analysis-10k-adam32/tokenizer.json')

In [4]:
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
    TextStreamer
)
import torch
from torch import cuda, bfloat16

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map='auto',
)
model = PeftModel.from_pretrained(base_model, new_model)

model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

tokenizer.padding_side = "left"

Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00,  1.97s/it]
The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [None]:
logging.set_verbosity(logging.CRITICAL)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1012)
result = pipe(f"Summarize the AS paths for each prefix associated with ASN AS4766 over the period oct 28 13:00 to oct 28 13:15, 2024. Provide minimum, maximum, and median AS path lengths and highlight any significant path changes observed in BGP updates.")
print(result[0]['generated_text'])

Summarize the AS paths for each prefix associated with ASN AS4766 over the period oct 28 13:00 to oct 28 13:15, 2024. Provide minimum, maximum, and median AS path lengths and highlight any significant path changes observed in BGP updates. 

```python
import os
import re
import pybgpstream
import pandas as pd
from datetime import datetime, timezone
from collections import defaultdict

def analyze_as_paths():
    target_asn = "4766"
    from_time_str = "2024-10-28 13:00:00"
    until_time_str = "2024-10-28 13:15:00"
    from_time = datetime.strptime(from_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    until_time = datetime.strptime(until_time_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
    directory = "/home/hb/routeviews_bgp_updates/2024/10/route-views2"
    pattern = r'^updates\.(\d{8})\.(\d{4})\.(bz2|gz)$'
    prefix_as_paths = defaultdict(list)
    
    for root, _, files in os.walk(directory):
        for file in files:
            match = re.match(pattern

: 

In [5]:
model.push_to_hub('hyonbokan/bgp-llama-3.1-instruct-10kSteps-2kDataset')
tokenizer.push_to_hub('hyonbokan/bgp-llama-3.1-instruct-10kSteps-2kDataset')

100%|██████████| 4/4 [22:56<00:00, 344.06s/it]
100%|██████████| 1/1 [00:03<00:00,  3.29s/it]


CommitInfo(commit_url='https://huggingface.co/hyonbokan/bgp-llama-3.1-instruct-10kSteps-2kDataset/commit/b08ba811e84206d23993e5ec24841de70e0aa72a', commit_message='Upload tokenizer', commit_description='', oid='b08ba811e84206d23993e5ec24841de70e0aa72a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/hyonbokan/bgp-llama-3.1-instruct-10kSteps-2kDataset', endpoint='https://huggingface.co', repo_type='model', repo_id='hyonbokan/bgp-llama-3.1-instruct-10kSteps-2kDataset'), pr_revision=None, pr_num=None)