# 1. Loading LLaMA

In [1]:
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset

import os
 
import matplotlib.pyplot as plt
import matplotlib as mpl
# import seaborn as sns
from pylab import rcParams
# from trl import SFTTrainer
 
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

In [2]:
model_id = 'meta-llama/Llama-2-13b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_8bit=True,
)


# Need auth token for these
hf_auth = os.environ.get('hf_token')

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on cuda:0


In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)
 
# tokenizer.pad_token_id = (0)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 2. Train Data Loading & Processing

In [4]:
data = load_dataset("json", data_files="/home/hb/LLM-research/finetuning_dataset/5G/Mobile_LLaMA_1.json")
data["train"]

Found cached dataset json (/home/hb/.cache/huggingface/datasets/json/default-9a80aede2aaf45ed/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['instruction', 'input', 'output', 'most_similar_instructions', 'avg_similarity_score'],
    num_rows: 13610
})

In [5]:
CUTOFF_LEN = 2048
# Change the prompt based on common errors.
def generate_prompt(data_point):
    return f"""Below is an instruction that describes a task, paired with an output that provides the completion of the task.  # noqa: E501
### Instruction:
{data_point["instruction"]}
### Response:
{data_point["output"]}"""

def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
        return_tensors=None,
    )
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < CUTOFF_LEN
        and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    result["labels"] = result["input_ids"].copy()

    return result

def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenize(full_prompt)
    return tokenized_full_prompt


In [6]:
train_val = data["train"].train_test_split(
    test_size=1300, shuffle=True, seed=42
)
train_data = (
    train_val["train"].map(generate_and_tokenize_prompt)
)
val_data = (
    train_val["test"].map(generate_and_tokenize_prompt)
)

Loading cached split indices for dataset at /home/hb/.cache/huggingface/datasets/json/default-9a80aede2aaf45ed/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-d0eebc9f5069d181.arrow and /home/hb/.cache/huggingface/datasets/json/default-9a80aede2aaf45ed/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-196bbf850e15c107.arrow
Loading cached processed dataset at /home/hb/.cache/huggingface/datasets/json/default-9a80aede2aaf45ed/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-b7b0accd17af5511.arrow
Loading cached processed dataset at /home/hb/.cache/huggingface/datasets/json/default-9a80aede2aaf45ed/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-06da6036ed081662.arrow


# 3. Fine-tuning

In [7]:
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments
# from transformers.generation.utils import TrainingArguments

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64
 
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

output_dir = "hyonbo/mobile_llama_2kEpoch"
per_device_train_batch_size = 4
gradient_accumulation_steps = 1
optim = "paged_adamw_32bit"
save_steps = 500
logging_steps = 200
learning_rate = 1e-4
max_grad_norm = 0.3
max_steps = 10000
warmup_ratio = 0.05
lr_scheduler_type = "cosine"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    # save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    num_train_epochs=5.0
    
)

In [8]:
data_collator = transformers.DataCollatorForSeq2Seq(
    tokenizer, return_tensors="pt", padding=True
)

In [9]:
from trl import SFTTrainer

max_seq_length = None

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    peft_config=peft_config,
    dataset_text_field="output",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

Loading cached processed dataset at /home/hb/.cache/huggingface/datasets/json/default-9a80aede2aaf45ed/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-761e9898627458b0.arrow


Map:   0%|          | 0/1300 [00:00<?, ? examples/s]

In [10]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[34m[1mwandb[0m: Currently logged in as: [33mmkkanhb[0m ([33mdnlab_2023[0m). Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
200,1.1486
400,0.7325
600,0.6503
800,0.6064
1000,0.5883
1200,0.5716
1400,0.5679
1600,0.5629
1800,0.5649
2000,0.5725




RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


# 4. Saving Finetuned Model

In [None]:
new_model = "mobile_llama_10k_epoch5"

trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

('mobile_llama_10k_fp16_false_epoch5/tokenizer_config.json',
 'mobile_llama_10k_fp16_false_epoch5/special_tokens_map.json',
 'mobile_llama_10k_fp16_false_epoch5/tokenizer.model',
 'mobile_llama_10k_fp16_false_epoch5/added_tokens.json',
 'mobile_llama_10k_fp16_false_epoch5/tokenizer.json')

In [1]:
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
)
import torch

model_id = 'meta-llama/Llama-2-13b-chat-hf'
new_model = "mobile_llama_10k_epoch5"

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map='auto',
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
from huggingface_hub import notebook_login
notebook_login()

In [None]:
model.push_to_hub('hyonbokan/mobile_llama_10kEpoch')
tokenizer.push_to_hub('hyonbokan/mobile_llama_10kEpoch')

# 5. Evaluation

In [2]:
# Run text generation pipeline with our next model
prompt = "Generate Python code to calculate 5G network performance KPIs: Total Network Capacity, Capacity per Area, Capacity per Point, Cost per Capacity, Cost per Area, and Surplus per Area. Load data from '5G_Infrastructure/demand_driven_postcode_data_results.csv'. Use keywords: 'capacity', 'cost', 'area', 'numpoints' to identify relevant columns."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1024)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s>[INST] Generate Python code to calculate 5G network performance KPIs: Total Network Capacity, Capacity per Area, Capacity per Point, Cost per Capacity, Cost per Area, and Surplus per Area. Load data from '5G_Infrastructure/demand_driven_postcode_data_results.csv'. Use keywords: 'capacity', 'cost', 'area', 'numpoints' to identify relevant columns. [/INST]  Here is a Python script that calculates the 5G network performance KPIs you requested:

import pandas as pd

# Load the CSV file
df = pd.read_csv('5G_Infrastructure/demand_driven_postcode_data_results.csv')

# Calculate Total Network Capacity
total_capacity = df['capacity'].sum()

# Calculate Capacity per Area
capacity_per_area = df['capacity'] / df['area']

# Calculate Capacity per Point
capacity_per_point = df['capacity'] / df['numpoints']

# Calculate Cost per Capacity
cost_per_capacity = df['cost'] / df['capacity']

# Calculate Cost per Area
cost_per_area = df['cost'] / df['area']

# Calculate Surplus per Area
surplus_per_area 

In [3]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Given a n3.pcap file containing network traffic data, create a Python script using Scapy to process the PCAP. The script should extract the source IP, destination IP, source port, destination port, and the raw payload data for each packet in the PCAP. Impletent try-except blocks for errors if expected layers IP, TCP/UDP, Raw. Store this data in a pandas DataFrame and display it."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=712)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Given a n3.pcap file containing network traffic data, create a Python script using Scapy to process the PCAP. The script should extract the source IP, destination IP, source port, destination port, and the raw payload data for each packet in the PCAP. Impletent try-except blocks for errors if expected layers IP, TCP/UDP, Raw. Store this data in a pandas DataFrame and display it. [/INST]  Sure! Here is a Python script using Scapy to process a n3.pcap file and extract the source IP, destination IP, source port, destination port, and the raw payload data for each packet in the PCAP:
```
import pandas as pd
import scapy.all as scapy

# Load the PCAP file
def load_pcap(file_path):
    try:
        # Read the PCAP file
        packets = rdpcap(file_path)
        return packets
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

# Extract packet information
def extract_packet_info(packets):
    packet_data = []
    for packet in packets:
   