### Import Necessary Libraries

In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging
)

from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model
)

import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format, SFTConfig
from dotenv import load_dotenv
import bitsandbytes as bnb

### Huggingface and WandB authentication

In [2]:
load_dotenv()

HF_KEY = os.getenv("HUGGINGFACEHUB_API_TOKEN")

In [3]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_KEY

In [4]:
os.environ["WANDB_NOTEBOOK_NAME"] = "C:/Users/User/Data Science/Deep Learning/Generative AI/Fine Tuning LLMs/fine-tuning llama 3.2 1B/research.ipynb"

In [5]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
run = wandb.init(
    project='Fine-tune Llama 3.2 on Customer Support Dataset',
    job_type="training",
    anonymous="allow"
)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

In [7]:
base_model = "meta-llama/Llama-3.2-1B-Instruct"
new_model = "llama-3.2-3b-it-Ecommerce-ChatBot"
dataset_name = "bitext/Bitext-customer-support-llm-chatbot-training-dataset"

### Set the data type and attention implementation

In [8]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

### QLoRA Config

In [9]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True
)

### Load Model

In [10]:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

  _ = torch.tensor([0], device=i)


### Load Tokenizer

In [11]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

### Importing the dataset

In [86]:
dataset = load_dataset(dataset_name, split="train")

FIrst, we will train with only 1000 samples of the dataset

In [87]:
test_dataset = dataset.shuffle(seed=65).select(range(1000, 1200))

In [88]:
len(test_dataset)

200

In [36]:
len(dataset['instruction'])

1000

### Data Preprocessing to match our chat template

In [44]:
instruction = """You are a top-rated customer service agent named John. 
    Be polite to customers and answer all their questions.
    """

In [51]:
def format_chat_template(row):
    from transformers import AutoTokenizer

    base_model = "meta-llama/Llama-3.2-1B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(
        base_model, trust_remote_code=True)

    instruction = """You are a top-rated customer service agent named John. 
        Be polite to customers and answer all their questions.
        """

    row_json = [{"role": "system", "content": instruction},
                {"role": "user", "content": row["instruction"]},
                {"role": "assistant", "content": row["response"]}]

    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

In [52]:
dataset = dataset.map(format_chat_template, num_proc=2)

Map (num_proc=2):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [89]:
test_dataset = test_dataset.map(format_chat_template, num_proc=2)

Map (num_proc=2):   0%|          | 0/200 [00:00<?, ? examples/s]

In [69]:
dataset.push_to_hub("customer-support-1k", token=HF_KEY)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/Victorano/customer-support-1k/commit/157c1da2fb0e842461161ea69d8551fc26715b4d', commit_message='Upload dataset', commit_description='', oid='157c1da2fb0e842461161ea69d8551fc26715b4d', pr_url=None, pr_revision=None, pr_num=None)

In [90]:
test_dataset.push_to_hub("customer-support-test-200", token=HF_KEY)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/Victorano/customer-support-test-200/commit/af6cd0525f81ae93586b3035fb3e5adb819a651b', commit_message='Upload dataset', commit_description='', oid='af6cd0525f81ae93586b3035fb3e5adb819a651b', pr_url=None, pr_revision=None, pr_num=None)

In [33]:
dataset['text'][3]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a top-rated customer service agent named John. \n        Be polite to customers and answer all their questions.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nwhere do i enter a different shipping address<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nAh, I understand that you\'re looking to enter a different shipping address. Allow me to guide you through the process:\n\n1. Log in to your account on our website.\n2. Navigate to the "My Account" or "Profile" section, which can usually be found in the top right corner of the page.\n3. Look for the "Shipping Addresses" or similar option. It may also be labeled as "Manage Addresses" or "Delivery Information."\n4. Click on that option to access your saved addresses.\n5. To enter a different shipping address, you\'ll most likely have the choice to either "Edit" an existing address or "Add

### Downloading the saved datasets from HF

In [12]:
dataset = load_dataset("Victorano/customer-support-1k", split="train")

In [29]:
dataset['text'][999]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a top-rated customer service agent named John. \n        Be polite to customers and answer all their questions.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nchange to {{Account Type}} acount<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThank you for expressing interest in upgrading to the {{Account Type}} account! I\'d be delighted to guide you through the process:\n\n1. Access Your Account: Start by logging in to our platform using your credentials.\n2. Go to Account Settings: Once you\'re logged in, navigate to the "Account Settings" section.\n3. Upgrade Option: Look for the option that allows you to upgrade your account type.\n4. Select {{Account Type}} Account: Choose the "{{Account Type}} Account" option from the available choices.\n5. Follow the Prompts: Follow the prompts on the screen to complete the upgrade process succe

In [15]:
test_dataset = load_dataset("Victorano/customer-support-test-200", split="train")

In [28]:
test_dataset['text'][0]

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a top-rated customer service agent named John. \n        Be polite to customers and answer all their questions.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nis it possible to buy several items<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThank you for your interest in purchasing several items from us! We have a wide range of products available for you to choose from. To place your order, simply browse through our online catalog and add the items you want to your shopping cart. Once you've selected all the items you wish to purchase, proceed to the checkout page and follow the prompts to complete your order. If you have any questions or need assistance during the process, feel free to reach out to our customer service team who will be more than happy to help. Happy shopping!<|eot_id|>"

In [30]:
len(dataset), len(test_dataset)

(1000, 200)

### Setting Up the Model
extract the linear model name from the model

In [31]:
def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

In [32]:
modules = find_all_linear_names(model)

In [33]:
modules

['down_proj', 'o_proj', 'up_proj', 'q_proj', 'v_proj', 'gate_proj', 'k_proj']

Using the Linear Module name to create the LoRA Adapter, this is only what we will be fine tuning

### Lora Config

In [34]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

In [35]:
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

### SFT Hyperparameter

In [36]:
training_arguments = SFTConfig(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    max_seq_length=512,
    dataset_text_field="text",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

### Supervised Fine Tuning

In [37]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    eval_dataset=test_dataset,
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False
)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [None]:
trainer.train()



  0%|          | 0/500 [00:00<?, ?it/s]

{'loss': 2.0753, 'grad_norm': 2.98009991645813, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 2.196, 'grad_norm': 3.350388288497925, 'learning_rate': 4e-05, 'epoch': 0.0}
{'loss': 2.4107, 'grad_norm': 3.648029088973999, 'learning_rate': 6e-05, 'epoch': 0.01}
{'loss': 2.463, 'grad_norm': 3.2274019718170166, 'learning_rate': 8e-05, 'epoch': 0.01}
{'loss': 2.3088, 'grad_norm': 2.405561923980713, 'learning_rate': 0.0001, 'epoch': 0.01}
{'loss': 2.1519, 'grad_norm': 2.094228506088257, 'learning_rate': 0.00012, 'epoch': 0.01}
{'loss': 1.892, 'grad_norm': 1.8950233459472656, 'learning_rate': 0.00014, 'epoch': 0.01}
{'loss': 1.9028, 'grad_norm': 1.8293263912200928, 'learning_rate': 0.00016, 'epoch': 0.02}
{'loss': 1.7341, 'grad_norm': 1.8322831392288208, 'learning_rate': 0.00018, 'epoch': 0.02}
{'loss': 1.7392, 'grad_norm': 1.7804890871047974, 'learning_rate': 0.0002, 'epoch': 0.02}
{'loss': 1.3917, 'grad_norm': 1.875309944152832, 'learning_rate': 0.0001995918367346939, 'epoch': 0.02}
{'loss'

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


{'loss': 0.7397, 'grad_norm': 1.674275517463684, 'learning_rate': 0.00016326530612244898, 'epoch': 0.2}


  0%|          | 0/200 [00:00<?, ?it/s]