### Import Necessary Libraries

In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging
)

from peft import (
    LoraConfig,
    PeftModel,
    PeftConfig,
    prepare_model_for_kbit_training,
    get_peft_model
)

from accelerate import infer_auto_device_map, init_empty_weights

import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format, SFTConfig
from dotenv import load_dotenv
import bitsandbytes as bnb
from pathlib import Path

### Huggingface and WandB authentication

In [2]:
load_dotenv()

HF_KEY = os.getenv("HUGGINGFACEHUB_API_TOKEN")

In [3]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_KEY

In [4]:
os.environ["WANDB_NOTEBOOK_NAME"] = "C:/Users/User/Data Science/Deep Learning/Generative AI/Fine Tuning LLMs/fine-tuning llama 3.2 1B/research.ipynb"

In [5]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
run = wandb.init(
    project='Fine-tune Llama 3.2 on Customer Support Dataset',
    job_type="training",
    anonymous="allow"
)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

In [4]:
base_model = "meta-llama/Llama-3.2-1B-Instruct"
new_model = "llama-3.2-1b-it-Ecommerce-ChatBot"
dataset_name = "bitext/Bitext-customer-support-llm-chatbot-training-dataset"

### Set the data type and attention implementation

In [6]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

### QLoRA Config

In [7]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True
)

### Load Model

try lower version of bitsandbytes and use the load_in_4bit=True directly on the model

In [8]:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

  _ = torch.tensor([0], device=i)


### Load Tokenizer

In [9]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

### Importing the dataset

In [20]:
dataset = load_dataset(dataset_name, split="train")

In [12]:
len(dataset)

26872

FIrst, we will train with only 1000 samples of the dataset

In [21]:
train_dataset = dataset.shuffle(seed=65).select(range(1000))

In [22]:
test_dataset = dataset.shuffle(seed=65).select(range(1000, 1200))

In [88]:
len(test_dataset)

200

In [36]:
len(dataset['instruction'])

1000

### Data Preprocessing to match our chat template

change name  to 'Procurtech Assistant' when trying out the next training

In [23]:
instruction = """You are a top-rated customer service agent named John. 
    Be polite to customers and answer all their questions.
    If the question is out of context and not related to your job as a customer service agent, let the customer know that you can not help and they should look elsewhere for answers.
    """

In [24]:
def format_chat_template(row):
    from transformers import AutoTokenizer

    base_model = "meta-llama/Llama-3.2-1B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(
        base_model, trust_remote_code=True)

    instruction = """You are a top-rated customer service agent named 'Procurtech Assistant'. 
        Be polite to customers and answer all their questions.
        If the question is out of context and not related to your job as a customer service agent, let the customer know that you can not help and they should look elsewhere for answers.
        """

    row_json = [{"role": "system", "content": instruction},
                {"role": "user", "content": row["instruction"]},
                {"role": "assistant", "content": row["response"]}]

    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

In [25]:
train_dataset = train_dataset.map(format_chat_template, num_proc=2)

Map (num_proc=2):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [26]:
test_dataset = test_dataset.map(format_chat_template, num_proc=2)

Map (num_proc=2):   0%|          | 0/200 [00:00<?, ? examples/s]

In [69]:
dataset.push_to_hub("customer-support-1k", token=HF_KEY)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/Victorano/customer-support-1k/commit/157c1da2fb0e842461161ea69d8551fc26715b4d', commit_message='Upload dataset', commit_description='', oid='157c1da2fb0e842461161ea69d8551fc26715b4d', pr_url=None, pr_revision=None, pr_num=None)

In [90]:
test_dataset.push_to_hub("customer-support-test-200", token=HF_KEY)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/Victorano/customer-support-test-200/commit/af6cd0525f81ae93586b3035fb3e5adb819a651b', commit_message='Upload dataset', commit_description='', oid='af6cd0525f81ae93586b3035fb3e5adb819a651b', pr_url=None, pr_revision=None, pr_num=None)

In [33]:
dataset['text'][3]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a top-rated customer service agent named John. \n        Be polite to customers and answer all their questions.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nwhere do i enter a different shipping address<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nAh, I understand that you\'re looking to enter a different shipping address. Allow me to guide you through the process:\n\n1. Log in to your account on our website.\n2. Navigate to the "My Account" or "Profile" section, which can usually be found in the top right corner of the page.\n3. Look for the "Shipping Addresses" or similar option. It may also be labeled as "Manage Addresses" or "Delivery Information."\n4. Click on that option to access your saved addresses.\n5. To enter a different shipping address, you\'ll most likely have the choice to either "Edit" an existing address or "Add

### Downloading the saved datasets from HF

In [12]:
dataset = load_dataset("Victorano/customer-support-1k", split="train")

In [29]:
dataset['text'][999]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a top-rated customer service agent named John. \n        Be polite to customers and answer all their questions.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nchange to {{Account Type}} acount<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThank you for expressing interest in upgrading to the {{Account Type}} account! I\'d be delighted to guide you through the process:\n\n1. Access Your Account: Start by logging in to our platform using your credentials.\n2. Go to Account Settings: Once you\'re logged in, navigate to the "Account Settings" section.\n3. Upgrade Option: Look for the option that allows you to upgrade your account type.\n4. Select {{Account Type}} Account: Choose the "{{Account Type}} Account" option from the available choices.\n5. Follow the Prompts: Follow the prompts on the screen to complete the upgrade process succe

In [15]:
test_dataset = load_dataset("Victorano/customer-support-test-200", split="train")

In [28]:
test_dataset['text'][0]

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a top-rated customer service agent named John. \n        Be polite to customers and answer all their questions.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nis it possible to buy several items<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThank you for your interest in purchasing several items from us! We have a wide range of products available for you to choose from. To place your order, simply browse through our online catalog and add the items you want to your shopping cart. Once you've selected all the items you wish to purchase, proceed to the checkout page and follow the prompts to complete your order. If you have any questions or need assistance during the process, feel free to reach out to our customer service team who will be more than happy to help. Happy shopping!<|eot_id|>"

In [30]:
len(dataset), len(test_dataset)

(1000, 200)

### Setting Up the Model
extract the linear model name from the model

In [18]:
def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

In [19]:
modules = find_all_linear_names(model)

In [33]:
modules

['down_proj', 'o_proj', 'up_proj', 'q_proj', 'v_proj', 'gate_proj', 'k_proj']

Using the Linear Module name to create the LoRA Adapter, this is only what we will be fine tuning

### Lora Config

In [20]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)

In [21]:
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

In [10]:
len(tokenizer)

128256

### SFT Hyperparameter

In [31]:
training_arguments = SFTConfig(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    max_seq_length=512,
    dataset_text_field="text",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    # report_to="wandb"
)

### Supervised Fine Tuning

In [32]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [33]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888889738, max=1.0…

  0%|          | 0/500 [00:00<?, ?it/s]

{'loss': 2.082, 'grad_norm': 2.9098920822143555, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 2.1738, 'grad_norm': 3.2849280834198, 'learning_rate': 4e-05, 'epoch': 0.0}
{'loss': 2.3609, 'grad_norm': 3.45859432220459, 'learning_rate': 6e-05, 'epoch': 0.01}
{'loss': 2.4198, 'grad_norm': 2.80244517326355, 'learning_rate': 8e-05, 'epoch': 0.01}
{'loss': 2.2475, 'grad_norm': 2.3314037322998047, 'learning_rate': 0.0001, 'epoch': 0.01}
{'loss': 2.1225, 'grad_norm': 1.9117026329040527, 'learning_rate': 0.00012, 'epoch': 0.01}
{'loss': 1.8666, 'grad_norm': 1.7173044681549072, 'learning_rate': 0.00014, 'epoch': 0.01}
{'loss': 1.8698, 'grad_norm': 1.875807285308838, 'learning_rate': 0.00016, 'epoch': 0.02}
{'loss': 1.6927, 'grad_norm': 1.9178657531738281, 'learning_rate': 0.00018, 'epoch': 0.02}
{'loss': 1.6488, 'grad_norm': 1.7952669858932495, 'learning_rate': 0.0002, 'epoch': 0.02}
{'loss': 1.2951, 'grad_norm': 1.8028624057769775, 'learning_rate': 0.0001995918367346939, 'epoch': 0.02}
{'loss

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


{'loss': 0.5571, 'grad_norm': 1.250440001487732, 'learning_rate': 0.00016326530612244898, 'epoch': 0.2}


  0%|          | 0/200 [00:00<?, ?it/s]

{'eval_loss': 0.6914978623390198, 'eval_runtime': 358.3243, 'eval_samples_per_second': 0.558, 'eval_steps_per_second': 0.558, 'epoch': 0.2}
{'loss': 0.9285, 'grad_norm': 1.285718560218811, 'learning_rate': 0.00016285714285714287, 'epoch': 0.2}
{'loss': 0.9195, 'grad_norm': 1.4854040145874023, 'learning_rate': 0.00016244897959183672, 'epoch': 0.2}
{'loss': 0.8655, 'grad_norm': 1.4016894102096558, 'learning_rate': 0.0001620408163265306, 'epoch': 0.21}
{'loss': 0.7427, 'grad_norm': 1.2824108600616455, 'learning_rate': 0.0001616326530612245, 'epoch': 0.21}
{'loss': 0.8597, 'grad_norm': 1.2052674293518066, 'learning_rate': 0.00016122448979591838, 'epoch': 0.21}
{'loss': 0.9721, 'grad_norm': 1.2787489891052246, 'learning_rate': 0.00016081632653061224, 'epoch': 0.21}
{'loss': 0.633, 'grad_norm': 1.0599462985992432, 'learning_rate': 0.00016040816326530613, 'epoch': 0.21}
{'loss': 0.7294, 'grad_norm': 1.1384029388427734, 'learning_rate': 0.00016, 'epoch': 0.22}
{'loss': 0.8522, 'grad_norm': 1.1

  0%|          | 0/200 [00:00<?, ?it/s]

{'eval_loss': 0.6227226257324219, 'eval_runtime': 358.4178, 'eval_samples_per_second': 0.558, 'eval_steps_per_second': 0.558, 'epoch': 0.4}
{'loss': 0.7704, 'grad_norm': 0.9951809644699097, 'learning_rate': 0.00012204081632653061, 'epoch': 0.4}
{'loss': 0.8358, 'grad_norm': 1.064465045928955, 'learning_rate': 0.00012163265306122449, 'epoch': 0.4}
{'loss': 0.6479, 'grad_norm': 1.0055352449417114, 'learning_rate': 0.00012122448979591839, 'epoch': 0.41}
{'loss': 0.5478, 'grad_norm': 0.9605713486671448, 'learning_rate': 0.00012081632653061226, 'epoch': 0.41}
{'loss': 0.8093, 'grad_norm': 1.0541173219680786, 'learning_rate': 0.00012040816326530613, 'epoch': 0.41}
{'loss': 0.8588, 'grad_norm': 1.2313696146011353, 'learning_rate': 0.00012, 'epoch': 0.41}
{'loss': 0.7009, 'grad_norm': 1.2183709144592285, 'learning_rate': 0.00011959183673469388, 'epoch': 0.41}
{'loss': 0.8127, 'grad_norm': 1.1026209592819214, 'learning_rate': 0.00011918367346938777, 'epoch': 0.42}
{'loss': 0.6524, 'grad_norm': 

  0%|          | 0/200 [00:00<?, ?it/s]

{'eval_loss': 0.5676608085632324, 'eval_runtime': 358.3484, 'eval_samples_per_second': 0.558, 'eval_steps_per_second': 0.558, 'epoch': 0.6}
{'loss': 0.8192, 'grad_norm': 1.0733829736709595, 'learning_rate': 8.122448979591836e-05, 'epoch': 0.6}
{'loss': 0.5838, 'grad_norm': 1.2251416444778442, 'learning_rate': 8.081632653061225e-05, 'epoch': 0.6}
{'loss': 0.598, 'grad_norm': 1.0642205476760864, 'learning_rate': 8.040816326530612e-05, 'epoch': 0.61}
{'loss': 0.8126, 'grad_norm': 1.1311894655227661, 'learning_rate': 8e-05, 'epoch': 0.61}
{'loss': 0.5604, 'grad_norm': 1.0192772150039673, 'learning_rate': 7.959183673469388e-05, 'epoch': 0.61}
{'loss': 0.6134, 'grad_norm': 1.087037444114685, 'learning_rate': 7.918367346938775e-05, 'epoch': 0.61}
{'loss': 0.6472, 'grad_norm': 1.201444387435913, 'learning_rate': 7.877551020408164e-05, 'epoch': 0.61}
{'loss': 0.5207, 'grad_norm': 1.0680526494979858, 'learning_rate': 7.836734693877551e-05, 'epoch': 0.62}
{'loss': 0.5491, 'grad_norm': 1.161010622

  0%|          | 0/200 [00:00<?, ?it/s]

{'eval_loss': 0.5340343117713928, 'eval_runtime': 358.3847, 'eval_samples_per_second': 0.558, 'eval_steps_per_second': 0.558, 'epoch': 0.8}
{'loss': 0.7614, 'grad_norm': 0.9188481569290161, 'learning_rate': 4.0408163265306124e-05, 'epoch': 0.8}
{'loss': 0.784, 'grad_norm': 0.9329474568367004, 'learning_rate': 4e-05, 'epoch': 0.8}
{'loss': 0.6955, 'grad_norm': 0.9609838724136353, 'learning_rate': 3.9591836734693876e-05, 'epoch': 0.81}
{'loss': 0.5845, 'grad_norm': 1.0288286209106445, 'learning_rate': 3.9183673469387755e-05, 'epoch': 0.81}
{'loss': 0.7611, 'grad_norm': 1.1097233295440674, 'learning_rate': 3.8775510204081634e-05, 'epoch': 0.81}
{'loss': 0.6164, 'grad_norm': 1.0679646730422974, 'learning_rate': 3.836734693877551e-05, 'epoch': 0.81}
{'loss': 0.5526, 'grad_norm': 1.0703438520431519, 'learning_rate': 3.795918367346939e-05, 'epoch': 0.81}
{'loss': 0.5348, 'grad_norm': 1.0354113578796387, 'learning_rate': 3.7551020408163264e-05, 'epoch': 0.82}
{'loss': 0.6693, 'grad_norm': 1.15

  0%|          | 0/200 [00:00<?, ?it/s]

{'eval_loss': 0.5205636024475098, 'eval_runtime': 358.3779, 'eval_samples_per_second': 0.558, 'eval_steps_per_second': 0.558, 'epoch': 1.0}




{'train_runtime': 5559.2092, 'train_samples_per_second': 0.18, 'train_steps_per_second': 0.09, 'train_loss': 0.6572189555764198, 'epoch': 1.0}


TrainOutput(global_step=500, training_loss=0.6572189555764198, metrics={'train_runtime': 5559.2092, 'train_samples_per_second': 0.18, 'train_steps_per_second': 0.09, 'total_flos': 1382619445641216.0, 'train_loss': 0.6572189555764198, 'epoch': 1.0})

### Testing the Fine Tuned Model

#### Getting some test data

In [34]:
eval_dataset = dataset.shuffle(seed=65).select(range(1200, 1205))

In [35]:
eval_dataset[0]

{'flags': 'BELMQZ',
 'instruction': 'i do not know how to inform of a trouble withpayments',
 'category': 'PAYMENT',
 'intent': 'payment_issue',
 'response': "Don't worry, I'm here to guide you through the process of reporting any trouble you're experiencing with your payments. To inform us about the issue, you can reach out to our customer support team via phone or chat. They are available {{Customer Support Hours}} at {{Customer Support Phone Number}} or through the Live Chat on our website at {{Website URL}}. Our dedicated team will assist you step-by-step in notifying us about the trouble, ensuring that it gets resolved as quickly as possible. We appreciate you bringing this to our attention, and we want to assure you that we're committed to providing you with the best assistance for your payment concerns."}

#### Getting The Lora Model

In [5]:
peft_model_id = "C:/Users/User/Data Science/Deep Learning/Generative AI/Fine Tuning LLMs/fine-tuning llama 3.2 1B/llama-3.2-1b-it-Ecommerce-ChatBot/checkpoint-500"

In [6]:
model = AutoModelForCausalLM.from_pretrained(base_model, return_dict=True, torch_dtype=torch.float16, device_map="auto")

  _ = torch.tensor([0], device=i)


In [9]:
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [10]:
_, tokenizer = setup_chat_format(model, tokenizer)

In [9]:
len(tokenizer)

128258

In [11]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128258, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm):

In [11]:
_

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128258, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm):

In [14]:
model.resize_token_embeddings(len(tokenizer))

Embedding(128258, 2048)

In [13]:
config = PeftConfig.from_pretrained(peft_model_id)

In [14]:
lora_model = PeftModel.from_pretrained(model, peft_model_id)

In [17]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128258, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2048, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=2048, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048, out_features=512, bias=False)
            (lora_dropout): ModuleDict(
       

In [15]:
lora_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128258, 2048)
        (layers): ModuleList(
          (0-15): 16 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Li

#### Preprocessing the eval data

write a function that takes the instruction string and returns the prompt

In [16]:
instruction = """You are a top-rated customer service agent named John. 
    Be polite to customers and answer all their questions.
    If the question is out of context and not related to your job as a customer service agent, let the customer know that you can not help and they should look elsewhere for answers.
    """

messages = [{"role": "system", "content": instruction},
            {"role": "user", "content": "I have to see what payment payment modalities are accepted"}]

In [17]:
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [42]:
prompt

'<|im_start|>system\nYou are a top-rated customer service agent named John. \n    Be polite to customers and answer all their questions.\n    If the question is out of context and not related to your job as a customer service agent, let the customer know that you can not help and they should look elsewhere for answers.\n    <|im_end|>\n<|im_start|>user\nI have to see what payment payment modalities are accepted<|im_end|>\n<|im_start|>assistant\n'

In [30]:
def format_eval_template(row):
    from transformers import AutoTokenizer

    base_model = "meta-llama/Llama-3.2-1B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(
        base_model, trust_remote_code=True)

    instruction = """You are a top-rated customer service agent named John. 
        Be polite to customers and answer all their questions.
        If the question is out of context and not related to your job as a customer service agent, let the customer know that you can not help and they should look elsewhere for answers.
        """

    row_json = [{"role": "system", "content": instruction},
                {"role": "user", "content": row["instruction"]}]

    prompt = tokenizer.apply_chat_template(row_json, tokenize=False, add_generation_prompt=True)
    return prompt

In [31]:
eval_prompt_1 = format_eval_template(eval_dataset[0])

NameError: name 'eval_dataset' is not defined

In [45]:
eval_dataset['instruction'][0]

'i do not know how to inform of a trouble withpayments'

In [46]:
eval_prompt_1

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a top-rated customer service agent named John. \n        Be polite to customers and answer all their questions.\n        If the question is out of context and not related to your job as a customer service agent, let the customer know that you can not help and they should look elsewhere for answers.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\ni do not know how to inform of a trouble withpayments<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'

In [25]:
def preprocess_eval_data_and_pred(prompt):
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True).to("cuda")
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    outputs = lora_model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text

In [45]:
eval_dataset[0]

{'flags': 'BELMQZ',
 'instruction': 'i do not know how to inform of a trouble withpayments',
 'category': 'PAYMENT',
 'intent': 'payment_issue',
 'response': "Don't worry, I'm here to guide you through the process of reporting any trouble you're experiencing with your payments. To inform us about the issue, you can reach out to our customer support team via phone or chat. They are available {{Customer Support Hours}} at {{Customer Support Phone Number}} or through the Live Chat on our website at {{Website URL}}. Our dedicated team will assist you step-by-step in notifying us about the trouble, ensuring that it gets resolved as quickly as possible. We appreciate you bringing this to our attention, and we want to assure you that we're committed to providing you with the best assistance for your payment concerns."}

In [48]:
text = preprocess_eval_data_and_pred(eval_prompt_1)

In [49]:
print(text.split("assistant")[1])



I'm here to help. Don't worry, I'm here to assist you. Informing us about a trouble with payments can be a bit tricky, but it's a common issue. Let me walk you through the steps.

To start, could you please tell me the following information so I can better understand your concern?

1. What payment method were you using (e.g., credit card, PayPal, etc.)?
2. When did you first notice the issue?
3. What happened after that (e.g., did you try to pay online or by phone, etc.)?
4. Have you received any notifications or emails from us regarding the issue?

Once I have this information, I can guide you through the process of resolving the issue


In [20]:
lora_model.to("cuda")

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128258, 2048)
        (layers): ModuleList(
          (0-15): 16 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Li

In [19]:
response = preprocess_eval_data_and_pred(prompt)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [20]:
print(response.split("assistant")[1])


I'll take care of it! I'm here to assist you with checking the payment modalities we accept. We offer a variety of options to ensure convenience and flexibility for our customers. You can choose from credit/debit cards, PayPal, bank transfer, Apple Pay, Google Pay, and Visa. If you have any specific questions or need further assistance with any of these options, please let me know. I'm here to help!


In [52]:
message_1 = [{"role": "system", "content": instruction},
            {"role": "user", "content": "what is your name and what do you do"}]

In [53]:
prompt_1 = tokenizer.apply_chat_template(message_1, tokenize=False, add_generation_prompt=True)

In [54]:
eval_prompt_2 = preprocess_eval_data_and_pred(prompt_1)

In [55]:
print(eval_prompt_2.split("assistant")[1])


Hello! My name is John, and I'm a customer service agent. I'm here to help you with any questions or concerns you may have. Please feel free to ask me anything, and I'll do my best to assist you.


In [35]:
message_2 = [{"role": "system", "content": instruction},
             {"role": "user", "content": "what do you know about wind and air"}]

In [36]:
prompt_2 = tokenizer.apply_chat_template(
    message_2, tokenize=False, add_generation_prompt=True)

In [37]:
response_2 = preprocess_eval_data_and_pred(prompt_2)

In [38]:
print(response_2.split("assistant")[1])


Hi there! As a customer service agent, I'm happy to help you with any questions or concerns you may have about wind and air. Wind and air are fascinating topics, and I'd be delighted to provide you with some information.

Wind and air are essential components of our atmosphere, and understanding their behavior is crucial for various fields, including meteorology, aviation, and environmental science. Here are some key points about wind and air:

1. **Wind direction and speed**: Wind direction and speed are measured in knots (kt) and miles per hour (mph). Wind direction is measured from 0 to 360 degrees, while wind speed is measured from 0 to 600 mph. Wind direction is typically measured from north to south, and wind


from the response above regarding a non customer support response, we can deduce that the datasets does not contain outliers and the response the model should give in that case wasnt specified even if we added such in the prompt `I can try a few shot inference prompt and see the effect on the response`

### Merging and Exporting Fine-tuned Model

In [21]:
new_model_url = "llama-3.2-1b-it-Ecommerce-customer_support"

In [22]:
lora_model.merge_and_unload()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128258, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm):

In [23]:
lora_model.save_pretrained(new_model_url)



In [24]:
tokenizer.save_pretrained(new_model_url)

('llama-3.2-1b-it-Ecommerce-customer_support\\tokenizer_config.json',
 'llama-3.2-1b-it-Ecommerce-customer_support\\special_tokens_map.json',
 'llama-3.2-1b-it-Ecommerce-customer_support\\tokenizer.json')

### Testing our Merged Model

In [26]:
message_3 = [{"role": "system", "content": instruction},
             {"role": "user", "content": "i want to cancel my order, it is taking too long"}]
prompt_3 = tokenizer.apply_chat_template(
    message_3, tokenize=False, add_generation_prompt=True)

In [27]:
response_3 = preprocess_eval_data_and_pred(prompt_3)

In [28]:
print(response_3.split("assistant")[1])


I apologize for the inconvenience you're facing with your order. I understand that you would like to cancel it. I'm here to assist you in making the necessary changes. To proceed with the cancellation, could you please provide me with the order number? Once I have that information, I will guide you through the cancellation process step by step.


In [31]:
message_4 = [{"role": "system", "content": instruction},
             {"role": "user", "content": "i do not know how to inform of a trouble withpayments"}]
prompt_4 = tokenizer.apply_chat_template(
    message_4, tokenize=False, add_generation_prompt=True)

In [33]:
response_4 = preprocess_eval_data_and_pred(prompt_4)

In [34]:
print(response_4.split("assistant")[1])


I apologize for any inconvenience you may be facing with your payments. To inform us of the trouble you're experiencing, you can reach out to our dedicated customer support team through our website or by calling our toll-free number. They will be more than happy to assist you in resolving the issue and ensuring a smooth payment process for you. We appreciate your patience and cooperation.
