# Fine tuning Falcon 7B model with Ecommerce FAQ dataset

PEFT: Parameter-Efficient Fine-Tuning
* In short, PEFT approaches enable you to get performance comparable to full fine-tuning while only having a small number of trainable parameters.
* https://huggingface.co/blog/peft

In [None]:
!pip install -Uqqq pip --progress-bar off
# !pip install -qqq torch --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e303a9cc --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f --progress-bar off
# !pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71 --progress-bar off
!pip install -qqq datasets loralib accelerate einops ipywidgets --progress-bar off
# !pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git
!pip install -Uqqq accelerate --progress-bar off
!pip install -Uqqq deepspeed transformers --progress-bar off
!pip install -qqq tensorboard --progress-bar off
!pip install -qqq optuna bert_score evaluate --progress-bar off
!pip install -Uq bitsandbytes --progress-bar off

In [None]:
!nvidia-smi

In [None]:
!pip show torch
!pip show accelerate

In [None]:
import sys
print(sys.version)

In [None]:
import json
import os 
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset, DatasetDict
from evaluate import load
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from bert_score import score
import optuna
import warnings

# Filter specific warnings
warnings.filterwarnings('ignore', category=UserWarning, message="None of the inputs have requires_grad=True. Gradients will be None")
warnings.filterwarnings('ignore', message="`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...")
warnings.filterwarnings('ignore', message="You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.")

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

## Data

In [None]:
# Load the raw FAQ export. JSON is UTF-8 by specification, so the encoding is
# given explicitly instead of relying on the platform's default locale.
with open("data/Ecommerce_FAQ_Chatbot_dataset.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

In [None]:
# pprint(data["questions"][0], sort_dicts=False)
# pprint(data["questions"][1], sort_dicts=False)
# pprint(data["questions"][2], sort_dicts=False)
# pprint(data["questions"][3], sort_dicts=False)

In [None]:
with open("data/dataset.json", "w") as f:
    json.dump(data["questions"], f)

In [None]:
pd.DataFrame(data["questions"]).head()

## Load Falcon Model & Tokenizer

In [None]:
def get_model_tokenizer(model_name):
    """Load a 4-bit quantized causal LM wrapped with a LoRA adapter, plus its tokenizer.

    Args:
        model_name: Hub id or local path handed to ``from_pretrained`` for
            both the model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple. ``model`` is the quantized base model
        after ``get_peft_model``; ``tokenizer`` pads on the left and uses its
        EOS token as the padding token.
    """
    # 4-bit NF4 weights with double quantization; compute dtype is bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        trust_remote_code=True,
        quantization_config=bnb_config,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
    # Reuse EOS as the padding token instead of adding a new token, so the
    # embedding matrix does not have to be resized.
    tokenizer.pad_token = tokenizer.eos_token

    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["query_key_value"],
        lora_dropout=0.05,
        bias="none",
        # Must be spelled "CAUSAL_LM" to match PEFT's TaskType; the previous
        # "CASUAL_LM" is not a task type PEFT knows about.
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, lora_config)

    return model, tokenizer

# Base checkpoint used for the pre-fine-tuning inference cells below.
model_name = "tiiuae/falcon-7b"
model, tokenizer = get_model_tokenizer(model_name)

# NOTE(review): get_model_tokenizer() has already wrapped the model with
# get_peft_model(), so prepare_model_for_kbit_training() runs on the adapter-
# wrapped model here. PEFT's examples call it before get_peft_model(); confirm
# this order does not freeze the LoRA weights if this `model` is ever trained.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# model_name = "tiiuae/falcon-7b"

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     trust_remote_code=True,
#     quantization_config=bnb_config,
# )

# tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenizer.pad_token = tokenizer.eos_token

In [None]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters require gradients.

    Prints the trainable element count, the total element count and the
    trainable share as a percentage.
    """
    # One (element count, requires_grad) pair per named parameter.
    sizes = [(tensor.numel(), tensor.requires_grad) for _, tensor in model.named_parameters()]
    total_count = sum(count for count, _ in sizes)
    trainable_count = sum(count for count, needs_grad in sizes if needs_grad)
    share = 100 * trainable_count / total_count

    print(f"trainable params: {trainable_count} || all params: {total_count} || trainable%: {share}")

In [None]:
# model.gradient_checkpointing_enable()
# model = prepare_model_for_kbit_training(model)

In [None]:
print(model)

## Inference Before Training

In [None]:
# Single-turn prompt for the pre-fine-tuning smoke test. The stray "/n" (a
# mistyped newline escape) is removed: the literal already contains the real
# line break, and no placeholders are interpolated so no f-prefix is needed.
prompt = """
<human>: How can I create an account?
<assistant>:
""".strip()
print(prompt)

In [None]:
# Reuse the model's own generation config object and override its decoding
# settings in place; later cells pass this object to model.generate().
# NOTE(review): temperature/top_p normally only take effect when sampling is
# enabled, and do_sample is not set in this cell; confirm the intended
# decoding mode.
generation_config = model.generation_config
generation_config.max_new_tokens = 100
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# Pad with the EOS id, mirroring tokenizer.pad_token = tokenizer.eos_token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
%%time
# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE = "cuda:0"
# torch.cuda.set_device(DEVICE)

In [None]:
print(next(model.parameters()).device)  # This will show the device of the model

In [None]:
encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)

with torch.inference_mode():
    outputs = model.generate(
        input_ids = encoding.input_ids,
        attention_mask = encoding.attention_mask,
        generation_config = generation_config,
    )

In [None]:
# inference result before fine-tuning
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
eval_questions = [
    "What are the steps to create an account?",  # Original: 'How can I create an account?'
    "Which types of payment can I use?",  # Original: 'What payment methods do you accept?'
    "How do I monitor the status of my order?",  # Original: 'How can I track my order?'
    "Can you describe your policy on returns?",  # Original: 'What is your return policy?'
    "Is it possible to return an item that was bought during a final sale or clearance?",  # Original: 'Can I return a product if it was a clearance or final sale item?'
    "What happend if I return clearance item?"
]

for question in eval_questions:
    prompt = f"""
    User: {question}
    AI: 
    """.strip()
    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)

    with torch.inference_mode():
        outputs = model.generate(
            input_ids = encoding.input_ids,
            attention_mask = encoding.attention_mask,
            generation_config = generation_config,
        )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

## Build huggingface Dataset

In [None]:
data = load_dataset("json", data_files="data/dataset.json")
# data = data["questions"]
data

In [None]:
data["train"][0]

In [None]:
def generate_prompt(data_point):
    """Render one FAQ record as a two-line ``User:`` / ``AI:`` training prompt.

    Only the outer whitespace is stripped, so the ``AI:`` line keeps its
    four-space indent in the returned string.
    """
    template = "\n    User: {question}\n    AI: {answer}\n    "
    rendered = template.format(
        question=data_point["question"],
        answer=data_point["answer"],
    )
    return rendered.strip()

def generate_and_tokenize_prompt(data_point):
    """Build the training prompt for one record and tokenize it.

    Uses the module-level ``tokenizer`` with ``padding=True`` and
    ``truncation=True`` and returns whatever the tokenizer call produces.
    """
    return tokenizer(generate_prompt(data_point), padding=True, truncation=True)

In [None]:
train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)

In [None]:
# Drop the 'question', 'answer', 'token_type_ids' columns
train_data = train_data.remove_columns(['question', 'answer', 'token_type_ids'])
train_data

In [None]:
len(train_data), type(train_data)

## HPO

In [None]:
output_dir = "experiments"

In [None]:
eval_questions = [
    "What are the steps to create an account?",  # Original: 'How can I create an account?'
    "Which types of payment can I use?",  # Original: 'What payment methods do you accept?'
    "How do I monitor the status of my order?",  # Original: 'How can I track my order?'
    "Can you describe your policy on returns?",  # Original: 'What is your return policy?'
    "Is it possible to return an item that was bought during a final sale or clearance?",  # Original: 'Can I return a product if it was a clearance or final sale item?'
    "What happend if I return clearance item?"
]

eval_answers = [
    "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.",
    "We accept major credit cards, debit cards, and PayPal as payment methods for online orders.",
    "You can track your order by logging into your account and navigating to the 'Order History' section. There, you will find the tracking information for your shipment.",
    "Our return policy allows you to return products within 30 days of purchase for a full refund, provided they are in their original condition and packaging. Please refer to our Returns page for detailed instructions.",
    "Clearance or final sale items are typically non-returnable and non-refundable. Please review the product description or contact our customer support team for more information.",
    "Returning clearance items is generally not possible. Please check the product description or contact our customer support team for more information. Please note that clearance items are typically final sale and cannot be returned or exchanged. I hope this helps! If you have any additional questions, please feel free to contact our customer support team. We are happy to assist you." ]

In [None]:
def inference_data(prompt, model, tokenizer):
    """Generate a completion for *prompt* and return the decoded text.

    Logs the prompt and the device of the model's first parameter, switches
    off ``gradient_checkpointing`` and ``use_cache`` on ``model.config``, and
    generates with the module-level ``generation_config``.

    Args:
        prompt: Text passed to the tokenizer unchanged.
        model: Model exposing ``parameters``, ``config`` and ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
    print(f"Question for the inference:\n{prompt}")

    # Inputs are always moved to 'cuda', regardless of where the model lives.
    batch = tokenizer(prompt, padding=True, truncation=True, return_tensors="pt").to('cuda')

    first_param = next(model.parameters())
    print(first_param.device)
    model.config.gradient_checkpointing = False
    model.config.use_cache = False

    input_ids = batch.input_ids.requires_grad_(False)
    attention_mask = batch.attention_mask.requires_grad_(False)
    with torch.inference_mode():
        generated = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
        )

    return tokenizer.decode(generated[0], skip_special_tokens=True)

def bertscore_metrics(eval_questions, eval_answers, model, tokenizer):
    """Score the model's generations against reference answers with BERTScore.

    Each question is sent through ``inference_data``; predictions and
    references are stripped and lower-cased before scoring with the
    ``distilbert-base-uncased`` BERTScore backend.

    Args:
        eval_questions: Questions to generate answers for.
        eval_answers: Reference answers, indexed in parallel with the questions.
        model: Model forwarded to ``inference_data``.
        tokenizer: Tokenizer forwarded to ``inference_data``.

    Returns:
        A dict with the mean ``precision``, ``recall`` and ``f1``.
    """
    # NOTE(review): questions are passed to inference_data() without the
    # "User:/AI:" template, and the whole decoded output is scored; confirm
    # that is the intended evaluation input.
    predictions = []
    labels = []

    for idx, question in enumerate(eval_questions):
        prediction = inference_data(question, model, tokenizer)
        label = eval_answers[idx]
        print(f"----------------------------predictions----------------------------\n{prediction}")
        print(prediction.split('\n'))
        print(f"----------------------------answers----------------------------\n{label}")

        predictions.append(prediction.strip().lower())
        labels.append(label.strip().lower())

    scorer = load("bertscore")
    results = scorer.compute(predictions=predictions, references=labels, model_type="distilbert-base-uncased")

    # Collapse the per-example scores into one mean per metric.
    avg_results = {}
    for metric in ('precision', 'recall', 'f1'):
        values = results[metric]
        avg_results[metric] = sum(values) / len(values)

    print("avg_results", avg_results)
    return avg_results



In [None]:
def objective(trial):
    """Optuna objective: fine-tune a fresh LoRA model and return its BERTScore F1.

    Samples a learning rate, epoch count, step budget and batch size from
    ``trial``, trains a newly loaded model on ``train_data``, then scores its
    generations for ``eval_questions`` against ``eval_answers``.

    Args:
        trial: The ``optuna`` trial that supplies hyperparameter suggestions.

    Returns:
        The averaged BERTScore F1 reported by ``bertscore_metrics``.
    """
    import gc  # local import: only needed for the per-trial cleanup below

    # Define hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-3, log=True)
    # NOTE(review): Trainer documents that a positive max_steps overrides
    # num_train_epochs, so this suggestion presumably has no effect. It is
    # kept because later code reads best_params['num_train_epochs'].
    num_train_epochs = trial.suggest_int("num_train_epochs", 1, 10)
    max_steps = trial.suggest_categorical("max_steps", list(range(80, 241, 20)))
    per_device_train_batch_size = trial.suggest_categorical("per_device_train_batch_size", [1, 2, 4])

    print("######################################################################################################################")
    # The trial total is not printed: studies run with different n_trials
    # values, so a hard-coded "/100" was wrong for some of them.
    print(f"[trial {trial.number + 1}] --- learning_rate:{learning_rate} | num_train_epochs:{num_train_epochs} | max_steps:{max_steps} | per_device_train_batch_size:{per_device_train_batch_size}")

    # Use hyperparameters in TrainingArguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        learning_rate=learning_rate,
        per_device_train_batch_size=per_device_train_batch_size,
        num_train_epochs=num_train_epochs,
        warmup_ratio=0.1,
        max_steps=max_steps,
        # Other fixed parameters
        remove_unused_columns=False,
        fp16=False,
        save_total_limit=3,
        logging_steps=10,
        optim="paged_adamw_8bit",
        lr_scheduler_type="cosine",
    )

    model_name = "tiiuae/falcon-7b"
    new_model, new_tokenizer = get_model_tokenizer(model_name)

    # Configure the model that is actually trained in this trial. These
    # settings used to be applied to the unrelated global `model` by mistake.
    new_model.config.gradient_checkpointing = False
    new_model.config.use_cache = False  # Disable caching
    print(next(new_model.parameters()).device)

    # Define Trainer
    trainer = Trainer(
        model=new_model,
        args=training_args,
        train_dataset=train_data,
        data_collator=transformers.DataCollatorForLanguageModeling(new_tokenizer, mlm=False),
    )

    try:
        # Train the model
        trainer.train()
        print("after training")

        avg_results = bertscore_metrics(eval_questions, eval_answers, new_model, new_tokenizer)
        print(avg_results)

        return avg_results['f1']
    finally:
        # Every trial loads a full model; drop it before the next trial loads
        # another one so GPU memory does not pile up across the study.
        del trainer, new_model
        gc.collect()
        torch.cuda.empty_cache()

# Create a study to run hyperparameter optimization
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)


## Training with the best parameters

In [None]:
# Get the best hyperparameters

# Pick the best trial of the finished study and, if its score is below 0.8,
# keep running new 50-trial studies until one reaches the threshold. The final
# model is then retrained with the winning hyperparameters and saved.
best_trial = study.best_trial
print(f"Best Trial: score {best_trial.value}, params {best_trial.params}")

best_score = best_trial.value
if best_score < 0.8:
    
    print(f'Best score is {best_trial.value} < 0.8 !!!!! Start the loop')
    # NOTE(review): this loop has no iteration cap, and each pass creates a
    # brand-new study, so trials from earlier studies are not reused. It only
    # ends once a study's best score reaches 0.8.
    while best_trial.value < 0.8:
        # Create a study to run hyperparameter optimization
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=50)
        best_trial = study.best_trial
        best_score = best_trial.value

print(f'Best score is {best_trial.value}!!!!!')

best_params = best_trial.params

# Start from a freshly loaded base model + LoRA adapter for the final run.
model_name = "tiiuae/falcon-7b"
model, tokenizer = get_model_tokenizer(model_name)

model.config.gradient_checkpointing = False
print(model.parameters().__next__().device)

# Train model with best hyperparameters
# NOTE(review): Trainer documents that a positive max_steps overrides
# num_train_epochs, so the epoch value below presumably has no effect.
training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=best_params['learning_rate'],
    per_device_train_batch_size=best_params['per_device_train_batch_size'],
    num_train_epochs=best_params['num_train_epochs'],
    warmup_ratio=0.1,
    max_steps = best_params['max_steps'],
    # Other fixed parameters
    remove_unused_columns=False,
    fp16=False,
    save_total_limit=3,  
    logging_steps=1,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    # report_to="tensorboard"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

# Save the model
# Both model.save_pretrained() and trainer.save_model() write to the same
# directory; the tokenizer files are stored alongside them.
peft_model_dir = './ecommerce-FAQ-chatbot-model/Optuna'
model.save_pretrained(peft_model_dir)
trainer.save_model(peft_model_dir)
tokenizer.save_pretrained(peft_model_dir)

### Load fine-tuned model and tokenizer

https://huggingface.co/blog/peft

In [None]:
# Load the model
# Read the saved adapter config; it records which base checkpoint to reload.
config = PeftConfig.from_pretrained(peft_model_dir)

# Same 4-bit NF4 quantization settings as get_model_tokenizer().
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

# NOTE(review): device_map is commented out here, so device placement is left
# to from_pretrained's defaults; confirm this matches the training setup.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path, 
    return_dict=True,
    quantization_config=bnb_config,
    # device_map="auto",
    trust_remote_code=True,
)
# Attach the fine-tuned LoRA weights to the reloaded base model.
model = PeftModel.from_pretrained(model, peft_model_dir)
# Load the tokenizer
# NOTE(review): unlike get_model_tokenizer(), padding_side='left' is not set
# here; confirm whether that difference is intended.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path,  trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

## Evaluation

### True dataset
1. {'question': 'How can I create an account?',
 'answer': "To create an account, click on the 'Sign Up' button on the top "
           'right corner of our website and follow the instructions to '
           'complete the registration process.'}
           
           
2. {'question': 'What payment methods do you accept?',
 'answer': 'We accept major credit cards, debit cards, and PayPal as payment '
           'methods for online orders.'}
           
           
3. {'question': 'How can I track my order?',
 'answer': 'You can track your order by logging into your account and '
           "navigating to the 'Order History' section. There, you will find "
           'the tracking information for your shipment.'}
           
                    
4. {'question': 'What is your return policy?',
 'answer': 'Our return policy allows you to return products within 30 days of purchase for a full refund, provided they are in their original condition and packaging. Please refer to our Returns page for detailed instructions.'}


5. {'question': 'Can I return a product if it was a clearance or final sale item?',
 'answer': 'Clearance or final sale items are typically non-returnable and non-refundable. Please review the product description or contact our customer support team for more information.'}

In [None]:
eval_questions = [
    "What are the steps to establish an account?",  # Original: 'How can I create an account?'
    "Which types of payment can I use?",  # Original: 'What payment methods do you accept?'
    "How do I monitor the status of my order?",  # Original: 'How can I track my order?'
    "Can you describe your policy on returns?",  # Original: 'What is your return policy?'
    "Is it possible to return an item that was bought during a final sale or clearance?",  # Original: 'Can I return a product if it was a clearance or final sale item?'
    "What happend if I return clearance item?"
]

eval_answers = [
    "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.",
    "We accept major credit cards, debit cards, and PayPal as payment methods for online orders.",
    "You can track your order by logging into your account and navigating to the 'Order History' section. There, you will find the tracking information for your shipment.",
    "Our return policy allows you to return products within 30 days of purchase for a full refund, provided they are in their original condition and packaging. Please refer to our Returns page for detailed instructions.",
    "Clearance or final sale items are typically non-returnable and non-refundable. Please review the product description or contact our customer support team for more information.",
    "Clearance or final sale items are typically non-returnable and non-refundable. Please review the product description or contact our customer support team for more information."
]


In [None]:
avg_results = bertscore_metrics(eval_questions, eval_answers, model, tokenizer)
print(avg_results)

In [None]:
# clear the GPU cache
torch.cuda.empty_cache()

## Inference

In [None]:
# Rebind the module-level generation_config to the reloaded model's config
# object and apply the same decoding settings as before fine-tuning.
# NOTE(review): temperature/top_p normally only take effect when sampling is
# enabled, and do_sample is not set in this cell; confirm the intended
# decoding mode.
generation_config = model.generation_config
generation_config.max_new_tokens = 100
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# Pad with the EOS id, mirroring tokenizer.pad_token = tokenizer.eos_token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
tokenizer.eos_token_id

In [None]:
model.to('cuda')

In [None]:
def cut_at_last_comma(text):
    """Trim *text* so that it ends at its final period.

    Despite the function's name, the delimiter searched for is ``'.'``, not a
    comma. Everything after the last period is dropped; text without any
    period is returned unchanged.
    """
    kept, period, _ = text.rpartition('.')
    if not period:
        # No period anywhere: nothing to trim.
        return text
    return kept + period

def post_processing_response(question, model, tokenizer):
    """Generate an answer for *question* and return only the assistant's reply.

    The question is wrapped in the ``User:`` / ``AI:`` template, the model
    generates with the module-level ``generation_config``, and the decoded
    text is cut down to what follows the first ``AI:`` marker, stopping at a
    second ``AI:`` marker if the model produced one. The result is finally
    truncated after its last period by ``cut_at_last_comma``.

    Args:
        question: The user's question as plain text.
        model: Model exposing ``generate``; inputs are moved to ``'cuda'``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.

    Returns:
        The extracted reply. If the decoded text contains no ``AI:`` marker,
        the whole decoded text is used instead.
    """
    assistant_start = "AI:"

    # Same layout generate_prompt() gives the training examples, including the
    # four-space indent before "AI:". The old prompt used an eight-space
    # indent, which did not match the training template.
    prompt = f"User: {question}\n    {assistant_start}"

    encoding = tokenizer(prompt, return_tensors="pt")
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids.to('cuda'),
            attention_mask=encoding.attention_mask.to('cuda'),
            generation_config=generation_config,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    response_start = response.find(assistant_start)
    if response_start < 0:
        print(f"'{assistant_start}' not found in response")

    print(response)
    print("------------------------------------------------------------------------------------\n")

    if response_start < 0:
        # Marker missing: fall back to the whole text. The end index used to
        # be unbound on this path, which raised UnboundLocalError.
        reply = response
    else:
        reply_start = response_start + len(assistant_start)
        next_turn = response.find(assistant_start, reply_start)
        if next_turn < 0:
            # No second marker: keep everything to the end. Slicing with the
            # -1 returned by find() used to drop the final character.
            reply = response[reply_start:]
        else:
            reply = response[reply_start:next_turn]

    return cut_at_last_comma(reply.strip())

### True dataset
1. {'question': 'How can I create an account?',
 'answer': "To create an account, click on the 'Sign Up' button on the top "
           'right corner of our website and follow the instructions to '
           'complete the registration process.'}
           
           
2. {'question': 'What payment methods do you accept?',
 'answer': 'We accept major credit cards, debit cards, and PayPal as payment '
           'methods for online orders.'}
           
           
3. {'question': 'How can I track my order?',
 'answer': 'You can track your order by logging into your account and '
           "navigating to the 'Order History' section. There, you will find "
           'the tracking information for your shipment.'}
           
           
4. {'question': 'What is your return policy?',
 'answer': 'Our return policy allows you to return products within 30 days of '
           'purchase for a full refund, provided they are in their original '
           'condition and packaging. Please refer to our Returns page for '
           'detailed instructions.'}

In [None]:
prompt = "Can I return a product if it was a clearance or final sale item?"
print(post_processing_response(prompt, model, tokenizer))

In [None]:
prompt = "What happens when I return a clearance item?"
print(post_processing_response(prompt, model, tokenizer))

In [None]:
prompt = "How do I know when I'll receive my order?"
print(post_processing_response(prompt, model, tokenizer))

In [None]:
prompt = "Do you accept credit caards or paypal?"
print(post_processing_response(prompt, model, tokenizer))

In [None]:
prompt = "Tell me how to make a new account"
print(post_processing_response(prompt, model, tokenizer))

In [None]:
prompt = "I want to track my order, can you tell me how to do?"
print(post_processing_response(prompt, model, tokenizer))

In [None]:
prompt = "Tell me the return policy"
print(post_processing_response(prompt, model, tokenizer))

## Compare the zero-shot results and fine-tuned results

In [None]:
model_name = "tiiuae/falcon-7b"
ori_model, ori_tokenizer = get_model_tokenizer(model_name)

In [None]:
prompt = "Can I return a product if it was a clearance or final sale item?"

In [None]:
print(post_processing_response(prompt, ori_model, ori_tokenizer))
print(post_processing_response(prompt, model, tokenizer))

In [None]:
prompt = "What happens when I return a clearance item?"

In [None]:
print(post_processing_response(prompt, ori_model, ori_tokenizer))
print(post_processing_response(prompt, model, tokenizer))

In [None]:
# Side-by-side comparison of the untuned base model (wrapped with a freshly
# initialised adapter by get_model_tokenizer) and the fine-tuned model on
# every evaluation question.
# NOTE(review): an earlier cell already assigns ori_model/ori_tokenizer with
# the same call; this reload presumably holds a second copy of the model in
# memory until the old one is collected. Confirm it is needed.
model_name = "tiiuae/falcon-7b"
ori_model, ori_tokenizer = get_model_tokenizer(model_name)

for question in eval_questions:
    print("Zero-Shot results:\n")
    print(post_processing_response(question, ori_model, ori_tokenizer))
    print("###############################################################")
    print("Fine-tuned results:\n")
    print(post_processing_response(question, model, tokenizer))