<a href="https://colab.research.google.com/github/agdev/Routing/blob/main/Fine_tuning_Classification_for_Routing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Fine Tuning Model for comparison with Semantic Router**

# **Installing dependencies**

In [1]:
!pip install datasets --quiet
!pip install transformers wandb --quiet
!pip install transformers[torch] --quiet
!pip install accelerate -U --quiet
!pip install evaluate --quiet
!pip install torch --quiet
# !pip install sentencepiece --quiet
# !pip install rouge_score --quiet
# !pip install rouge --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m64.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.1/309.1 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h

# **Importing Libraries**

In [2]:
import pandas as pd
import numpy as np
from pprint import pprint
from huggingface_hub import login
from google.colab import userdata
import evaluate
import torch
import wandb

In [4]:
hf_api_key=userdata.get('HuggingFace')
login(token = hf_api_key, add_to_git_credential=True)
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mitrewardsolutions[0m ([33mitrewardsolutions-top[0m). Use [1m`wandb login --relogin`[0m to force relogin


Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


True

# **Dataset**

In [5]:
from datasets import load_dataset

dataset_name = "bitext/Bitext-customer-support-llm-chatbot-training-dataset"
# Load the dataset
base_ds = load_dataset(dataset_name, split="train")
# set columns
pprint(base_ds.features)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/11.9k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/19.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/26872 [00:00<?, ? examples/s]

{'category': Value(dtype='string', id=None),
 'flags': Value(dtype='string', id=None),
 'instruction': Value(dtype='string', id=None),
 'intent': Value(dtype='string', id=None),
 'response': Value(dtype='string', id=None)}


In [6]:
pprint(base_ds[:10])

{'category': ['ORDER',
              'ORDER',
              'ORDER',
              'ORDER',
              'ORDER',
              'ORDER',
              'ORDER',
              'ORDER',
              'ORDER',
              'ORDER'],
 'flags': ['B', 'BQZ', 'BLQZ', 'BL', 'BCELN', 'BI', 'BCLN', 'BL', 'BL', 'BLQ'],
 'instruction': ['question about cancelling order {{Order Number}}',
                 'i have a question about cancelling oorder {{Order Number}}',
                 'i need help cancelling puchase {{Order Number}}',
                 'I need to cancel purchase {{Order Number}}',
                 'I cannot afford this order, cancel purchase {{Order Number}}',
                 'can you help me cancel order {{Order Number}}?',
                 'I can no longer afford order {{Order Number}}, cancel it',
                 'I am trying to cancel purchase {{Order Number}}',
                 'I have got to cancel purchase {{Order Number}}',
                 'i need help canceling purchase {

# **Testing model before fine tuning**

In [7]:
base_df = base_ds.to_pandas()
base_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26872 entries, 0 to 26871
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   flags        26872 non-null  object
 1   instruction  26872 non-null  object
 2   category     26872 non-null  object
 3   intent       26872 non-null  object
 4   response     26872 non-null  object
dtypes: object(5)
memory usage: 1.0+ MB


In [8]:
base_df['category'].value_counts() # -> Pretty heavily concentrated on ACCOUNT, ORDER and REFUND categories.

Unnamed: 0_level_0,count
category,Unnamed: 1_level_1
ACCOUNT,5986
ORDER,3988
REFUND,2992
CONTACT,1999
INVOICE,1999
PAYMENT,1998
FEEDBACK,1997
DELIVERY,1994
SHIPPING,1970
SUBSCRIPTION,999


# **Converting labels to numeric**

In [9]:
numeric_labels, unique_labels = pd.factorize(base_df['category'].unique())
label_mapping = {label: int(numeric_label) for label, numeric_label in zip(unique_labels, numeric_labels)}
id_to_label = {str(numeric_label): label for label, numeric_label in label_mapping.items()}

In [10]:
label_mapping

{'ORDER': 0,
 'SHIPPING': 1,
 'CANCEL': 2,
 'INVOICE': 3,
 'PAYMENT': 4,
 'REFUND': 5,
 'FEEDBACK': 6,
 'CONTACT': 7,
 'ACCOUNT': 8,
 'DELIVERY': 9,
 'SUBSCRIPTION': 10}

In [11]:
id_to_label

{'0': 'ORDER',
 '1': 'SHIPPING',
 '2': 'CANCEL',
 '3': 'INVOICE',
 '4': 'PAYMENT',
 '5': 'REFUND',
 '6': 'FEEDBACK',
 '7': 'CONTACT',
 '8': 'ACCOUNT',
 '9': 'DELIVERY',
 '10': 'SUBSCRIPTION'}

# **Loading Model**

In [12]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

model_name: str = "vineetsharma/customer-support-intent-albert"
# Load the model
model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                           num_labels=len(id_to_label),
                                                           ignore_mismatched_sizes=True)
model.config.id2label = id_to_label
model.config.label2id = label_mapping

config.json:   0%|          | 0.00/2.44k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at vineetsharma/customer-support-intent-albert and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([27]) in the checkpoint and torch.Size([11]) in the model instantiated
- classifier.weight: found shape torch.Size([27, 768]) in the checkpoint and torch.Size([11, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
import torch

# Set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the device
model.to(device)

def classify_text(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Move inputs to the same device as the model
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Get model predictions
    outputs = model(**inputs)
    logits = outputs.logits
    probabilities = torch.nn.functional.softmax(logits, dim=-1)

    # Get the predicted class
    predicted_class = torch.argmax(probabilities, dim=-1).item()

    # Return the class and confidence
    return predicted_class, probabilities[0][predicted_class].item()

# **Splitting dataset**

In [14]:
# Split the dataset into training and validation sets
train_test_split = base_ds.train_test_split(test_size=0.2)  # 20% for validation
train_dataset = train_test_split['train']
validation_dataset = train_test_split['test']

# **Loading Tokenizer**

In [15]:
from transformers import AutoTokenizer
# Load the tokenizer for ALBERT
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/495 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.27M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

# **Testing Model before training**

In [16]:
# prompt: select 100 items from train_dataset, pass value of category column to classify_text
from sklearn.metrics import accuracy_score, f1_score

# Select 100 items from train_dataset
subset = train_dataset.select(range(100))
predictions = []
true_labels = []
# Classify and print results
for item in subset:
  predicted_class, confidence = classify_text(item['instruction'])
  predictions.append(predicted_class)
  true_labels.append(label_mapping[item['category']])
  if predicted_class in id_to_label:
      print(f"Text: {item['instruction']}, Predicted Class: {predicted_class} ({id_to_label[predicted_class]}), Confidence: {confidence}")
  else:
      print(f"Text: {item['instruction']}, Predicted Class: {predicted_class}, Confidence: {confidence} (No label mapping available)")

accuracy = accuracy_score(true_labels, predictions)
f1 = f1_score(true_labels, predictions, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")

Text: checking order {{Order Number}} status, Predicted Class: 2, Confidence: 0.15091562271118164 (No label mapping available)
Text: how to switch a product of order {{Order Number}}?, Predicted Class: 6, Confidence: 0.17357105016708374 (No label mapping available)
Text: I do not know what I have to do to cancel the premium acount, Predicted Class: 6, Confidence: 0.16830183565616608 (No label mapping available)
Text: what do I have to do to acquire some products?, Predicted Class: 0, Confidence: 0.12465822696685791 (No label mapping available)
Text: is it possible to report signup errors?, Predicted Class: 9, Confidence: 0.19319039583206177 (No label mapping available)
Text: I want assistance checking the payment modalities, Predicted Class: 3, Confidence: 0.17626045644283295 (No label mapping available)
Text: I have to lodge a customer reclamation against your business, Predicted Class: 10, Confidence: 0.15197041630744934 (No label mapping available)
Text: can you help me leave my fee

# **DataCollator**

In [17]:
from transformers import DataCollatorWithPadding
# Data collator for dynamic padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# **Tokenize the dataset**

In [18]:
# Tokenize the dataset

def preprocess_function(examples):
  inputs = tokenizer(examples['instruction'], truncation=True, padding=True)
  labels = [label_mapping[label] for label in examples['category']]
  # pprint(labels)
  inputs['labels'] = labels

  return inputs

train_procs_ds = train_dataset.map(preprocess_function, batched=True)
val_procs__ds = validation_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/21497 [00:00<?, ? examples/s]

Map:   0%|          | 0/5375 [00:00<?, ? examples/s]

In [19]:
train_procs_ds.to_pandas().head()

Unnamed: 0,flags,instruction,category,intent,response,input_ids,token_type_ids,attention_mask,labels
0,BK,checking order {{Order Number}} status,ORDER,track_order,Glad you contacted us! I'm clearly cognizant t...,"[2, 9886, 389, 13, 1, 7861, 234, 1, 1782, 3, 0...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, ...",0
1,BIL,how to switch a product of order {{Order Numbe...,ORDER,change_order,We understand that you're seeking guidance on ...,"[2, 184, 20, 5521, 21, 2374, 16, 389, 13, 1, 7...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0
2,BELZ,I do not know what I have to do to cancel the ...,ACCOUNT,delete_account,Thank you for sharing your question about canc...,"[2, 31, 107, 52, 143, 98, 31, 57, 20, 107, 20,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",8
3,BILM,what do I have to do to acquire some products?,ORDER,place_order,"First of all, thank you for expressing your in...","[2, 98, 107, 31, 57, 20, 107, 20, 8942, 109, 1...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, ...",0
4,BILM,is it possible to report signup errors?,ACCOUNT,registration_problems,I'll get right on it! We understand that signu...,"[2, 25, 32, 938, 20, 1330, 1676, 576, 11908, 6...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, ...",8


In [20]:
metric = evaluate.load("accuracy")

def compute_accuracy(eval_pred):
    logits, labels = eval_pred

    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [22]:
# Define training arguments
training_args = TrainingArguments(
    report_to="wandb",
    run_name="customer-support-categ_classification-albert_v3",
    output_dir="./results",
    evaluation_strategy="steps",
    save_strategy="steps",
    logging_strategy="steps",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    max_steps=30000,
    logging_steps=100, #really important, it is when everything is reported to WandB
    eval_steps=5000,
    save_steps=10000,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_procs_ds,
    eval_dataset=val_procs__ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_accuracy  # Custom metrics function
)

# Train the model
trainer.train()

eval_results = trainer.evaluate()

print(eval_results)

max_steps is given, it will override any value given in num_train_epochs


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113109066666817, max=1.0…

Step,Training Loss,Validation Loss,Accuracy
5000,0.0071,0.01686,0.997767
10000,0.0085,0.011032,0.998326
15000,0.0083,0.011416,0.99907
20000,0.0,0.007748,0.999442
25000,0.0,0.008193,0.999442
30000,0.0,0.008623,0.999442


{'eval_loss': 0.007748061791062355, 'eval_accuracy': 0.9994418604651163, 'eval_runtime': 10.5374, 'eval_samples_per_second': 510.087, 'eval_steps_per_second': 31.886, 'epoch': 22.321428571428573}


## **Evaluate the model**

In [23]:
# Evaluate the model
trainer.evaluate()

{'eval_loss': 0.007748061791062355,
 'eval_accuracy': 0.9994418604651163,
 'eval_runtime': 11.0307,
 'eval_samples_per_second': 487.278,
 'eval_steps_per_second': 30.461,
 'epoch': 22.321428571428573}

In [24]:

fine_tuned_model_name: str ="customer-support-categ_classification-albert_v3"
model.save_pretrained(fine_tuned_model_name, push_to_hub=True, private=False)

model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

In [25]:
# Save the tokenizer to a directory
# tokenizer.save_pretrained(fine_tuned_model_name)
# Save the tokenizer to hub
tokenizer.push_to_hub(f"AIEnthusiast369/{fine_tuned_model_name}")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AIEnthusiast369/customer-support-categ_classification-albert_v3/commit/a08cf23d97e144e12f3abf5b74638e02e91b1ff1', commit_message='Upload tokenizer', commit_description='', oid='a08cf23d97e144e12f3abf5b74638e02e91b1ff1', pr_url=None, pr_revision=None, pr_num=None)

# **Testing fine tuned model**

In [26]:
def test_model(samples):
  for text in samples:
    predicted_class, confidence = classify_text(text)
    print(f"Text: {text}\nPredicted Class: {predicted_class} ({id_to_label[str(predicted_class)]}), Confidence: {confidence:.4f}\n")
    # if predicted_class in id_to_label:
    #   print(f"Text: {text}\nPredicted Class: {predicted_class} ({id_to_label[predicted_class]}), Confidence: {confidence:.4f}\n")
    # else:
    #   print(f"Text: {text}\nPredicted Class: {predicted_class}, Confidence: {confidence:.4f}\n")

In [27]:
# Test with some example texts
text_samples = [
    "I reqeust immediate refund",
    "I was billed incorrectly",
    "Where do I leave a tip",
    "Not worth the money, would not buy again. I want to cancel order.",
    "I would like to speak with the manager"
]

test_model(text_samples)

Text: I reqeust immediate refund
Predicted Class: 5 (REFUND), Confidence: 1.0000

Text: I was billed incorrectly
Predicted Class: 4 (PAYMENT), Confidence: 0.9996

Text: Where do I leave a tip
Predicted Class: 6 (FEEDBACK), Confidence: 1.0000

Text: Not worth the money, would not buy again. I want to cancel order.
Predicted Class: 0 (ORDER), Confidence: 1.0000

Text: I would like to speak with the manager
Predicted Class: 7 (CONTACT), Confidence: 1.0000

