In [1]:
!pip install -U trl peft
!pip install -U bitsandbytes
import torch
import wandb
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments
original_dataset = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset", split="train")
def create_prompt(sample):
    return f"""<|user|>
{sample['instruction']}<|end|>
<|assistant|>
Intent: {sample['intent']}
Category: {sample['category']}
Response: {sample['response']}<|end|>"""
model_id = 'microsoft/Phi-3-mini-4k-instruct'
device = 'cuda'
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)
lora_config = LoraConfig(
    r=16,  
    lora_alpha=32, 
    target_modules="all-linear", 
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=False 
)
model.config.use_cache = False
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token 
tokenizer.padding_side = "right"
training_arguments = SFTConfig(  
        output_dir="./phi3-tuned-chatbot",
        dataset_text_field="text",
        max_length=512,
        per_device_train_batch_size=1, 
        gradient_accumulation_steps=4, 
        learning_rate=2e-4,
        max_steps=1000,
        logging_steps=25,
    save_steps=100,
    fp16=True, # Use mixed precision
    optim="paged_adamw_8bit",
    
)
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=original_dataset,
    peft_config=lora_config,
    processing_class = tokenizer,
    formatting_func=create_prompt,
)
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
my_secret = user_secrets.get_secret("wandb_key") 
wandb.login(key=my_secret)
print("training on.........")
trainer.train()
trainer.save_model("./phi3-tuned-chatbot/final_checkpoint")

Collecting trl
  Downloading trl-0.23.0-py3-none-any.whl.metadata (11 kB)
Collecting peft
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting transformers>=4.56.1 (from trl)
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=3.0.0->trl)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.

2025-09-21 12:23:18.228157: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758457398.423766      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758457398.481898      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md: 0.00B [00:00, ?B/s]

Bitext_Sample_Customer_Support_Training_(…):   0%|          | 0.00/19.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/26872 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

Applying formatting function to train dataset:   0%|          | 0/26872 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/26872 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/26872 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/26872 [00:00<?, ? examples/s]

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmihirmoolchamdani[0m ([33mmihirmoolchamdani-dhirubhai-ambani-institute-of-informat[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


training on.........


  return fn(*args, **kwargs)


Step,Training Loss
25,1.1822
50,0.9011
75,0.7927
100,0.7419
125,0.763
150,0.6865
175,0.6937
200,0.6632
225,0.6864
250,0.6781


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


In [10]:
from peft import PeftModel
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=False
)
tuned_model = PeftModel.from_pretrained(base_model, "./phi3-tuned-chatbot/final_checkpoint")
pipl = transformers.pipeline(
    "text-generation",
    model=tuned_model,
    tokenizer=tokenizer,
    device_map="auto"
)
user_query = "My phone is not working"
prompt = f"<|user|>\n{user_query}<|end|>\n<|assistant|>"
sequences = pipl(
    prompt,
    max_new_tokens=256,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
print(sequences[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


<|user|>
My phone is not working<|end|>
<|assistant|> Intent: troubleshoot_device
Category: DEVICE
Response: I'm sorry to hear that your phone is not working. I understand how frustrating it can be when technology doesn't behave as expected. Let's work together to identify the issue and find a solution. Here are a few steps you can try:

1. Check your charger and cable: Make sure that your charger and cable are properly connected and in good condition. A loose or damaged cable could cause your phone to malfunction.

2. Restart your phone: Sometimes, a simple restart can resolve the issue. Press and hold the power button on your phone, and then select the "Restart" option.

3. Update your phone's software: Check for any available software updates for your phone. Outdated software can sometimes cause compatibility issues that affect the phone's functionality.

4. Clear your phone's cache: Clearing the cache can help resolve performance issues and restore normal functioning. Go to your ph

In [None]:
user_query = "My dog is lost"
prompt = f"<|user|>\n{user_query}<|end|>\n<|assistant|>"
sequences = pipl(
    prompt,
    max_new_tokens=256,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
print(sequences[0]['generated_text'])