# Problem Statement
Fine-tune the "microsoft/phi-2" model to generate customized responses to customer queries.

# Install Required Packages

In [None]:
# !pip install torch transformers datasets peft bitsandbytes huggingface_hub python-dotenv pandas nltk

# Import Packages and Load ENV Variables

In [None]:
import os
import torch
import random
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from dotenv import load_dotenv
from huggingface_hub import login

In [None]:
import os

# Detect if running in Google Colab
def in_colab():
    """Return True when the `google.colab` package can be imported, else False."""
    try:
        import google.colab  # noqa: F401  (imported only to probe availability)
    except ImportError:
        return False
    else:
        return True

# Resolve the Hugging Face token from whichever source matches the runtime and
# export it as HF_TOKEN for the later upload cells.
if in_colab():
    # On Colab the token is read through google.colab.userdata under the
    # name HF_TOKEN_WRITE.
    from google.colab import userdata
    hf_token = userdata.get("HF_TOKEN_WRITE")
else:
    # Running locally → load from .env
    from dotenv import load_dotenv
    load_dotenv()  # loads variables from .env into environment
    hf_token = os.getenv("HF_TOKEN")

# os.environ only accepts str values: assigning a missing (None) token would
# raise an opaque "str expected, not NoneType" TypeError. Fail here with an
# actionable message instead of after training, when the push would break.
if not hf_token:
    raise RuntimeError(
        "Hugging Face token not found: set the HF_TOKEN_WRITE secret on Colab "
        "or define HF_TOKEN in your environment / .env file."
    )
os.environ["HF_TOKEN"] = hf_token


In [None]:
import sys

# Show which Python interpreter is executing this notebook kernel.
interpreter_path = sys.executable
print(interpreter_path)

In [None]:
# -----------------------------
# Device & dtype setup
# -----------------------------
import torch

# Half precision is only selected together with CUDA; everything else
# falls back to CPU with full float32 precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

print(f"Using device: {device} with dtype: {torch_dtype}")

# Prepare Dataset for Training

In [None]:
# -----------------------------
# Synthetic dataset
# -----------------------------
# `instructions` and `responses` are parallel lists: responses[k] is the
# intended answer to instructions[k].
instructions = [
    "Customer asks about refund window",
    "Customer wants to cancel an order",
    "Order arrived late",
    "Wrong item received",
    "Product not working",
    "Shipping cost inquiry",
    "Change delivery address",
    "Request for invoice",
    "Ask about warranty",
    "Technical support request"
]

responses = [
    "Our refund window is 30 days from delivery.",
    "You can cancel your order from your account dashboard within 24 hours.",
    "Sorry for the delay. A delivery credit has been applied.",
    "We’ll ship the correct item and provide a return label.",
    "Please try resetting the product. Contact support if the issue persists.",
    "Shipping cost depends on your location and chosen delivery speed.",
    "You can update your delivery address before the order ships.",
    "An invoice will be emailed to you after purchase.",
    "Your product comes with a 12-month warranty.",
    "Our tech support team will contact you shortly."
]

# Guard the pairing: a silent length mismatch would drop or mis-align answers.
if len(instructions) != len(responses):
    raise ValueError("instructions and responses must have the same length")

# Sample (instruction, response) as a unit. Drawing the two sides with
# independent random indices pairs most questions with an unrelated answer,
# which teaches the model a meaningless mapping.
qa_pairs = list(zip(instructions, responses))
train_data = [
    {"instruction": f"{instruction} #{i}", "response": response}
    for i, (instruction, response) in enumerate(random.choices(qa_pairs, k=300), start=1)
]

# Wrap the list of {"instruction", "response"} dicts in a Hugging Face Dataset.
# NOTE(review): the CSV-loading cell below also assigns `dataset`, so whichever
# of the two cells runs last decides what gets tokenized — confirm which one
# is intended for training.
dataset = Dataset.from_list(train_data)

In [None]:
import pandas as pd
from datasets import Dataset

# -----------------------------
# Load dataset from CSV
# -----------------------------
# csv_path = "/content/drive/MyDrive/buildllm/_data/customer_response_data.csv"  # path to your CSV file
csv_path = "https://huggingface.co/datasets/mishrabp/customer-support-responses/resolve/main/train.csv"

# Read CSV with pandas
df = pd.read_csv(csv_path)

# Validate the schema with a real exception rather than `assert`, which is
# stripped when Python runs with -O, and report exactly what is missing.
required_columns = ("instruction", "response")
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(
        "CSV must have 'instruction' and 'response' columns "
        f"(missing: {missing_columns})"
    )

# Convert to Hugging Face Dataset
dataset = Dataset.from_pandas(df)

print("Number of training examples:", len(dataset))
print("First example:", dataset[0])
# A bare `df.head(5)` in the middle of a cell displays nothing, so the
# preview is printed explicitly.
print(df.head(5))

# Load the Base Model

In [8]:
# -----------------------------
# Model + Tokenizer
# -----------------------------
model_name = "microsoft/phi-2"

# Reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

on_gpu = device == "cuda"

# QLoRA only if GPU available: 4-bit NF4 weights with double quantization
# and float16 compute. Without CUDA no quantization config is passed.
if on_gpu:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
else:
    bnb_config = None

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch_dtype,
    device_map="auto" if on_gpu else None,
)


Cancellation requested; stopping current tasks.


KeyboardInterrupt: 

# Preprocessing Data

In [None]:
# -----------------------------
# Preprocessing
# -----------------------------
def preprocess(example):
    """Tokenize one instruction/response pair for causal-LM fine-tuning.

    Args:
        example: Mapping with ``"instruction"`` and ``"response"`` entries.

    Returns:
        The tokenizer encoding (padded/truncated to 256 tokens) with an added
        ``"labels"`` list: a copy of ``input_ids`` in which every padding
        position is replaced by ``-100`` so it is ignored by the loss.
    """
    # End the text with an explicit EOS so the model still sees a real
    # end-of-answer target once padding is masked out below.
    prompt = (
        f"### Instruction:\n{example['instruction']}\n\n"
        f"### Response:\n{example['response']}{tokenizer.eos_token}"
    )
    enc = tokenizer(prompt, padding="max_length", truncation=True, max_length=256)
    # The pad token is the EOS token here, so masking must use the attention
    # mask (not the token id): padded positions get -100 and contribute no
    # loss, while the genuine EOS appended above stays a training target.
    enc["labels"] = [
        token_id if attended else -100
        for token_id, attended in zip(enc["input_ids"], enc["attention_mask"])
    ]
    return enc

# Run `preprocess` over every example, then expose only the model inputs
# as torch tensors.
tensor_columns = ["input_ids", "attention_mask", "labels"]
tokenized_dataset = dataset.map(preprocess)
tokenized_dataset.set_format(type="torch", columns=tensor_columns)

# Configure LoRA and Quantize

In [None]:
# -----------------------------
# LoRA config
# -----------------------------
# Adapters are attached to the modules named "q_proj" and "v_proj";
# adjust target_modules for phi-2 if needed.
lora_cfg = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the loaded base model with the LoRA adapter configuration.
# NOTE(review): for the 4-bit path, PEFT also offers
# prepare_model_for_kbit_training — consider whether it should be applied first.
model = get_peft_model(base_model, lora_cfg)

# Train Model

In [None]:
# -----------------------------
# Training arguments
# -----------------------------
# Batch size drops to 1 on CPU; fp16 mixed precision is enabled only on CUDA.
per_device_batch = 1 if device == "cpu" else 2
use_fp16 = device == "cuda"

training_args = TrainingArguments(
    output_dir="./outputs",
    num_train_epochs=1,
    per_device_train_batch_size=per_device_batch,
    learning_rate=2e-4,
    fp16=use_fp16,
    logging_steps=5,
    save_strategy="no",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)
trainer.train()

# Upload Model to Hugging Face

In [None]:
# -----------------------------
# Save & push adapters to Hugging Face
# -----------------------------
adapter_dir = "./phi2-qlora"
hub_repo_id = "mishrabp/phi2-qlora-finetuned"
# Read the token first so a missing HF_TOKEN fails before anything is written.
hf_token = os.environ["HF_TOKEN"]

model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

# `use_auth_token` is deprecated in favour of `token`, which is also what the
# README upload cell passes to HfApi.upload_file.
model.push_to_hub(hub_repo_id, token=hf_token)
tokenizer.push_to_hub(hub_repo_id, token=hf_token)

# Upload README.md to Hugging Face

In [None]:
import os

# Make sure HF_TOKEN is available for the upload below. The google.colab
# import is guarded because it fails outside Colab; in that case fall back to
# the .env file, mirroring the token setup at the top of the notebook.
try:
    from google.colab import userdata
except ImportError:
    # Running locally → load from .env
    from dotenv import load_dotenv
    load_dotenv()
else:
    os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN_WRITE")

# Fail early with a clear message instead of a KeyError at upload time.
if not os.environ.get("HF_TOKEN"):
    raise RuntimeError(
        "HF_TOKEN is not set: add the HF_TOKEN_WRITE secret on Colab or "
        "define HF_TOKEN in your environment / .env file."
    )

# Model card written to README.md and uploaded to the Hub.
# - Raw string: the embedded Python sample contains "\n" escapes that must
#   reach the README literally; in a normal string they become real line
#   breaks and split the sample's f-strings across lines.
# - The text starts directly with the YAML front matter ("---") instead of a
#   blank line, since metadata is expected at the very top of the file.
readme_content = r"""---
license: mit
tags:
  - causal-lm
  - instruction-following
  - lora
  - QLoRA
  - quantized
language: en
library_name: transformers
base_model: microsoft/phi-2
---

# Phi-2 QLoRA Fine-Tuned Model


**Model:** `mishrabp/phi2-qlora-finetuned`

**Base Model:** [`microsoft/phi-2`](https://huggingface.co/microsoft/phi-2)

**Fine-Tuning Method:** QLoRA (4-bit quantized LoRA)

**Task:** Instruction-following / Customer Support Responses

---

## Model Description

This repository contains a **Phi-2 language model fine-tuned using QLoRA** on a synthetic dataset of customer support instructions and responses. The fine-tuning uses **4-bit quantized LoRA adapters** for memory-efficient training and can run on GPU or CPU (slower on CPU).

The model is designed for **instruction-following tasks** like customer support, FAQs, or other dialog generation tasks.

---

## Training Data

The fine-tuning dataset is synthetic, consisting of 3000 instruction-response pairs:

**Example:**

```text
Instruction: "Customer asks about refund window #1"
Response: "Our refund window is 30 days from delivery."
```

Here is the dataset that was used for fine-tuning:
https://huggingface.co/datasets/mishrabp/customer-support-responses/resolve/main/train.csv

You can replace the dataset with your own CSV/JSON file to train on real-world data.

---

## Intended Use

* Generate responses to instructions in customer support scenarios.
* Small-scale instruction-following experiments.
* Educational or research purposes.

---

## How to Use

### Load the Fine-Tuned Model

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# -----------------------------
# Load fine-tuned model from HF
# -----------------------------
model_name = "mishrabp/phi2-qlora-finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
model = PeftModel.from_pretrained(base_model, model_name)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# -----------------------------
# Sample evaluation dataset
# -----------------------------
eval_data = [
    {"instruction": "Customer asks about refund window", "reference": "Our refund window is 30 days from delivery."},
    {"instruction": "Order arrived late", "reference": "Sorry for the delay. A delivery credit has been applied."},
    {"instruction": "Wrong item received", "reference": "We’ll ship the correct item and provide a return label."},
]

# -----------------------------
# Evaluation loop
# -----------------------------
for i, example in enumerate(eval_data, 1):
    prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_new_tokens=50)
    generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    print(f"Example {i}")
    print("Instruction:", example["instruction"])
    print("Generated Response:", generated.split("### Response:")[-1].strip())
    print("Reference Response:", example["reference"])
    print("-" * 50)

# -----------------------------
# Optional: compute simple token-level accuracy or BLEU
# -----------------------------
from nltk.translate.bleu_score import sentence_bleu

bleu_scores = []
for example in eval_data:
    prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_new_tokens=50)
    generated = tokenizer.decode(output_ids[0], skip_special_tokens=True).split("### Response:")[-1].strip()

    reference_tokens = example["reference"].split()
    generated_tokens = generated.split()
    bleu = sentence_bleu([reference_tokens], generated_tokens)
    bleu_scores.append(bleu)

print("Average BLEU score:", sum(bleu_scores)/len(bleu_scores))

```

---

## Training Script

The training script performs the following steps:

1. Loads the **Phi-2 base model**.
2. Creates a **synthetic dataset** of instruction-response pairs.
3. Tokenizes and formats the dataset for causal language modeling.
4. Applies a **LoRA adapter**.
5. Trains using **QLoRA** if GPU is available, otherwise full-precision LoRA on CPU.
6. Saves the adapter and tokenizer to `./phi2-qlora`.
7. Pushes the adapter and tokenizer to Hugging Face Hub.

### Requirements

```bash
pip install torch transformers peft datasets huggingface_hub python-dotenv
```

---

## Parameters

* `r=8`, `lora_alpha=16`, `lora_dropout=0.05`
* `target_modules=["q_proj","v_proj"]` (adjust for different base models)
* Learning rate: `2e-4`
* Batch size: `2` per device on GPU, `1` on CPU
* Epochs: `1`
* Max sequence length: `256` tokens
"""

# Only HfApi is needed. The unused, deprecated `Repository` import is dropped
# because it can make the import itself fail on huggingface_hub releases that
# no longer ship that class.
from huggingface_hub import HfApi

# Write the model card to disk so it can be uploaded as a file.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme_content)

repo_id = "mishrabp/phi2-qlora-finetuned"

# Upload (or overwrite) the model card in the model repository.
api = HfApi()
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",  # must be exactly README.md for HF Hub
    repo_id=repo_id,
    repo_type="model",
    token=os.environ["HF_TOKEN"],
)


# Evaluate the Fine-Tuned Model from Hugging Face

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

# -----------------------------
# Load fine-tuned model from HF
# -----------------------------
model_name = "mishrabp/phi2-qlora-finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
model = PeftModel.from_pretrained(base_model, model_name)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()  # make sure dropout layers are inactive while generating

# -----------------------------
# Sample evaluation dataset
# -----------------------------
eval_data = [
    {"instruction": "Customer asks about refund window", "reference": "Our refund window is 30 days from delivery."},
    {"instruction": "Order arrived late", "reference": "Sorry for the delay. A delivery credit has been applied."},
    {"instruction": "Wrong item received", "reference": "We’ll ship the correct item and provide a return label."},
]


def generate_response(instruction, max_new_tokens=50):
    """Generate the model's answer to one instruction.

    Args:
        instruction: Instruction text placed into the prompt template.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text that follows the last "### Response:" marker,
        stripped of surrounding whitespace.
    """
    prompt = f"### Instruction:\n{instruction}\n\n### Response:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        # Pass the pad id explicitly instead of relying on generate()'s fallback.
        pad_token_id=tokenizer.eos_token_id,
    )
    generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated.split("### Response:")[-1].strip()


# -----------------------------
# Evaluation loop + BLEU
# -----------------------------
# Each example is generated once and the text reused for both the printout
# and the BLEU score (previously every example was generated twice).
# Smoothing keeps short sentences from scoring 0 whenever a higher-order
# n-gram has no match.
smoothing = SmoothingFunction().method1
bleu_scores = []
for i, example in enumerate(eval_data, 1):
    response = generate_response(example["instruction"])

    print(f"Example {i}")
    print("Instruction:", example["instruction"])
    print("Generated Response:", response)
    print("Reference Response:", example["reference"])
    print("-" * 50)

    reference_tokens = example["reference"].split()
    generated_tokens = response.split()
    bleu_scores.append(
        sentence_bleu([reference_tokens], generated_tokens, smoothing_function=smoothing)
    )

print("Average BLEU score:", sum(bleu_scores)/len(bleu_scores))
