In [1]:

!pip uninstall -y langchain langchain-classic langgraph langgraph-prebuilt
!pip install -q --force-reinstall "numpy<2.0"
!pip install -q \
  websockets==11.0.3 \
  torch==2.9.0 \
  torchvision==0.24.0+cu126 \
  torchaudio==2.9.0+cu126 \
  transformers>=4.38.0 \
  datasets \
  accelerate \
  sentence-transformers \
  faiss-gpu-cu12 \
  langchain-community \
  langchain-huggingface \
  langchain-core \
  gradio==3.50.2

  # ignoring the errors for this cell as they are unrelated to the libraries used in this project and do not affect execution.

Found existing installation: langchain 0.3.27
Uninstalling langchain-0.3.27:
  Successfully uninstalled langchain-0.3.27


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-community 0.3.31 requires langchain<2.0.0,>=0.3.27, which is not installed.

[notice] A new release of pip is available: 25.0.1 -> 26.0.1
[notice] To update, run: C:\Users\Akhil Juvvanapudi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
ERROR: Ignored the following yanked versions: 0.1.6, 0.1.7, 0.1.8, 0.1.9, 0.2.0, 0.2.1, 0.2.2, 0.2.2.post2, 0.2.2.post3
ERROR: Could not find a version that satisfies the requirement torchvision==0.24.0+cu126 (from versions: 0.17.0, 0.17.1, 0.17.2, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.20.0, 0.20.1, 0.21.0, 0.22.0, 0.22.1, 0.23.0, 0.24.0, 0.24.1, 0.25.0)

[notice] A new release of pip is available: 25.0.1 -> 26.0.1
[notice] To update, run: C:\Users\Akhil Juvvanapudi\AppData\Local\Microsoft\Windows

In [2]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq
)
from sentence_transformers import CrossEncoder
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
import gradio as gr

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)


# If getting value error in this cell, restart the session.

  from .autonotebook import tqdm as notebook_tqdm


Device: cuda


In [3]:
print("************* Loading dataset *****************")
dataset = load_dataset(
    "bitext/Bitext-customer-support-llm-chatbot-training-dataset",
    split="train"
)

print("*********** Building knowledge base ***********")
kb_docs = [
    Document(
        page_content=r["response"],
        metadata={"intent": r["intent"]}
    )
    for r in dataset.select(range(3000))
]

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

vector_db = FAISS.from_documents(kb_docs, embeddings)

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


************* Loading dataset *****************




*********** Building knowledge base ***********


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 568.21it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m
Loading weights: 100%|██████████| 105/105 [00:00<00:00, 655.35it/s, Materializing param=classifier.weight]                                    
[1mBertForSequenceClassification LOAD REPORT[0m from: cross-encoder/ms-marco-MiniLM-L-6-v2
Key                          | Status     |  | 
-----------------------------+------------+--+-
bert.embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [4]:
model_id = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id).to(device)


Loading weights: 100%|██████████| 282/282 [00:00<00:00, 590.95it/s, Materializing param=shared.weight]                                                       


In [5]:
def preprocess(examples):
    inputs = [f"support_agent: {x}" for x in examples["instruction"]]

    model_inputs = tokenizer(
        inputs,
        max_length=512,
        truncation=True,
        padding="max_length"
    )

    labels = tokenizer(
        text_target=examples["response"],
        max_length=128,
        truncation=True,
        padding="max_length"
    )

    labels_ids = [
        [(tok if tok != tokenizer.pad_token_id else -100) for tok in seq]
        for seq in labels["input_ids"]
    ]

    model_inputs["labels"] = labels_ids
    return model_inputs


In [6]:
tokenized_data = (
    dataset
    .select(range(3000))
    .map(preprocess, batched=True)
    .train_test_split(test_size=0.1)
)


In [7]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./t5_support_final",
    eval_strategy="epoch",
    learning_rate=2e-4,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    bf16=True,
    fp16=False,
    report_to="none"
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model)
)

print("*********** Fine-tuning started **************")
trainer.train()


TypeError: Seq2SeqTrainer.__init__() got an unexpected keyword argument 'tokenizer'

In [None]:
def run_agent(message, history=None):
    try:
        # Normalize input
        query = message.get("text", "") if isinstance(message, dict) else str(message)
        if not query.strip():
            return "How can I help you today?"

        # Special business rule
        if "renew" in query.lower() and "subscription" in query.lower():
            return (
                "At the moment, I can help with newsletter subscriptions. "
                "For renewing paid subscriptions, please contact our associate."
            )

        # Escalation
        if any(k in query.lower() for k in ["sue", "legal", "lawyer", "court"]):
            return " Sorry for the inconvenience. Our Customer Support Executive will be contacting you shortly."

        # Intent map
        intent_map = {
            "cancel": ["cancel_order"],
            "track": ["track_order"],
            "where is my order": ["track_order"],
            "change order": ["change_order"],
            "modify order": ["change_order"],
            "replace": ["change_order"],
            "change address": ["change_shipping_address"],
            "refund": ["get_refund", "check_refund_policy"],
            "invoice": ["get_invoice"],
            "payment": ["payment_issue"],
            "create account": ["create_account"],
            "delete account": ["delete_account"],
            "password": ["recover_password"],
            "subscribe": ["newsletter_subscription"],
            "complaint": ["complaint"],
            "human": ["contact_human_agent"],
            "contact": ["contact_customer_service"],
        }

        detected_intent = None
        for k, v in intent_map.items():
            if k in query.lower():
                detected_intent = v
                break

        # Intent-aware retrieval (FAISS-safe)
        if detected_intent:
            filtered = [
                d for d in kb_docs if d.metadata["intent"] in detected_intent
            ]
            if filtered:
                temp_db = FAISS.from_documents(filtered, embeddings)
                docs = temp_db.similarity_search(query, k=5)
            else:
                docs = vector_db.similarity_search(query, k=5)
        else:
            docs = vector_db.similarity_search(query, k=5)

        # Rerank
        pairs = [[query, d.page_content] for d in docs]
        scores = reranker.predict(pairs)
        best_policy = docs[scores.argmax()].page_content

        # Prompt
        prompt = (
            "SYSTEM: You are a helpful customer support assistant.\n"
            f"POLICY: {best_policy}\n"
            f"CUSTOMER: {query}\n"
            "ASSISTANT:"
        )

        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=120,
                do_sample=True,
                temperature=0.6,
                top_p=0.9,
                repetition_penalty=2.0
            )

        return tokenizer.decode(output[0], skip_special_tokens=True)

    except Exception as e:
        return f"Error: {e}"


In [None]:
gui = gr.ChatInterface(
    fn=run_agent,
    title="Intelligent Customer Support Chatbot",
    description="Hi, I'm AI Assistant. How can I help you?",
    examples=[
        "Where is my order?",
        "Cancel my order",
        "Replace my order",
        "Renew my subscription",
        "Change delivery Address",
        "I want to esclate this issue"
    ]
)

gui.launch(share=True, debug=True)


IMPORTANT: You are using gradio version 3.50.2, however version 4.44.1 is available, please upgrade.
--------
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://5d8c4314821fc05601.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://5d8c4314821fc05601.gradio.live


