<a href="https://colab.research.google.com/github/abissankar/ChatBot-KEC/blob/main/CHATBOT_KONGU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q torch datasets accelerate peft bitsandbytes trl
!pip install langchain playwright html2text sentence_transformers faiss-gpu gradio
!pip install langchain-community
!playwright install chromium

In [None]:
# Import required modules
import transformers
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
import torch
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import nest_asyncio
import gradio as gr

In [None]:
# Allow nested asyncio event loops. The notebook kernel already runs a loop,
# so the patch is applied before any library tries to start its own
# (presumably needed by AsyncChromiumLoader.load() further down — TODO confirm).
nest_asyncio.apply()

In [None]:
# Hugging Face Hub ID of the checkpoint. The same ID is reused below for the
# config, the tokenizer and the model so all three come from one repository.
model_name = "ministral/Ministral-3b-instruct"






In [None]:
# Authenticate with the Hugging Face Hub before anything is downloaded.
from huggingface_hub import login
login()   # asks for the HF token at run time, so no token is stored in the notebook


In [None]:
# Fetch the model's configuration from the Hub.
# trust_remote_code=True lets transformers execute Python code shipped inside
# the model repository, so only use it with repositories you trust.
# NOTE(review): model_config is not referenced by any later cell shown here.
model_config = transformers.AutoConfig.from_pretrained(
    model_name, trust_remote_code=True
)


In [None]:
# Load the tokenizer that belongs to the chosen checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Pad on the right-hand side, reusing the end-of-sequence token as padding
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# 4-bit quantization settings that are fed into BitsAndBytesConfig below
use_4bit, use_nested_quant = True, False   # 4-bit loading on, nested (double) quantization off
bnb_4bit_quant_type = "nf4"
bnb_4bit_compute_dtype = "float16"
# Turn the dtype name into the actual torch dtype object
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

In [None]:
# Bundle the quantization settings defined above into the config object that
# is handed to from_pretrained() when the model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [None]:
# Load the causal language model, quantized according to bnb_config.
# device_map="auto" leaves the placement of the weights on the available
# devices to the library instead of fixing it here.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

In [None]:
# `pipeline` is the only name still missing at this point; the other two are
# re-imported unchanged. No install step belongs here: transformers,
# accelerate and bitsandbytes have already been imported and used by the cells
# above to load the quantized model, so installing them again at this point
# would come too late to have any effect on the running session.
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

In [None]:
# Build the text-generation pipeline around the quantized model and its
# tokenizer. The same pipeline object is called directly by the chatbot
# function and is also wrapped for LangChain.
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=False,         # no sampling, so the output is deterministic
    repetition_penalty=1.1,  # values above 1 discourage repeated tokens
    return_full_text=False,  # return only the model's answer, not the prompt
    max_new_tokens=300,      # upper bound on the length of one answer
    pad_token_id=tokenizer.eos_token_id  # same padding token as set on the tokenizer
)

In [None]:
# Conversation memory for the LLMChain.
# `input_key` has to name one of the chain's real inputs. The prompt only takes
# "context" and "question", so the previous value "human_input" made the memory
# raise KeyError when it tried to record a turn; "question" is the user's
# message and is what should be stored as the human side of the history.
# NOTE(review): the prompt template has no {chat_history} placeholder, so the
# stored history is kept but never shown to the model.
memory = ConversationBufferMemory(memory_key="chat_history", input_key="question", return_messages=True)

In [None]:
# Prompt template in [INST] ... [/INST] form with two placeholders:
#   {context}  - background text the answer should be based on
#   {question} - the user's question
prompt_template = """
### [INST]
Instruction: Answer the question based on your
Kongu Engineering College knowledge. Here is context to help:

{context}

### QUESTION:
{question}

[/INST]
"""

In [None]:
# Wrap the transformers pipeline so that LangChain can use it as an LLM
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
# Turn the template string into a PromptTemplate and declare the two
# placeholders it expects to be filled in: {context} and {question}.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
# Combine the prompt, the wrapped LLM and the conversation memory in one chain.
# NOTE(review): no later cell shown here calls llm_chain; chatbot_interaction
# invokes text_generation_pipeline directly.
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt, memory=memory)

In [None]:
# Retrieval (RAG) setup starts here.
# Web pages that are scraped and indexed as the chatbot's knowledge source.
articles = [
    "https://en.wikipedia.org/wiki/Kongu_Engineering_College",
    "https://www.kongu.edu/aboutus.html"

]

In [None]:
# Scrape the articles for context
!pip install playwright
!playwright install

loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [None]:
# Convert HTML to plain text
!pip install html2text

html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

In [None]:
!pip install faiss-cpu


In [None]:
# Split the plain-text pages into chunks (size setting 100, no overlap).
# NOTE(review): 100 is a small chunk size for retrieval — confirm it is intended.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=100)
chunked_documents = text_splitter.split_documents(docs_transformed)
# Embed every chunk and collect the vectors in a FAISS index
db = FAISS.from_documents(
    chunked_documents,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'),
)

In [None]:
# Expose the FAISS index as a retriever that does a similarity search and
# returns the 4 best-matching chunks for a query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 4})

In [None]:
# Chatbot entry point used by the Gradio interface
def chatbot_interaction(context, question):
    """Answer ``question`` from ``context``, retrieving context when none is given.

    Args:
        context: Optional background text supplied by the user. When it is
            ``None``, empty or whitespace-only, the chunks returned by the
            module-level ``retriever`` for ``question`` are joined with
            newlines and used instead.
        question: The user's question. ``None`` is treated as an empty string.

    Returns:
        The text generated by ``text_generation_pipeline`` with surrounding
        whitespace removed, or a short request to enter a question when
        ``question`` is blank.
    """
    # Treat a missing value like an empty text box so .strip() cannot fail.
    context = "" if context is None else context
    question = "" if question is None else question

    # Nothing to answer: skip the retrieval and generation round trip.
    if not question.strip():
        return "Please enter a question."

    # Retrieve context if not provided
    if not context.strip():
        docs = retriever.get_relevant_documents(question)
        context = "\n".join(doc.page_content for doc in docs)

    # Debug print to see what context is retrieved
    print("DEBUG: Retrieved context:\n", context)

    # Restrict the model to the given context to minimize hallucination
    query = f"""
Use ONLY the information in the context below to answer the question.
If the answer is not contained in the context, respond with "I don't know."

Context:
{context}

Question:
{question}
"""
    # The pipeline returns a list with one dict per generated sequence.
    response = text_generation_pipeline(query)[0]["generated_text"]
    return response.strip()


In [None]:
# Build the Gradio UI: two text boxes (optional context, question) whose
# values are passed in that order to chatbot_interaction, and one text box
# that shows the returned answer.
iface = gr.Interface(
    fn=chatbot_interaction,
    inputs=[
        gr.Textbox(lines=5, label="Context (optional)", placeholder="Paste context here or leave blank for auto-fetch"),
        gr.Textbox(lines=2, label="Question", placeholder="Enter your question here")
    ],
    outputs=gr.Textbox(label="Response"),
    title="Kongu Engineering College Chatbot",
    description="Ask questions about Kongu Engineering College. Provide context or let the chatbot retrieve information!"
)

In [None]:
# Start the Gradio app so the interface defined above can be used
iface.launch()