<a href="https://colab.research.google.com/github/abissankar/ChatBot-KEC/blob/main/CHATBOT_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary libraries
!pip install -q torch datasets accelerate peft bitsandbytes trl
!pip install langchain playwright html2text sentence_transformers faiss-gpu gradio
!pip install langchain-community
!playwright install chromium

In [None]:
# Import required modules
import transformers
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
import torch
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import nest_asyncio
import gradio as gr


In [None]:
# Set up asyncio
# Patch asyncio so an event loop can be re-entered; the notebook already runs
# one, and the async page loader used further down has to run inside it.
nest_asyncio.apply()

In [None]:
# Set model
# Hub identifier reused below for both the tokenizer and the model weights.
model_name = "HuggingFaceH4/zephyr-7b-beta"

In [None]:
# Configuring model and Initializing tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token, and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"



In [None]:
# BitsAndBytes config
# Quantization settings passed to from_pretrained() when the model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # load the weights in 4-bit form
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype: half precision
    bnb_4bit_use_double_quant=False  # double quantization disabled
)

In [None]:
# Load model
# Loads the causal LM named by model_name using the quantization config above;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

In [None]:
# Initialize the text generation pipeline
# Deterministic (greedy) decoding is requested explicitly with do_sample=False.
# temperature=0.0 is deliberately not passed: it is not a valid sampling
# temperature, is ignored with a warning when sampling is off, and is rejected
# outright if sampling is ever switched on.
text_generation_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=False,
    repetition_penalty=1.1,
    # Keep the prompt in the raw output; some LangChain wrapper versions slice
    # the prompt off themselves and expect it to be present.
    return_full_text=True,
    # Upper bound on the length of each generated answer, in tokens.
    max_new_tokens=300,
)

In [None]:
# Setup LangChain memory for conversation history
# History is exposed under the "chat_history" key, and the user's side of each
# turn is read from the "human_input" field of the chain inputs.
# NOTE(review): the prompt template in this notebook has no {chat_history}
# placeholder, so the stored history is presumably never shown to the model —
# confirm whether that is intended.
memory = ConversationBufferMemory(memory_key="chat_history", input_key="human_input", return_messages=True)

In [None]:
# Prompt text with <|system|>, <|user|> and <|assistant|> sections.
# {context} and {question} are placeholders filled in for every request; the
# text ends right after the <|assistant|> marker so the model writes the reply.
prompt_template = """<|system|>
You are an assistant knowledgeable about Kongu Engineering College.
<|user|>
Here is some context: {context}

Question: {question}
<|assistant|>
"""

In [None]:
# Create PromptTemplate object
# input_variables names the {context} and {question} placeholders used in
# prompt_template.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template
)

In [None]:
# Initialize HuggingFacePipeline with the model
# Wraps the transformers text-generation pipeline so LangChain can use it as an LLM.
zep_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
# Create the LLMChain
# Ties together the wrapped LLM, the prompt template and the conversation memory.
llm_chain = LLMChain(llm=zep_llm, prompt=prompt, memory=memory)

In [None]:
# Setup embedding and retriever (for RAG)
# Define the articles to scrape
# These pages are the only knowledge source indexed for retrieval below.
articles = [
    "https://en.wikipedia.org/wiki/Kongu_Engineering_College",
    "https://www.kongu.edu/aboutus.html"
]

In [None]:
# NOTE(review): repeats the Playwright install already requested in the first
# install cell — presumably a retry; likely redundant if that cell succeeded.
!pip install playwright
!playwright install chromium


In [None]:
# Scrape the articles for context
# Fetch every URL in `articles` with the Chromium-based loader; `docs` holds
# the loaded pages, which are converted from HTML to text in a later cell.
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [None]:
# NOTE(review): the first install cell also requests html2text and a FAISS
# build, so these lines are likely redundant — confirm which FAISS build
# (CPU or GPU) is actually intended.
!pip install html2text
!pip install faiss-cpu


In [None]:
# Convert HTML to plain text
# The scraped `docs` are passed through the transformer; the result feeds the
# text splitter in the next cell.
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

In [None]:
# Split the plain-text documents into small chunks (size 100, no overlap)
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=100)
chunked_documents = text_splitter.split_documents(docs_transformed)
# Embed each chunk with a sentence-transformers model and index the vectors in FAISS
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
db = FAISS.from_documents(chunked_documents, embedding_model)

In [None]:
# Convert FAISS into a retriever
# Similarity search that returns the 4 closest chunks for each query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 4})

In [None]:
# Gradio interface to interact with the chatbot
def chatbot_interaction(context, question):
    """Answer a question about the college, retrieving context when none is given.

    Args:
        context: Optional background text supplied by the user. When it is
            None, empty or whitespace-only, the chunks returned by the
            module-level ``retriever`` for the question are used instead.
        question: The user's question.

    Returns:
        The assistant's reply with surrounding whitespace removed, or a short
        message asking for a question when ``question`` is None or blank.
    """
    # Treat a missing value like an empty textbox instead of crashing on .strip().
    context = context or ""
    question = question or ""

    # Nothing to answer: skip retrieval and generation entirely.
    if not question.strip():
        return "Please enter a question."

    # If no context provided, retrieve from RAG setup
    if not context.strip():
        matches = retriever.invoke(question)
        context = "\n".join(match.page_content for match in matches)

    # Interact with the chatbot; "human_input" is the key the memory reads.
    query_data = {"context": context, "question": question, "human_input": question}
    response = llm_chain.invoke(query_data)
    answer = response['text']

    # The generation pipeline returns prompt + completion, and depending on the
    # LangChain version the wrapper may pass that through unchanged. Drop the
    # echoed prompt so the user only sees the reply; this is a no-op when the
    # wrapper has already removed it.
    rendered_prompt = prompt.format(context=context, question=question)
    if answer.startswith(rendered_prompt):
        answer = answer[len(rendered_prompt):]
    return answer.strip()

In [None]:
# Build the Gradio UI: two free-text inputs (optional context, then the
# question) wired to chatbot_interaction, and a single text output.
context_box = gr.Textbox(
    label="Context (optional)",
    lines=5,
    placeholder="Paste context here or leave blank for auto-fetch",
)
question_box = gr.Textbox(
    label="Question",
    lines=2,
    placeholder="Enter your question here",
)
response_box = gr.Textbox(label="Response")

iface = gr.Interface(
    title="Kongu Engineering College Chatbot",
    description="Ask questions about Kongu Engineering College. Provide context or let the chatbot retrieve information!",
    fn=chatbot_interaction,
    inputs=[context_box, question_box],
    outputs=response_box,
)

In [None]:
# Launch Gradio app
# Starts the web UI for the interface defined above.
iface.launch()
