In [2]:
# Use the %pip magic so packages land in the running kernel's environment;
# versions are pinned to the ones this notebook was last executed with.
%pip install langchain-community==0.3.14 langchain-openai==0.2.14 pypdf==5.1.0

Collecting langchain-community
  Downloading langchain_community-0.3.14-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.2.14-py3-none-any.whl.metadata (2.7 kB)
Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.14 (from langchain-community)
  Downloading langchain-0.3.14-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.29 (from langchain-community)
  Downloading langchain_core-0.3.29-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting openai<2.0.0,>=1.58.1 (from

In [18]:
# %pip (not !pip) installs into the environment of the running kernel.
%pip install pydantic==2.10.1 chainlit



In [4]:
# Pinned to the version this notebook was last executed with.
%pip install faiss-cpu==1.9.0.post1

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [5]:
# Import all the necessary libraries

import os
from dotenv import load_dotenv
from typing import List
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import Document
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import chainlit as cl
import json

In [12]:
# Path of the JSON file that accumulates user feedback entries.
FEEDBACK_FILE = "feedback_log.json"
# Read the OpenAI key from the environment so a real secret never has to be
# written into the notebook; the original placeholder remains the fallback
# when OPENAI_API_KEY is unset or empty.
api_key = os.getenv("OPENAI_API_KEY") or "your_api_key"

In [7]:
# Function to load and process PDFs from a folder
def process_pdf_folder(
    folder_path: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
) -> List[Document]:
    """Load every PDF found directly in ``folder_path`` and split it into chunks.

    Args:
        folder_path: Directory to scan (not recursive).
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``RecursiveCharacterTextSplitter``.

    Returns:
        The result of ``split_documents`` applied to all loaded documents.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder_path`` cannot be read.
    """
    documents = []
    # os.listdir returns entries in arbitrary order; sorting keeps the chunk
    # order (and therefore the resulting index) reproducible between runs.
    for file in sorted(os.listdir(folder_path)):
        # Case-insensitive match so files such as "REPORT.PDF" are not skipped.
        if not file.lower().endswith('.pdf'):
            continue
        file_path = os.path.join(folder_path, file)
        # A directory whose name ends in ".pdf" cannot be loaded as a document.
        if not os.path.isfile(file_path):
            continue
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

In [13]:
# Function to initialize and save the vector store
def setup_vector_store(chunks: List[Document], store_path: str):
    """Build a FAISS store from ``chunks``, save it to ``store_path``, and return it.

    Embeddings come from ``OpenAIEmbeddings`` configured with the module-level
    ``api_key``; the store is written with ``save_local(store_path)``.
    """
    embedder = OpenAIEmbeddings(api_key=api_key)
    index = FAISS.from_documents(documents=chunks, embedding=embedder)
    index.save_local(store_path)
    return index

In [14]:
# Function to save feedback

def save_feedback(query: str, response: str, feedback: str):
    """Append one feedback entry to the JSON list stored in ``FEEDBACK_FILE``.

    Args:
        query: The user's question.
        response: The answer the feedback refers to. Values that JSON cannot
            represent natively are stored as ``str(value)``.
        feedback: The user's feedback text.

    Raises:
        json.JSONDecodeError: If the existing file holds non-empty, invalid JSON.
        ValueError: If the entry cannot be serialised (e.g. a circular structure).
            The existing file is left untouched in that case.
    """
    feedback_data = {"query": query, "response": response, "feedback": feedback}

    feedback_list = []
    if os.path.exists(FEEDBACK_FILE):
        with open(FEEDBACK_FILE, "r", encoding="utf-8") as file:
            existing = file.read()
        # An empty file (e.g. freshly created) counts as "no feedback yet"
        # instead of crashing the JSON parser.
        if existing.strip():
            feedback_list = json.loads(existing)
    feedback_list.append(feedback_data)

    # Serialise BEFORE opening the file for writing: mode "w" truncates at once,
    # so a failure inside json.dump would otherwise wipe the existing log.
    serialized = json.dumps(feedback_list, indent=4, default=str)
    with open(FEEDBACK_FILE, "w", encoding="utf-8") as file:
        file.write(serialized)

In [15]:
# Where the source PDFs live and where the vector store is written
pdf_folder = "./relevant_docs"
vector_store_path = "./vector_store"

# Split the PDFs into chunks, then build and save the vector store from them
processed_chunks = process_pdf_folder(folder_path=pdf_folder)
vector_store = setup_vector_store(
    chunks=processed_chunks,
    store_path=vector_store_path,
)

# Chat model used by the retrieval chain
chat_model = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    api_key=api_key,
)



In [20]:
# Chatbot behavior
@cl.on_chat_start
async def on_chat_start():
    """Create a retrieval chain with conversation memory for a new chat session.

    The chain is built from the module-level ``vector_store`` and ``chat_model``
    and stored in the Chainlit user session under the key ``"chat_chain"``.
    """
    retriever = vector_store.as_retriever()
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="response",
        chat_memory=message_history,
        return_messages=True,
    )

    chat_chain = ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        chain_type="stuff",
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        # The chain publishes its answer under "answer" unless told otherwise.
        # The memory above is configured to record the "response" output, so
        # the chain must use the same key or saving the turn raises KeyError.
        output_key="response",
    )
    cl.user_session.set("chat_chain", chat_chain)

@cl.on_message
async def handle_message(message: cl.Message):
    """Answer a user message with the session's chain and collect yes/no feedback.

    Steps:
        1. Run the chain stored under ``"chat_chain"`` on ``message.content``.
        2. Send the answer (read from the result's ``"response"`` key), followed
           by the pages of the source documents when any were returned.
        3. Ask whether the answer was helpful and wait for the reply.
        4. Persist the feedback with ``save_feedback`` when it is "yes" or "no";
           otherwise tell the user the reply was not understood.
    """
    chat_chain = cl.user_session.get("chat_chain")
    response = await chat_chain.acall(message.content)
    answer = response["response"]
    source_docs = response.get("source_documents", [])

    # Collect a de-duplicated, order-preserving list of page labels.
    page_labels = {}
    for doc in source_docs:
        metadata = doc.metadata or {}
        page = metadata.get("page_number")
        if page is None:
            page = metadata.get("page")
            # NOTE(review): PyPDFLoader is expected to store a 0-based index
            # under "page"; shift it to a human-friendly 1-based number.
            if isinstance(page, int):
                page += 1
        if page is not None:
            page_labels[f"Page {page}"] = None

    # Only mention sources when there is something to list.
    sources = "\nSources: " + ", ".join(page_labels) if page_labels else ""
    await cl.Message(content=answer + sources).send()

    # 4. Asking for feedback
    # AskUserMessage waits for the user's next reply and returns it here,
    # instead of routing it through this handler as a new question.
    feedback_message = "Was this answer helpful? Please reply with 'Yes' or 'No'."
    user_feedback = await cl.AskUserMessage(content=feedback_message).send()

    # 5. Handling the feedback
    if not user_feedback:
        # No reply before the prompt timed out: nothing to record.
        return
    # NOTE(review): the reply text is under "output" in current Chainlit
    # releases and under "content" in older ones; accept either.
    raw_feedback = user_feedback.get("output") or user_feedback.get("content") or ""
    feedback = raw_feedback.strip().lower()
    if feedback in ("yes", "no"):
        # Store the question and the plain answer text, not the whole chain
        # result (which contains Document objects).
        save_feedback(message.content, answer, feedback)
        feedback_reply = "Thank you for your feedback!"
    else:
        feedback_reply = "Invalid feedback received. Please reply with 'Yes' or 'No'."
    await cl.Message(content=feedback_reply).send()

