In [None]:
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter


# Parse the source PDF into `documents`, the input for the splitting step.
pdf_path = "travel-policy.pdf"
loader = PyPDFLoader(file_path=pdf_path)
documents = loader.load()

In [None]:
# Display the entry at index 1 of the loaded documents as a quick sanity check.
documents[1]

In [None]:
# Break the loaded documents on newlines into chunks of about 300 characters,
# keeping a 50-character overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=300,
    chunk_overlap=50,
)
docs = text_splitter.split_documents(documents)


In [None]:
from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Embedding model shared by the cells below (OpenAIEmbeddings() is the
# hosted alternative that was tried here).
embeddings = HuggingFaceEmbeddings()

# Index the chunks in FAISS and persist the index to the "faiss_store1" folder.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
vectorstore.save_local("faiss_store1")

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# Reload the index persisted earlier. The allow_dangerous_deserialization flag
# opts in to pickle-based loading, so only point this at a store this notebook
# wrote itself -- never at an untrusted folder.
vectorstore = FAISS.load_local(
    folder_path="faiss_store1",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Return the 3 most similar chunks per query.
# Other search_type values: mmr, similarity_score_threshold.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

message = """
        Answer this question using the provided context only.
        If the information is not available in the context, just reply with "i dont know"
        {input}
        Context:
        {context}
        """
prompt = ChatPromptTemplate.from_messages([("human", message)])
llm = ChatOpenAI()

# The inner chain fills {context} with the retrieved documents; the outer chain
# runs the retriever on {input} first and then calls the inner chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
print(rag_chain)

In [None]:
# Run one sample question through the chain.
question = "tell me about all the reimbursement policies"
response = rag_chain.invoke({"input": question})

# Show the whole payload, then only the answer, then the text of every
# retrieved chunk that was supplied as context.
print(response)
print(response["answer"])
for doc in response["context"]:
    print(doc.page_content)

In [None]:
import tempfile
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

def load_pdf_into_vectorstore(file) -> str:
    """Index an uploaded PDF into the persistent Chroma store used by the chat.

    The PDF is parsed, split on newlines into chunks of about 1000 characters
    (30-character overlap), embedded with ``HuggingFaceEmbeddings`` and written
    to the ``chromadb11`` persist directory.

    Args:
        file: Value delivered by the upload component: either an object that
            exposes the on-disk path as ``.name`` or the path string itself.
            ``None`` means nothing was uploaded.

    Returns:
        A human-readable status message. Failures are reported as text rather
        than raised, so the message can be shown directly in the UI.
    """
    # Nothing uploaded yet: report it plainly instead of relying on an
    # AttributeError from `None.name` being swallowed below.
    if file is None:
        return 'No file selected. Please upload a PDF first.'

    try:
        print("======Loading file==================")
        # Accept both file-like objects (path in `.name`) and plain path strings.
        file_path = getattr(file, "name", file)
        documents = PyPDFLoader(file_path=file_path).load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30, separator="\n")
        docs = text_splitter.split_documents(documents=documents)
        embeddings = HuggingFaceEmbeddings()

        # Index the split chunks (`docs`). The previous code passed the unsplit
        # `documents`, which silently discarded the chunking done above.
        # NOTE(review): this writes into an existing persist directory, so
        # repeated uploads presumably accumulate in the same collection -- confirm.
        Chroma.from_documents(
            docs, embedding=embeddings, persist_directory="chromadb11"
        )

        print("======File Loaded================== ")

        return 'Document uploaded and index created successfully. You can chat now.'
    except Exception as e:
        print(e)
        # Honour the `-> str` contract: return text, not the exception object.
        return f"Failed to load the document: {e}"

In [None]:
import gradio as gr
from langchain import OpenAI, PromptTemplate
from langchain.document_loaders import PyPDFLoader

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage


# Module-level chat model instance.
# NOTE(review): none of the visible cells reference `model`; getresponse builds
# its own ChatOpenAI(). Confirm whether this is still needed.
model = ChatOpenAI()


def getresponse(query, history:list) -> tuple:
    """Answer a chat message from the indexed PDF and extend the chat history.

    Opens the Chroma store persisted in ``chromadb11`` (using the module-level
    ``embeddings`` object, which must already be defined), builds a retrieval
    chain around ``ChatOpenAI`` and invokes it with ``query``.

    Args:
        query: The text the user submitted.
        history: Chat transcript as a list of ``(user, assistant)`` tuples.
            ``None`` is treated as an empty transcript.

    Returns:
        A ``("", history)`` tuple: the empty string clears the input box and
        ``history`` is the transcript including the new turn. A blank query
        returns the transcript unchanged without calling the model.
    """
    if history is None:
        history = []

    # A blank submit should not cost a retrieval and LLM round-trip, nor add
    # an empty turn to the transcript.
    if not query or not query.strip():
        return "", history

    vectorstore = Chroma(
        persist_directory="chromadb11", embedding_function=embeddings
    )

    message = """
    Answer this question using the provided context . If information is not available in the context,
      Just respond saying "I dont know"
    {input}
    Context:
    {context}
    """

    prompt = ChatPromptTemplate.from_messages([("human", message)])
    llm = ChatOpenAI()

    # The inner chain fills {context} with the retrieved documents; the outer
    # chain runs the retriever on {input} and then calls the inner chain.
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    rag_chain = create_retrieval_chain(vectorstore.as_retriever(), question_answer_chain)

    response1 = rag_chain.invoke({"input": query})
    print(response1)
    history.append((query, response1['answer']))
    return "", history


In [None]:
with gr.Blocks() as demo:
    # Top row: file picker and upload button in one column, status box beside it.
    with gr.Row():
        with gr.Column():
            file = gr.File(
                label='Upload your pdf file',
                file_count='single',
                file_types=['.pdf'],
            )
            upload = gr.Button(value='Upload', variant='primary')

        label = gr.Textbox()

    # Chat area: transcript, input box, and a button that clears both.
    chatbot = gr.Chatbot(label='Talk to the Document')
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])
    vectorStore = None

    # Upload indexes the PDF and writes the status text into `label`.
    upload.click(fn=load_pdf_into_vectorstore, inputs=[file], outputs=[label])

    # Submitting a message answers it and refreshes the input box and transcript.
    msg.submit(fn=getresponse, inputs=[msg, chatbot], outputs=[msg, chatbot])

# Start the Gradio app when this code is run as the main module.
# NOTE(review): debug=True presumably keeps the cell blocked while the server
# runs so errors are shown in the output -- confirm against the Gradio docs.
if __name__ == '__main__':
    demo.launch(debug=True)