**Install Necessary Packages**

In [None]:
# Install required libraries with specified versions and suppress progress bar output for cleaner installation
!pip -qqq install pip --progress-bar off
!pip -qqq install langchain-groq==0.1.3 --progress-bar off
!pip -qqq install langchain==0.1.17 --progress-bar off
!pip -qqq install llama-parse==0.1.3 --progress-bar off
!pip -qqq install qdrant-client==1.9.1 --progress-bar off
!pip -qqq install "unstructured[md]"==0.13.6 --progress-bar off
!pip -qqq install fastembed==0.2.7 --progress-bar off
!pip -qqq install flashrank==0.2.4 --progress-bar off
!pip install gradio==3.38.0


**Import Libraries and Set Up API Key**

In [None]:
# Import necessary libraries for document processing, retrieval, and Q&A system
import os
import textwrap
from pathlib import Path
from google.colab import files
from google.colab import userdata
from IPython.display import Markdown

# Import components from LangChain and other packages for document processing
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors.flashrank_rerank import FlashrankRerank
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from llama_parse import LlamaParse
import gradio as gr

# Copy the Groq key obtained through google.colab's `userdata` into the process
# environment. ChatGroq is constructed later without an explicit key, so it
# presumably picks the key up from this variable.
os.environ.update({"GROQ_API_KEY": userdata.get("GROQ_API_KEY")})


**Define Helper Function for Output Formatting**

In [None]:
# Define a function to format and print responses with word-wrapping for readability
def print_response(response, width=100):
    """Print the answer text of a QA response, word-wrapped line by line.

    Args:
        response: Mapping whose "result" entry holds the answer text.
        width: Maximum width of a printed line (defaults to 100).

    Each newline-separated line of the answer is wrapped independently, so
    blank lines between paragraphs are preserved. Words are never split, so a
    single token longer than `width` is printed unbroken on its own line.
    """
    for paragraph in response["result"].split("\n"):
        # fill() yields "" for an empty line, so blank lines still print as blank lines
        print(textwrap.fill(paragraph, width, break_long_words=False))


**Upload PDF File**

In [None]:
# Create the directory that later cells write the parsed markdown into
os.makedirs("data", exist_ok=True)

# Use Google Colab's file upload feature to upload a PDF file
uploaded = files.upload()

# Stop here with a clear message: otherwise `pdf_path` would be undefined (or
# stale from an earlier run) and a later cell would fail in a confusing way.
if not uploaded:
    raise ValueError("No file was uploaded; run this cell again and choose a PDF.")

# Only one document is processed downstream. When several files are uploaded,
# the last one is used, matching the previous behaviour of this cell.
# NOTE(review): assumes files.upload() has written each file to the working
# directory under the returned name — confirm.
filename = list(uploaded.keys())[-1]
pdf_path = Path(filename)


**Set Up Parsing Instructions and Parse PDF Document**

In [None]:
# Define parsing instructions for extracting content from the PDF in a structured way
instruction = """Use the following pieces of context to provide a concise answer
to the question at the end but use at least summarize with 250 words with detailed
explanations. It contains many tables. Try to be precise while answering the questions"""

# Initialize the LlamaParse API to parse the uploaded PDF document
parser = LlamaParse(
    api_key=userdata.get("LLAMA_PARSE"),
    result_type="markdown",
    parsing_instruction=instruction,
    max_timeout=5000,
)

# Parse the PDF document asynchronously
llama_parse_documents = await parser.aload_data(str(pdf_path))
parsed_doc = llama_parse_documents[0]


**Save and Load the Parsed Document**

In [None]:
# Save parsed content to a markdown file in the 'data' directory.
# The file is overwritten rather than appended to, so re-running this cell or
# parsing a different PDF never mixes old content into the new document.
document_path = Path("data/parsed_document.md")
document_path.parent.mkdir(parents=True, exist_ok=True)
document_path.write_text(parsed_doc.text, encoding="utf-8")

# Load the saved markdown file as a document for further processing
loader = UnstructuredMarkdownLoader(document_path)
loaded_documents = loader.load()


**Split Documents into Chunks**

In [None]:
# Break the loaded markdown into overlapping chunks (chunk_size=2048,
# chunk_overlap=128) that the embedding step consumes.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=128,
    chunk_size=2048,
)
docs = text_splitter.split_documents(loaded_documents)


**Initialize Embeddings and Vector Store**

In [None]:
# Set up the embeddings model and vector store for document retrieval
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# The collection is stored on disk under ./database, so it outlives a single run.
# force_recreate=True rebuilds it from the current chunks each time; otherwise a
# re-run (or a different PDF) would add to the chunks already stored there.
qdrant = Qdrant.from_documents(
    docs,
    embeddings,
    path="./database",
    collection_name="document_embeddings",
    force_recreate=True,
)


**Set Up Retriever and Compressor**

In [None]:
# First stage: vector search over the Qdrant store, asking for k=5 chunks per query
retriever = qdrant.as_retriever(search_kwargs=dict(k=5))

# Second stage: a FlashRank model re-ranks whatever the first stage returned
compressor = FlashrankRerank(model="ms-marco-MiniLM-L-12-v2")

# Chain both stages behind a single retriever interface
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)


**Initialize Language Model and Prompt Template**

In [None]:
# Prompt text for the QA step; {context} and {question} are filled in per query
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Answer the question and provide additional helpful information,
based on the pieces of information, if applicable. Be succinct.

Responses should be properly formatted to be easily read.
"""

# Wrap the text in a PromptTemplate that declares its two placeholders
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Groq-hosted chat model used to answer questions; temperature is set to 0.
# NOTE(review): confirm that this model id is still served by Groq.
llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)


**Set Up QA Chain**

In [None]:
# Assemble the question-answering chain: the re-ranking retriever supplies the
# context, the "stuff" chain type is used with the custom prompt, and the
# source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=compression_retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=prompt, verbose=False),
    return_source_documents=True,
)


**Create Gradio Interface for User Interaction**

In [None]:
# Define a function to handle user questions and return the answer from the QA chain
def answer_question(question):
    """Answer a user question with the QA chain and return the answer text.

    Args:
        question: The question typed into the Gradio textbox.

    Returns:
        The "result" entry of the chain's response, or a short prompt asking
        for input when the question is empty or only whitespace.
    """
    # Skip retrieval and the LLM call entirely when there is nothing to ask
    if not question or not question.strip():
        return "Please enter a question."
    response = qa.invoke(question)
    return response["result"]

# Build the UI: one textbox for the question, one for the answer, wired to answer_question
demo = gr.Interface(
    answer_question,
    inputs=gr.Textbox(label="Question"),
    outputs=gr.Textbox(label="Answer"),
)

# Start the app with sharing and debug mode both enabled
demo.launch(share=True, debug=True)
