In [3]:
# Imports and environment setup
#
# Order matters in this cell: the environment tweaks come first so that they are
# already in effect when the third-party libraries below get imported.
import os
import warnings

# Set environment variable for protobuf.
# This is done before the third-party imports because protobuf picks its backend
# when it is first imported; setting the variable afterwards may have no effect.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Extra native-library search paths. These are machine-specific; edit them to
# match the local CUDA / native build locations.
DLL_DIRECTORIES = [
    r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin',
    r'C:\Users\USER\work\build\install\x64\vc17\bin',
]

# os.add_dll_directory is only available on Windows and raises when the
# directory does not exist, so guard both cases instead of crashing the cell.
if hasattr(os, "add_dll_directory"):
    for dll_dir in DLL_DIRECTORIES:
        if os.path.isdir(dll_dir):
            os.add_dll_directory(dll_dir)
        else:
            print(f"Skipping missing DLL directory: {dll_dir}")

# Jupyter-specific imports
from IPython.display import display, Markdown

# LangChain / Ollama / Chroma components used by the RAG pipeline
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

# Suppress warnings (installed after the imports, so import-time warnings still show)
warnings.filterwarnings('ignore')

In [4]:
# Load PDF
# NOTE: absolute, machine-specific path; point it at your own PDF.
local_path = r"C:\Users\USER\Projects\RAG\TalkToPDF\data\pdf\metagpt.pdf"

# Fail fast with a clear message. Later cells read `data`, so merely printing a
# hint and carrying on would only surface as a NameError further down.
if not local_path:
    raise ValueError("Upload a PDF file")
if not os.path.isfile(local_path):
    raise FileNotFoundError(f"PDF not found: {local_path}")

loader = UnstructuredPDFLoader(file_path=local_path)
data = loader.load()

# An empty result would leave nothing to split or embed in the following cells.
if not data:
    raise ValueError(f"No content could be extracted from: {local_path}")

print(f"PDF loaded successfully: {local_path}")

PDF loaded successfully: C:\Users\USER\Projects\RAG\TalkToPDF\data\pdf\metagpt.pdf


In [5]:
# Chunking: split the loaded document(s) into overlapping pieces
# (chunk_size=1000, chunk_overlap=200) and report how many were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = text_splitter.split_documents(documents=data)
print(f"Text split into {len(chunks)} chunks")

Text split into 109 chunks


In [6]:
# Vector store: embed every chunk through Ollama and index the result in the
# Chroma collection named "local-rag".
# The embedding model tag ("gemma2:2b") is the same one the chat LLM uses.
vector_db = Chroma.from_documents(
    collection_name="local-rag",
    embedding=OllamaEmbeddings(model="gemma2:2b"),
    documents=chunks,
)
print("Vector database created successfully")

Vector database created successfully


In [7]:
# Chat model served through Ollama.
# Change the tag below to switch to a different locally available model.
local_model = "gemma2:2b"
llm = ChatOllama(model=local_model)

In [8]:
# Prompt used to have the LLM rephrase the user's question into alternative
# versions (one per line) before searching the vector store.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

# Multi-query retriever: wraps the plain vector-store retriever and uses `llm`
# together with QUERY_PROMPT to produce the alternative queries.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

In [9]:
# Answering prompt: the model is told to rely only on the supplied context.
# Placeholders: {context} (retrieved material) and {question} (user question).
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)


In [10]:
# Create chain
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents; each item's ``page_content``
            attribute is used.

    Returns:
        The documents' text joined by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever yields Document objects. Piping them through _format_docs puts
# only their text into {context}; without it the prompt would receive the
# string form of the whole list (object reprs and metadata included).
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [11]:
def chat_with_pdf(question):
    """
    Ask the RAG chain a question and render its answer as Markdown.

    Args:
        question: Passed unchanged to ``chain.invoke``.

    Returns:
        Whatever ``display`` returns for the rendered answer; the answer text
        itself is not returned.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)

In [12]:
# Example 1: ask for a high-level summary of the PDF
chat_with_pdf(question="What is the main idea of this document?")

The main idea of this document is to describe **MetaGPT's approach to meta-programming using specialized agents** and how it addresses the challenges associated with traditional programming. 

Here are some key takeaways:

* **Meta-programming:** The document focuses on the concept of "meta-programming," which refers to "programming to program." This involves crafting programs that can automatically modify or generate code based on specific needs.
* **MetaGPT's Unique Approach:** MetaGPT utilizes a system of specialized agents with distinct roles and expertise to perform various tasks in the development process. These agents handle areas like requirement analysis, system design, code generation, debugging, and execution. 
* **Benefits of Agents:** This agent-based approach allows for automation and efficiency within the meta-programming workflow.  
* **Potential Impact on Code Development:** The document suggests that this method could revolutionize software development by automating processes and enabling more comprehensive control over code creation and modification.


Overall, the document presents MetaGPT as a novel solution to accelerate and enhance meta-programming in software development. 


In [14]:
# Example 2: ask for a short, length-limited explanation
chat_with_pdf(question="Can you explain why is MetaGPT the solution in 100 words or less?")

MetaGPT is a novel framework for software development using advanced AI. It leverages structured communication between agents, drawing inspiration from human social structures and addressing limitations of purely natural language interfaces. This leads to more accurate and efficient code generation compared to existing solutions like ChatDev and general-purpose AI.  Ultimately, MetaGPT enables complex tasks to be tackled effectively through a collaborative process guided by structure and purpose. 
