# RAG application built on Gemini

In [4]:
from langchain_community.document_loaders import PyPDFLoader

# Load the paper from a path relative to the notebook's working directory.
loader = PyPDFLoader("GOT-OCR-2.0-paper.pdf")
data = loader.load()  # list of Documents, one per PDF page (19 for this paper) — not a single Document
# Uncomment to inspect the loaded Documents:
# data

In [3]:
# Number of Documents the loader produced (one per page of the PDF).
len(data)

19

In [5]:

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the page-level Documents into smaller chunks for embedding and retrieval.
# chunk_size=1000 caps each chunk's length as measured by the splitter's length
# function (characters by default — confirm for the installed version).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(data)


# Number of chunks after splitting (this is not the page count).
print("Total number of documents: ",len(docs))

Total number of documents:  79


In [6]:
# Spot-check one chunk: the Document carries the PDF/page metadata plus the chunk text.
docs[7]

Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-09-03T06:37:45+00:00', 'author': '', 'keywords': '', 'moddate': '2024-09-03T06:37:45+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'GOT-OCR-2.0-paper.pdf', 'total_pages': 19, 'page': 2, 'page_label': '3'}, page_content='OCR feature, is also wasteful.\nAccordingly, we propose the general OCR theory, i.e., OCR-2.0, to break the bottlenecks of both\ntraditional and LVLM manners on OCR tasks. We think that a model of OCR 2.0 should have the\nfollowing essential characteristics:\n• End-to-end. Compared to OCR-1.0 models with complex procedures, the OCR-2.0 model should\nenjoy a unified and end-to-end architecture to ensure lower maintenance costs. It is cool that a\nbeginner can quickly master the entire OCR system in the 2.0 era.\n• Low training and inference

In [9]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so the
# API key does not have to be hard-coded in the notebook.
load_dotenv()

# Get an API key:
# Head to https://ai.google.dev/gemini-api/docs/api-key to generate a Google AI API key, then paste it into the .env file.
# NOTE(review): the integration presumably reads the key from GOOGLE_API_KEY — confirm the variable name.

# Other embedding model integrations: https://python.langchain.com/v0.1/docs/integrations/text_embedding/

# Google Generative AI embedding client.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# Smoke test: embed a short string to confirm the API key and model are usable.
vector = embeddings.embed_query("hello, world!")
vector[:5]  # display only the first five components instead of the whole vector
# vector  (uncomment to display the full embedding)

[0.05636945366859436,
 0.004828543867915869,
 -0.07625909894704819,
 -0.023642510175704956,
 0.053293220698833466]

In [10]:
# Embed every chunk in `docs` and index the vectors in a Chroma vector store.
# Reuse the `embeddings` client created in the embeddings cell instead of
# constructing a second one, so the embedding model name is configured in
# exactly one place and cannot drift between cells.
# NOTE(review): re-running this cell in the same kernel may add the same chunks
# to the in-memory collection again — confirm, and restart the kernel before
# re-indexing if duplicates show up in retrieval results.
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)


In [11]:
# Retriever over the vector store: similarity search that returns the
# 10 closest chunks for each query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

# Trial query, used to eyeball what the retriever brings back.
retrieved_docs = retriever.invoke("What is new in GOT-OCR-2.0?")

In [12]:
# Sanity check: the number of retrieved chunks should equal the retriever's k (10).
len(retrieved_docs)

10

In [13]:
# Read the text of one retrieved chunk (index 5) to judge its relevance to the query.
print(retrieved_docs[5].page_content)

attention-guided image captioning models are summarized in
Section IV. The analyses of attention and explanations and our
proposed LRP-inference ﬁne-tuning strategy are introduced in
Section V.
Figure 5: The plain text (document) OCR ability of GOT. For double-column documents with high
text density, GOT can still handle them well, proving the excellent text perception ability.
12


In [18]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used to generate the final answer.
# Generation settings: temperature 0.3 and a 500-token cap on the output.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.3,
    max_tokens=500,
)

In [19]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the answering model: the behavioural instructions,
# then a blank line, then the "{context}" slot that receives the retrieved chunks.
system_prompt = (
    " ".join(
        [
            "You are an assistant for question-answering tasks.",
            "Use the following pieces of retrieved context to answer the question.",
            "If you don't know the answer, say that you don't know.",
            "Use three sentences maximum and keep the answer concise.",
        ]
    )
    + "\n\n"
    + "{context}"
)

# Chat prompt with two messages: the system message carries the instructions
# and the context slot; the human message is the user's question ("{input}").
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [20]:
# Chain that places the supplied documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG pipeline: fetch documents with the retriever, then hand them to the
# question-answering chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [24]:
# Ask about a topic the indexed paper presumably does not cover, to check that
# the model follows the system prompt and says it does not know.
response = rag_chain.invoke({"input": "what is new in IPHONE_OCR?"})
# The generated text is stored under the "answer" key of the result.
print(response["answer"])

I am sorry, but the provided context does not contain information about IPHONE_OCR. Therefore, I cannot answer your question.
