# Building a RAG Application with LangChain and Ollama

### 0 : Installing Required Libraries

In [22]:
!pip install langchain-community pypdf
!pip install -qU langchain-ollama



### 1 : Loading a PDF File

In [23]:
# Read the source PDF into `docs`, the collection the chunking cell consumes.
from langchain_community.document_loaders import PyPDFLoader

# Relative path: resolved against the notebook's current working directory.
file_path = "Think-And-Grow-Rich_2011-06.pdf"
loader = PyPDFLoader(file_path)

# NOTE(review): presumably yields one document per PDF page (the recorded output
# of this cell is 253) — confirm against the PyPDFLoader documentation.
docs = loader.load()

# Quick sanity check on how many documents the loader produced.
print(len(docs))

253


### 2 : Chunking the Document

In [24]:
# Cut the loaded documents into overlapping character windows so that each piece
# can be embedded on its own.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings gathered in one place: chunks of up to 1000 characters with a
# 200-character overlap between neighbours.
# NOTE(review): add_start_index presumably records each chunk's starting offset
# in its metadata — confirm against the splitter documentation.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
all_splits = text_splitter.split_documents(docs)

# Show the second chunk, framed by divider lines, as a spot check of the split.
divider = "=================================="
print(divider)
print(all_splits[1])
print(divider)

# Total number of chunks produced from the whole document.
print(len(all_splits))

page_content='  THINK & GROW RICH   
Legal Notice & Disclaimer
This Digital Download PDF eBook edition and related web site are NOT 
prepared, approved, licensed, endorsed or sponsored or otherwise affiliated with 
Napoleon Hill; his family and heirs; the Napoleon Hill Foundation; the Ralston 
Society or any past or present publishers of this book.
The web site and eBook are dedicated to the classic work, “Think And 
Grow Rich” written by Napoleon Hill in 1937. This electronic eBook edition, 
published in 2007 at think-and-grow-rich-ebook.com, is a reproduction of the 
complete 1937 version, originally published by the The Ralston Society and is 
now in the Public Domain.
THINK AND GROW RICH™ is the registered trademark and property of 
the Napoleon Hill Foundation. 
The book title “Think And Grow Rich” as used by this Digital eBook and 
related Web site and any references used are for illustrative purposes only, 
without permission and are not authorized by, associated with, endorse

In [25]:

# Create the embedding client that later cells use to vectorise the document chunks.

from langchain_ollama import OllamaEmbeddings

# NOTE(review): "deepseek-r1" appears to be a chat/reasoning model rather than a
# dedicated embedding model; a purpose-built embedding model served by Ollama
# (e.g. "nomic-embed-text") is likely to give better retrieval quality — confirm
# before building the vector store on top of these vectors.
# Assumes a reachable Ollama server with this model already pulled — TODO confirm.
embeddings = OllamaEmbeddings(model="deepseek-r1:latest")

In [26]:
# Smoke-test the embedding client on the first two chunks.
# The chunks are documents rather than search queries, so they are embedded with
# embed_documents(), which handles both texts in one call instead of two
# separate embed_query() calls.
sample_texts = [chunk.page_content for chunk in all_splits[:2]]
assert len(sample_texts) == 2, "expected at least two chunks to compare embeddings"
vector_1, vector_2 = embeddings.embed_documents(sample_texts)

# Both vectors come from the same model, so their lengths must match.
assert len(vector_1) == len(vector_2), "embedding dimensions differ between chunks"
print(f"Generated vectors of length {len(vector_1)}\n")
# Only the first ten components are shown to keep the output readable.
print(vector_1[:10])

Generated vectors of length 3584

[0.005771338, 0.01332812, -0.01481526, 0.006359267, -0.0043255975, 0.0054270127, -0.011758706, 0.005033477, -0.023193797, -0.028717326]
