In [1]:
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_chroma import Chroma

In [2]:
file_path = "../data/hotels details.pdf"
loader = PyMuPDFLoader(file_path)

In [3]:
docs = loader.load()
docs
print(docs[0].page_content)

1. Name: Le Meridien Dahab Resort 
Location: South Sinai, Dahab, Egypt 
Type: Resort 
Description: A beachfront resort with sweeping views of the Gulf of Aqaba and direct access to 
soft sandy beaches. Features five swimming pools, a man-made lagoon, and a diverse set of 
restaurants offering local and international cuisine. Elegant rooms have private balconies, 
flatscreens, and modern decor. Ideal for families, couples, and divers seeking comfort with 
upscale amenities. 
Amenities: Swimming Pool, Lagoon, Restaurant, Beach Access, Free WiFi, Dive Center 
Price per Night: 260 USD 
Rating: 5 
Review Count: 1,120 
Contact Email: info@lemeridiendahab.com 
Phone: +20 69 362 3000 
Website: www.lemeridiendahab.com 
 
2. Name: Swiss Inn Resort Dahab 
Location: Dahab, Egypt 
Type: Resort 
Description: A stylish beachfront property with tranquil ambiance, two in-house restaurants, a 
spa offering holistic treatments, and well-maintained swimming pools. The resort is close to 
local markets and

In [4]:
docs

[Document(metadata={'producer': 'Microsoft® Word LTSC', 'creator': 'Microsoft® Word LTSC', 'creationdate': '2025-08-12T02:40:36+03:00', 'source': '../data/hotels details.pdf', 'file_path': '../data/hotels details.pdf', 'total_pages': 4, 'format': 'PDF 1.7', 'title': '', 'author': 'Mohamed Adel', 'subject': '', 'keywords': '', 'moddate': '2025-08-12T02:40:36+03:00', 'trapped': '', 'modDate': "D:20250812024036+03'00'", 'creationDate': "D:20250812024036+03'00'", 'page': 0}, page_content='1. Name: Le Meridien Dahab Resort \nLocation: South Sinai, Dahab, Egypt \nType: Resort \nDescription: A beachfront resort with sweeping views of the Gulf of Aqaba and direct access to \nsoft sandy beaches. Features five swimming pools, a man-made lagoon, and a diverse set of \nrestaurants offering local and international cuisine. Elegant rooms have private balconies, \nflatscreens, and modern decor. Ideal for families, couples, and divers seeking comfort with \nupscale amenities. \nAmenities: Swimming Poo

In [5]:
import pprint

pprint.pp(docs[0].metadata)

{'producer': 'Microsoft® Word LTSC',
 'creator': 'Microsoft® Word LTSC',
 'creationdate': '2025-08-12T02:40:36+03:00',
 'source': '../data/hotels details.pdf',
 'file_path': '../data/hotels details.pdf',
 'total_pages': 4,
 'format': 'PDF 1.7',
 'title': '',
 'author': 'Mohamed Adel',
 'subject': '',
 'keywords': '',
 'moddate': '2025-08-12T02:40:36+03:00',
 'trapped': '',
 'modDate': "D:20250812024036+03'00'",
 'creationDate': "D:20250812024036+03'00'",
 'page': 0}


In [6]:
texts = [doc.page_content for doc in docs]
len(texts)

4

In [28]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load example document
# with open("state_of_the_union.txt") as f:
#     state_of_the_union = f.read()

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    add_start_index = True,
    chunk_size=1500,
    chunk_overlap=500,
    length_function=len
)

chunks = text_splitter.create_documents(texts)
print(chunks)


[Document(metadata={'start_index': 0}, page_content='1. Name: Le Meridien Dahab Resort \nLocation: South Sinai, Dahab, Egypt \nType: Resort \nDescription: A beachfront resort with sweeping views of the Gulf of Aqaba and direct access to \nsoft sandy beaches. Features five swimming pools, a man-made lagoon, and a diverse set of \nrestaurants offering local and international cuisine. Elegant rooms have private balconies, \nflatscreens, and modern decor. Ideal for families, couples, and divers seeking comfort with \nupscale amenities. \nAmenities: Swimming Pool, Lagoon, Restaurant, Beach Access, Free WiFi, Dive Center \nPrice per Night: 260 USD \nRating: 5 \nReview Count: 1,120 \nContact Email: info@lemeridiendahab.com \nPhone: +20 69 362 3000 \nWebsite: www.lemeridiendahab.com \n \n2. Name: Swiss Inn Resort Dahab \nLocation: Dahab, Egypt \nType: Resort \nDescription: A stylish beachfront property with tranquil ambiance, two in-house restaurants, a \nspa offering holistic treatments, and 

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key="Your API Key")


In [30]:
db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="../data/chroma_langchain_db",
    collection_name="my_chunks"
)

In [31]:
results = db.similarity_search_with_score("Le Meridien Dahab Resort", k=3)
for res, score in results:
    print(f"{score:.3f} → {res.page_content}")


0.337 → 2. Name: Swiss Inn Resort Dahab 
Location: Dahab, Egypt 
Type: Resort 
Description: A stylish beachfront property with tranquil ambiance, two in-house restaurants, a 
spa offering holistic treatments, and well-maintained swimming pools. The resort is close to
0.340 → 1. Name: Le Meridien Dahab Resort 
Location: South Sinai, Dahab, Egypt 
Type: Resort 
Description: A beachfront resort with sweeping views of the Gulf of Aqaba and direct access to 
soft sandy beaches. Features five swimming pools, a man-made lagoon, and a diverse set of
0.355 → 6. Name: Sheikh Ali Dahab Resort 
Location: Dahab, Egypt 
Type: Resort 
Description: A beloved resort known for its relaxed charm, friendly staff, and lush garden 
courtyards. Offers simple yet comfortable accommodations, an on-site restaurant, and access to


In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

retriever = db.as_retriever()
llm = ChatGoogleGenerativeAI(
    model="models/gemini-2.5-flash",
    google_api_key="Your API Key"
)


system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")],
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

result = chain.invoke({"input": input("Enter your Question: ")})

In [35]:
print(result["answer"])

Le Meridien Dahab Resort costs 260 USD per night.
