In [21]:
import os
import re

import chromadb
import fitz  # PyMuPDF
import google.generativeai as genai

# Configure Google Generative AI
# The API key is read from the GOOGLE_API_KEY environment variable so that the
# secret is never stored in the notebook or committed to version control.
# NOTE(review): any key that was previously embedded in this file should be
# treated as leaked and revoked.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if not API_KEY:
    # Fail early with a clear message instead of a confusing API error later.
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable to your Google Generative AI API key."
    )
genai.configure(api_key=API_KEY)

# Step 1: Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages joined in page order, with no separator
        inserted between pages.
    """
    # The context manager closes the document even if extracting a page
    # raises, so the underlying file handle is never leaked.
    with fitz.open(pdf_path) as doc:
        # A single join avoids rebuilding the string once per page.
        return "".join(page.get_text() for page in doc)

# Step 2: Preprocess text
def preprocess_text(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is collapsed as well, not removed.
    """
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', text)

# Step 3: Vectorize text using Google Embedding API
class GeminiEmbeddingFunction(chromadb.EmbeddingFunction):
    """Chroma embedding function that delegates to ``genai.embed_content``."""

    def __call__(self, input):
        """Embed ``input`` and return the ``"embedding"`` field of the response.

        The request always uses the ``models/text-embedding-004`` model with
        the ``retrieval_document`` task type and a fixed title.
        """
        response = genai.embed_content(
            model='models/text-embedding-004',
            content=input,
            task_type="retrieval_document",
            title="Custom query",
        )
        return response["embedding"]

# Step 4: Add vectors to ChromaDB
def create_chroma_db(documents, name):
    """Create a fresh Chroma collection called ``name`` and fill it.

    If creating the collection fails with an error whose message mentions
    both "Collection" and "already exists", the existing collection is
    deleted and created again; any other error is re-raised.

    Args:
        documents: Iterable of texts; each is added under the string form of
            its position (``"0"``, ``"1"``, ...).
        name: Name of the collection.

    Returns:
        The populated collection.
    """
    client = chromadb.Client()
    try:
        collection = client.create_collection(
            name=name, embedding_function=GeminiEmbeddingFunction()
        )
    except Exception as err:
        message = str(err)
        is_duplicate = "Collection" in message and "already exists" in message
        if not is_duplicate:
            # Anything other than a name clash is not handled here.
            raise err
        # Drop the stale collection, then start again from an empty one.
        client.delete_collection(name=name)
        collection = client.create_collection(
            name=name, embedding_function=GeminiEmbeddingFunction()
        )

    for index, text in enumerate(documents):
        collection.add(documents=[text], ids=[str(index)])
    return collection

# Step 5: Query ChromaDB
def get_relevant_passage(query, db):
    """Return the single best-matching document for ``query``.

    Args:
        query: The question text to search with.
        db: A collection-like object exposing
            ``query(query_texts=..., n_results=...)`` that returns a mapping
            with a ``'documents'`` entry holding one list of documents per
            query text.

    Returns:
        The top document, or ``None`` when the query yields no documents
        (for example when the collection is empty), so callers can test the
        result for truthiness instead of handling an ``IndexError``.
    """
    results = db.query(query_texts=[query], n_results=1)
    matches = results['documents']
    # Only one query text is sent, so only the first result list matters.
    if not matches or not matches[0]:
        return None
    return matches[0][0]

# Step 6: Generate response with Gemini
def generate_response_with_gemini(prompt):
    """Send ``prompt`` to the ``gemini-1.0-pro`` model and return the reply text."""
    return genai.GenerativeModel('gemini-1.0-pro').generate_content(prompt).text

# Prompt
def make_prompt(query, relevant_passage):
    """Build the ExploreEgypt Bot prompt for a question and its reference passage.

    Single and double quotes are removed from the passage and its newlines
    become spaces before it is embedded; the query is inserted unchanged.

    Args:
        query: The user's question.
        relevant_passage: Reference text the answer should be based on.

    Returns:
        The complete prompt as one line of text ending in ``ANSWER:``.
    """
    # One pass that drops both quote characters and flattens newlines.
    cleanup = str.maketrans({"'": "", '"': "", "\n": " "})
    passage = relevant_passage.translate(cleanup)
    sections = [
        "You are ExploreEgypt Bot, a travel assistant specialized in Egyptian destinations.",
        "Answer questions using the reference passage below.",
        "Provide detailed information about specific places, including name, description, location and timings.",
        "Maintain a friendly, knowledgeable tone.",
        f"QUESTION: '{query}'",
        f"PASSAGE: '{passage}'",
        "ANSWER:",
    ]
    return " ".join(sections)

# Chat function
def chat_with_pdf(query, db):
    """Answer ``query`` using the passage retrieved from ``db``.

    Args:
        query: The user's question.
        db: The collection to search for a reference passage.

    Returns:
        The generated answer, or ``"No relevant document found."`` when the
        retrieved passage is empty or otherwise falsy.
    """
    passage = get_relevant_passage(query, db)
    if not passage:
        return "No relevant document found."
    return generate_response_with_gemini(make_prompt(query, passage))

# Adding PDF content to ChromaDB
# Build the index once: extract the PDF text, collapse its whitespace, and
# store the result in the "egypt_travel" collection.
pdf_path = 'dataset/best 55 places.pdf'
text = extract_text_from_pdf(pdf_path)
cleaned_text = preprocess_text(text)
# The whole cleaned PDF is indexed as one single document.
documents = [cleaned_text]
db = create_chroma_db(documents, "egypt_travel")

# Test
# Ask a sample question against the indexed PDF and print the generated answer.
query = "Tell me about the Great Pyramid of Giza."
response = chat_with_pdf(query, db)
print(response)


**Great Pyramid of Giza**

* **Location:** Giza, Egypt
* **Timings:** 8 AM to 5 PM, all days
* **Entry Fee:** INR 1,700 per head

The Great Pyramid of Giza, also known as the Pyramid of Khufu, is the largest and oldest of the three pyramids in the Giza pyramid complex. It is one of the Seven Wonders of the Ancient World and remained the tallest human-made structure for over 3,800 years. Constructed as a tomb for Pharaoh Khufu of the Fourth Dynasty, the pyramid showcases intricate construction techniques and astronomical knowledge. Tourists can explore the pyramid's interior chambers and marvel at its vast scale and historical significance.


In [7]:
# Test
# Second sample question; reuses the `db` collection built in the earlier cell.
query = "Tell me about  white desert."
response = chat_with_pdf(query, db)
print(response)

The White Desert, located near Farafra in Egypt, is a truly unique and otherworldly place. Imagine towering chalk mountains that create a landscape that looks like a snowy expanse, even in the middle of the arid desert! The entire area is full of stunning, iceberg-shaped pinnacles and enormous white boulders, making it a breathtaking sight to behold. If you're tired of exploring temples and tombs, the White Desert is a must-visit for its captivating natural beauty.  

