In [1]:
import getpass
import os
import re
import warnings
from typing import Optional

from IPython.display import display, Markdown
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

warnings.filterwarnings("ignore")

In [2]:
# Set API key
# Credentials must never be hardcoded in a notebook: read the key from the
# environment and fall back to an interactive prompt so it is not saved in the file.
# NOTE(review): a key was previously embedded in this cell; treat it as leaked and revoke it.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

# Initialize components
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.5)

# Load existing vector store
vectorstore = Chroma(persist_directory="../3_Vector Storage/chroma_store", embedding_function=embedding)

In [3]:
# Sanity check: make sure the persisted collection can be opened and report its size
collection_name = vectorstore._collection.name
collection = vectorstore._client.get_collection(collection_name)
print(f"Number of documents in collection: {collection.count()}")

Number of documents in collection: 3652


In [4]:
# Module-level buffer holding every message logged by the pipeline
pipeline_log = []

def log_message(message: str, print_immediately: bool = False):
    """Record a message in the pipeline log and echo it to stdout on request.

    Args:
        message: Text to append to ``pipeline_log``.
        print_immediately: When true, the message is also printed right away.
    """
    pipeline_log.append(message)
    if not print_immediately:
        return
    print(message)

In [5]:
def retrieve_hotel_name(query: str, k: int = 1000) -> Optional[str]:
    """Find the most relevant hotel name by searching only non-section documents.

    Args:
        query: Free-text user query used for the similarity search.
        k: Number of documents to fetch before filtering. It is deliberately
            large because section/review chunks are discarded afterwards and
            may otherwise crowd out the hotel-level documents.

    Returns:
        The "Hotel Name" metadata value of the first retrieved document that has
        a "Hotel Name" and no "Section" in its metadata, or None when no such
        document is among the results.
    """
    log_message(f"\nüîç Searching for hotel matching: '{query}'")

    # Get matching documents
    docs = vectorstore.similarity_search(query, k=k)

    log_message(f"\nüìÑ Found {len(docs)} documents in initial search:")

    # Keep only documents that:
    # 1. Have "Hotel Name" in metadata
    # 2. Do NOT have "Section" in metadata
    hotel_docs = [
        doc for doc in docs
        if "Hotel Name" in doc.metadata
        and "Section" not in doc.metadata
    ]

    log_message(f"\nüè® Found {len(hotel_docs)} non-section hotel documents:")

    if not hotel_docs:
        log_message("\n‚ùå No valid hotel documents found (all had sections)")
        return None

    # Return the metadata value verbatim (it may carry a "Hotel Name: " prefix).
    # retrieve_hotel_info() uses this exact value as a metadata filter, so it
    # must not be cleaned or otherwise altered here.
    return hotel_docs[0].metadata["Hotel Name"]

In [6]:
def retrieve_hotel_info(hotel_name: str, query: str) -> dict:
    """Collect one hotel's chunks, split into general context and reviews.

    Context chunks come from a search with an empty query string; review chunks
    come from a search with the user query. Both searches are restricted to
    documents whose "Hotel Name" metadata equals ``hotel_name``.

    Returns:
        A dict with the keys "hotel_context_chunks" and "reviews_chunks".
    """
    reviews_section = "üí¨ Reviews"

    def _search(text):
        # Every lookup is limited to the requested hotel and capped at 50 hits
        return vectorstore.similarity_search(text, k=50, filter={"Hotel Name": hotel_name})

    log_message(f"\nFetching chunks for hotel: {hotel_name}")

    # Non-review chunks: retrieved without any query text
    all_chunks = _search("")
    hotel_context_chunks = []
    for chunk in all_chunks:
        if chunk.metadata.get("Section") != reviews_section:
            hotel_context_chunks.append(chunk)

    # Review chunks: retrieved with the user query, then narrowed to the reviews section
    all_relevant_chunks = _search(query)
    reviews_chunks = []
    for chunk in all_relevant_chunks:
        if chunk.metadata.get("Section") == reviews_section:
            reviews_chunks.append(chunk)

    log_message(f"Found {len(all_chunks)} total chunks (excluding query-specific review search):")
    log_message(f"- {len(hotel_context_chunks)} hotel context chunks")
    log_message(f"- {len(reviews_chunks)} review chunks matching query")

    result = {}
    result["hotel_context_chunks"] = hotel_context_chunks
    result["reviews_chunks"] = reviews_chunks
    return result

In [7]:
# --- Raw template strings -------------------------------------------------

# Stage 1a: distil the non-review chunks down to what matters for the query
EXTRACT_HOTEL_CONTEXT = """You are a helpful hotel information extractor. Given the following hotel data chunks, extract hotel context that would be relevant for {user_query}.

Return your response in this format:
Hotel Context: <extracted context>

hotel data chunks:
{hotel_data}
"""

# Stage 1b: pick out the reviews that speak to the query
EXTRACT_HOTEL_REVIEWS = """You are a helpful hotel reviews extractor. Given the following hotel review chunks, extract the most relevant recent reviews that address {user_query}.

Return your response in this format:
Relevant latest Reviews: <extracted reviews>

hotel review chunks:
{review_data}
"""

# Stage 2: answer the user from the extracted context and reviews
FINAL_ANSWER_TEMPLATE = """You are a knowledgeable hotel concierge. Help the user with their query by considering:

User Query: {user_query}

Hotel Context:
{hotel_context}

Relevant Reviews:
{hotel_reviews}

Provide a detailed, helpful response that addresses the user's specific needs.
-Never recommend to read more reviews.
"""

# --- Prompt objects built from the strings above ---------------------------
extract_hotel_context_prompt = PromptTemplate.from_template(EXTRACT_HOTEL_CONTEXT)
extract_hotel_reviews_prompt = PromptTemplate.from_template(EXTRACT_HOTEL_REVIEWS)
final_answer_prompt = PromptTemplate.from_template(FINAL_ANSWER_TEMPLATE)

In [8]:
def _strip_label(text: str, label: str) -> str:
    """Return what follows the first case-insensitive occurrence of ``label``.

    If ``label`` does not occur in ``text``, ``text`` is returned unchanged.
    """
    match = re.search(re.escape(label), text, flags=re.IGNORECASE)
    if match is None:
        return text
    return text[match.end():].strip()


def hotel_recommendation_chain(query: str) -> str:
    """Answer a hotel question in four steps: find the hotel, fetch its chunks,
    extract relevant context and reviews with the LLM, then generate the answer.

    The global ``pipeline_log`` is reset at the start of every call, so any
    message logged before calling this function is discarded.

    Args:
        query: The user's free-text question; expected to mention a hotel.

    Returns:
        The LLM's final answer, or a fixed apology message when no hotel or no
        chunks for the hotel could be retrieved.
    """
    global pipeline_log
    pipeline_log = []  # Reset the log for each new query

    log_message("\n" + "="*50)
    log_message("üîç Starting Hotel Recommendation Pipeline")
    log_message(f"üìù User Query: '{query}'")

    # Step 1: Retrieve hotel name
    log_message("\nüîÑ STEP 1: Retrieving Hotel Name")
    hotel_name = retrieve_hotel_name(query)
    if not hotel_name:
        log_message("‚ùå No hotel found matching the query")
        return "I couldn't find information about that hotel. Please check the name and try again."
    log_message(f"‚úÖ Found Hotel: {hotel_name}")

    # Step 2: Retrieve separated chunks for this hotel
    log_message("\nüîÑ STEP 2: Retrieving Hotel Chunks")
    chunks_dict = retrieve_hotel_info(hotel_name, query)
    hotel_context_chunks = chunks_dict["hotel_context_chunks"]
    reviews_chunks = chunks_dict["reviews_chunks"]

    if not hotel_context_chunks and not reviews_chunks:
        log_message(f"‚ùå Found hotel {hotel_name} but no details available")
        return f"I found {hotel_name} but couldn't retrieve any details about it."

    # Prepare data separately for context and reviews; each falls back to a
    # placeholder when there are no chunks of that kind.
    hotel_context_data = "\n\n".join(
        f"{chunk.metadata.get('Section', 'General Information')}:\n{chunk.page_content}"
        for chunk in hotel_context_chunks
    ) if hotel_context_chunks else "No hotel context available"

    review_data = "\n\n".join(
        f"{chunk.metadata.get('Review')}:\n{chunk.page_content}"
        for chunk in reviews_chunks
    ) if reviews_chunks else "No reviews available"

    log_message(f"\nüìö Retrieved chunks about {hotel_name}:")
    log_message(f"- {len(hotel_context_chunks)} context chunks")
    log_message(f"- {len(reviews_chunks)} review chunks")

    # Step 3: Extract relevant context and reviews separately
    log_message("\nüîÑ STEP 3: Extracting Context and Reviews")

    # Extract hotel context
    log_message("\nüîß Extracting Hotel Context")
    log_message(hotel_context_data)
    if hotel_context_chunks:
        context_chain = extract_hotel_context_prompt | llm
        context_result = context_chain.invoke({
            "user_query": query,
            "hotel_data": hotel_context_data
        })
        # Drop the "Hotel Context:" label the prompt asks the model to emit
        hotel_context = _strip_label(context_result.content, "Hotel Context:")
    else:
        # Nothing to extract from: skip the LLM call rather than let it invent context
        hotel_context = hotel_context_data

    # Extract reviews
    log_message("\nüîß Extracting Reviews")
    log_message(review_data)
    if reviews_chunks:
        reviews_chain = extract_hotel_reviews_prompt | llm
        reviews_result = reviews_chain.invoke({
            "user_query": query,
            "review_data": review_data
        })
        # The prompt requests the label "Relevant latest Reviews:"; matching
        # "Latest Reviews:" case-insensitively removes that label as well as a
        # plain "Latest Reviews:" variant.
        hotel_reviews = _strip_label(reviews_result.content, "Latest Reviews:")
    else:
        # Nothing to extract from: skip the LLM call rather than let it invent reviews
        hotel_reviews = review_data

    log_message("\nüìã Extracted Information:")
    log_message(f"\nüè® Hotel Context (length: {len(hotel_context)} chars):")
    log_message(hotel_context)
    log_message(f"\n‚≠ê Latest Reviews (length: {len(hotel_reviews)} chars):")
    log_message(hotel_reviews)

    # Step 4: Generate final answer
    log_message("\nüîÑ STEP 4: Generating Final Answer")
    # Rendered only for the log; the chain below formats the same template again
    final_prompt = final_answer_prompt.format(
        user_query=query,
        hotel_context=hotel_context,
        hotel_reviews=hotel_reviews
    )

    log_message("\nüí° Final Prompt to LLM:")
    log_message(final_prompt)

    answer_chain = final_answer_prompt | llm
    final_answer = answer_chain.invoke({
        "user_query": query,
        "hotel_context": hotel_context,
        "hotel_reviews": hotel_reviews
    })

    log_message("\n" + "="*50)
    log_message("‚úÖ Pipeline Complete")
    return final_answer.content

In [9]:
def print_pipeline_log():
    """Write the accumulated pipeline log to stdout, one message per line."""
    joined_log = "\n".join(pipeline_log)
    print(joined_log)

In [10]:
# Example usage
if __name__ == "__main__":

    user_query = "do you recommend me to go to Nour Palace knowing that i don t have children and i want a quiet place with no animation"

    # hotel_recommendation_chain() resets pipeline_log on entry, so anything
    # logged before this call would be thrown away; log only after it returns.
    response = hotel_recommendation_chain(user_query)

    # Append the answer so it also appears in the full pipeline log
    log_message(response)
    log_message("="*50)

    # Render the answer as Markdown under a heading
    display(Markdown('### üå¥ Hotel Recommendation: '))
    display(Markdown(response))

### üå¥ Hotel Recommendation: 

Okay, based on your preference for a quiet place with no animation and the information I have about Nour Palace, here's my recommendation:

**I would advise against choosing Nour Palace for your trip.**

While Nour Palace is a 5-star resort with many appealing amenities, the overwhelming feedback from recent reviews highlights a strong focus on animation and entertainment. Many guests specifically praise the animation team and the variety of evening entertainment. This suggests that the hotel has a lively atmosphere, which is the opposite of what you're looking for.

Even though the hotel is described as having a "Quiet" hotel style, the reviews indicate that this is not the primary experience for most guests. The presence of a water park, kids club, and nightclub/DJ further reinforces the idea that Nour Palace caters more towards families and those seeking an active, engaging vacation rather than a tranquil retreat.

Therefore, to ensure you have a relaxing and quiet vacation without constant animation, I recommend exploring alternative hotels that are specifically marketed as adults-only or quiet resorts. These types of hotels are designed to provide a peaceful and serene environment, free from the noise and activities associated with family-oriented entertainment.


## What is happening BEHIND THE SCENES

In [12]:
# Print every message logged since the most recent hotel_recommendation_chain() call
# started (that function resets pipeline_log on entry).
print("\nFULL PIPELINE LOG:")
print_pipeline_log()


FULL PIPELINE LOG:

üîç Starting Hotel Recommendation Pipeline
üìù User Query: 'do you recommend me to go to Nour Palace knowing that i don t have children and i want a quiet place with no animation'

üîÑ STEP 1: Retrieving Hotel Name

üîç Searching for hotel matching: 'do you recommend me to go to Nour Palace knowing that i don t have children and i want a quiet place with no animation'

üìÑ Found 1000 documents in initial search:

üè® Found 24 non-section hotel documents:
‚úÖ Found Hotel: Hotel Name: Hotel Nour Palace Resort & Thalasso

üîÑ STEP 2: Retrieving Hotel Chunks

Fetching chunks for hotel: Hotel Name: Hotel Nour Palace Resort & Thalasso
Found 21 total chunks (excluding query-specific review search):
- 5 hotel context chunks
- 16 review chunks matching query

üìö Retrieved chunks about Hotel Name: Hotel Nour Palace Resort & Thalasso:
- 5 context chunks
- 16 review chunks

üîÑ STEP 3: Extracting Context and Reviews

üîß Extracting Hotel Context
üõå Room Types:
- O