In [1]:
import sys
import os
import pandas as pd
sys.path.append(os.path.abspath(".."))

from rag_system import (load_vector_store, load_llm, retrieve_insights, generate_answer)

In [2]:
# Load the vector store via the project helper.
# NOTE(review): `vector_store` is not referenced by any other visible cell, and
# `retrieve_insights` is called without it — presumably `load_vector_store()`
# also initialises state inside `rag_system`; confirm before removing this cell.
vector_store = load_vector_store()

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Smoke-test the retriever on its own, before the LLM is involved.
print("Retrieving insights")
results = retrieve_insights("What is the cancellation rate in City Hotel?", k=3)
# Every hit is a (document, score) pair. In the recorded run the most relevant
# document carries the LOWEST score, so the value looks like a distance rather
# than a similarity — TODO confirm against `rag_system`.
for hit in results:
    doc, score = hit
    print(f"  Score: {score:.3f}, Type: {doc.metadata['type']}, Content: {doc.page_content}")

Retrieving insights
  Score: 0.194, Type: cancellation_rate_by_hotel, Content: The cancellation rate for City Hotel was 41.73%
  Score: 0.416, Type: cancellation_rate_by_hotel, Content: The cancellation rate for Resort Hotel was 27.76%
  Score: 0.605, Type: cancellation_rate_by_month_year, Content: The cancellation rate for 10/2015 was 34.94%


In [5]:
# Instantiate the language model via the project helper. `answer_query_with_llm`
# reads this module-level `llm`, so this cell must run before that function is called.
llm = load_llm()

In [6]:
def answer_query_with_llm(query, k):
    """Answer ``query`` with the LLM, using the top-``k`` retrieved insights as context.

    Args:
        query: Natural-language question. It is passed to ``retrieve_insights``
            and embedded verbatim in the prompt.
        k: Number of results requested from ``retrieve_insights``.

    Returns:
        The ``.content`` attribute of whatever ``llm.invoke`` returns when that
        attribute exists (message-style responses); otherwise the returned
        value itself (e.g. a plain string).

    Notes:
        Relies on the module-level ``llm`` and ``retrieve_insights``; the cells
        that define/import them must have been executed first.
    """
    results = retrieve_insights(query, k=k)

    # One "Source" line per retrieved document. The retrieval score is not
    # shown to the model, so it is deliberately discarded here.
    context = "".join(
        f"Source (Type: {doc.metadata['type']}): {doc.page_content}\n"
        for doc, _score in results
    )

    prompt = f"""
    You are a helpful assistant that answers questions based on provided context.
    If the answer is not in the context, respond with "I don't have enough information to answer."

    Context:
    {context}

    Question: {query}

    Answer:
    """
    response = llm.invoke(prompt)
    # Message-style responses expose the text as `.content`; string-returning
    # models hand back the text directly. Support both instead of raising
    # AttributeError on the latter.
    return getattr(response, "content", response)

In [None]:
# End-to-end check: retrieval plus LLM answer for a single question.
print("\nTesting the full RAG system:")
user_query = "What was the cancellation rate for resort hotels?"
# k=5 asks the retriever for five results to place in the prompt context.
answer = answer_query_with_llm(user_query, k=5)
print(answer)



Testing the full RAG system:
The cancellation rate for Resort Hotel was 27.76%


In [None]:
# Ask which country accounts for the most bookings, through the same RAG pipeline.
# Rebinds `user_query` and `answer` from the previous query cell.
user_query = "People from which Country makes the most hotel bookings?"
answer = answer_query_with_llm(user_query, k=5)
print(answer)

USA makes the most hotel bookings with 2097 bookings.


In [12]:
# Ask for the average lead time, through the same RAG pipeline.
# Rebinds `user_query` and `answer` from the previous query cell.
user_query = "What was the average lead time?"
answer = answer_query_with_llm(user_query, k=5)
print(answer)

The average lead time was 104.01 days.
