## Data Ingestion

In [2]:
import json

# Define the file path
file_path = '../data/booking-data.json'

# Define the keys you want to extract
keys_to_extract = ['url', 'name', 'type', 'description', 'stars', 'price', 'currency', 'rating', 'reviews', 'breakfast', 'address', 'rooms', 'categoryReviews', 'facilities', 'image']

def load_hotel_listings(file_path, keys_to_extract):
    with open(file_path, 'r') as file:
        data = json.load(file)
    
    hotel_listings = []
    for item in data:
        # Extract only the specified keys
        listing = {key: item.get(key) for key in keys_to_extract if key in item}
        hotel_listings.append(listing)
    
    return hotel_listings

# Load the data
hotel_listings = load_hotel_listings(file_path, keys_to_extract)

# Print the number of listings loaded
print(f"Number of hotel listings loaded: {len(hotel_listings)}")

# Print the first listing to verify
if hotel_listings:
    print("First hotel listing:")
    print(json.dumps(hotel_listings[0], indent=2))

Number of hotel listings loaded: 100
First hotel listing:
{
  "url": "https://www.booking.com/hotel/ca/econo-lodge-inn-suites.html?checkin=2024-08-23&checkout=2024-08-25&selected_currency=CAD&lang=en-us&group_adults=8&group_children=0&no_rooms=4",
  "name": "Quality Inn Toronto Airport",
  "type": "inn",
  "description": "You're eligible for a Genius discount at Quality Inn Toronto Airport! To save at this property, all you have to do is sign in.\nProviding complimentary airport shuttle service to Toronto's Pearson International Airport and offering easy access to the downtown area, this Toronto hotel features a free daily breakfast and free wireless internet.\n\nThe Quality Inn Toronto Airport is conveniently placed only a short distance from public transportation systems leading to downtown Toronto. Woodbine Center shopping mall and Canada's Wonderland amusement park are also easily accessible.\n\nWhile staying at the Quality Inn Toronto Airport, guests can enjoy starting the day wit

## Serialize

In [3]:
def serialize_hotel_listing(listing):
    serialized = f"Hotel URL: {listing.get('url', 'N/A')}\n"
    serialized += f"Name: {listing.get('name', 'N/A')}\n"
    serialized += f"Description: {listing.get('description', 'N/A')}\n"
    serialized += f"Address: {listing.get('address', 'N/A')}\n"
    serialized += f"Stars: {listing.get('stars', 'N/A')}\n"
    serialized += f"Rating: {listing.get('rating', 'N/A')}\n"
    serialized += f"Number of Reviews: {listing.get('reviews', 'N/A')}\n"
    serialized += f"Breakfast Type: {listing.get('breakfast', 'N/A')}\n"
    
    if 'rooms' in listing and listing['rooms']:
        room = listing['rooms'][0]  # Taking the first room as an example
        serialized += f"Room Type: {room.get('roomType', 'N/A')}\n"
        serialized += f"Room Price: {room.get('price', 'N/A')} {room.get('currency', 'N/A')}\n"
        serialized += f"Room Features: {', '.join(room.get('features', []))}\n"
    
    if 'facilities' in listing:
        facility_names = [facility['name'] for facility in listing['facilities'] if 'name' in facility]
        serialized += f"Hotel Facilities: {', '.join(facility_names)}\n"
    
    if 'image' in listing and listing['image']:
        serialized += f"Image URL: {listing.get('image', 'N/A')}\n"
    
    return serialized.strip()


serialized_listings = [serialize_hotel_listing(listing) for listing in hotel_listings]

## Text Splitting, Embedding, and Vector-Store

In [4]:
## Text Splitting
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,  # Adjust based on your needs and model's context window
    chunk_overlap=200,
    length_function=len,
)

# Split the serialized listings
split_texts = []
for text in serialized_listings:
    split_texts.extend(text_splitter.split_text(text))

In [6]:
## Embedding and VectorStore
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

from langchain.schema import Document
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

# Set your OpenAI API key
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

# Initialize the OpenAI embeddings
embeddings = OpenAIEmbeddings()

# Create Document objects
documents = [
    Document(
        page_content=chunk,
        metadata={"source": f"listing_{i}"}
    ) for i, chunk in enumerate(split_texts)
]

# Create and persist the Chroma database
db = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory="./chroma_db"
)


## Retriever as a Tool

In [7]:
# create a retriever from the database
retriever = db.as_retriever(search_kwargs={"k": 10}) # user-defined

# test the retriever
query = "hotels near Toronto Airport"
retrieved_docs = retriever.invoke(query)

for i, doc in enumerate(retrieved_docs, 1):
    print(f"Document {i}:")
    print(doc.page_content[:200] + "...")  # Print first 200 characters
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)



Document 1:
Hotel URL: https://www.booking.com/hotel/ca/econo-lodge-inn-suites.html?checkin=2024-08-23&checkout=2024-08-25&selected_currency=CAD&lang=en-us&group_adults=8&group_children=0&no_rooms=4
Name: Quality...
Metadata: {'source': 'listing_0'}
--------------------------------------------------
Document 2:
Hotel URL: https://www.booking.com/hotel/ca/travelodge-toronto-airport.html?checkin=2024-08-23&checkout=2024-08-25&selected_currency=CAD&lang=en-us&group_adults=8&group_children=0&no_rooms=4
Name: Dou...
Metadata: {'source': 'listing_126'}
--------------------------------------------------
Document 3:
Hotel URL: https://www.booking.com/hotel/ca/econo-lodge-inn-suites.html?checkin=2024-08-23&checkout=2024-08-25&selected_currency=CAD&lang=en-us&group_adults=8&group_children=0&no_rooms=4
Name: Quality...
Metadata: {'source': 'listing_0'}
--------------------------------------------------
Document 4:
Hotel URL: https://www.booking.com/hotel/ca/hilton-garden-inn-toronto-airport.html?

In [8]:
# creating retrieval chain
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from crewai_tools import BaseTool
from langchain_community.llms import Ollama

llm = Ollama(model="llama3")

# Create a RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

# Test!
result = qa_chain.invoke("Find hotels near Toronto Airport")
print(result['result'])

Based on the provided context, I can help you find hotels near Toronto Pearson International Airport.

The hotel mentioned in the text is NU Hotel Toronto Airport. It is located at 6465 Airport Road, L4V 1E4 Mississauga, Canada, which is approximately 0.9 mi away from the airport.


In [9]:
from crewai_tools import BaseTool

class BookingSearchTool(BaseTool):
    name: str = "Hotel Information Search Tool"
    description: str = "Search any hotel related information."

    def _run(self, query: str) -> str:
        # Implementation goes here
        result = qa_chain.invoke(query)
        # Extract the answer and source documents
        answer = result['result']
        sources = result['source_documents']
        
        # Format the output
        output = f"Answer: {answer}\n\nSources:\n"
        for i, doc in enumerate(sources, 1):
            output += f"{i}. {doc.page_content[:100]}...\n"
        
        return output

# booking tool
booking_tool = BookingSearchTool()

In [10]:
from langchain.tools import tool

# calculation
class CalculatorTools():

  @tool("Make a calculation")
  def calculate(operation):
    """Useful to perform any mathematical calculations, 
    like sum, minus, multiplication, division, etc.
    The input to this tool should be a mathematical 
    expression, a couple examples are `200*7` or `5000/2*10`
    """
    return eval(operation)

calculator_tool = CalculatorTools.calculate

In [12]:
import os

from langchain_google_community import GooglePlacesTool
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv()) # important line if cannot load api key

# Getting the api keys from the .env file
os.environ["GPLACES_API_KEY"] = os.getenv('GPLACES_API_KEY')

# load tools
places_tool = GooglePlacesTool()

# list of tools
tools = [booking_tool, places_tool, calculator_tool]

In [13]:
from crewai import Agent, Task, Crew
from crewai.process import Process

# Define the Agents
travel_expert = Agent(
    role='Hotel Researcher',
    goal='Efficiently locate and compile a comprehensive list of suitable hotel options in the specified location, \
        adhering to the given criteria such as check-in/out dates, number of guests, and any other specific requirements. \
        Ensure the gathered information is accurate, up-to-date, and includes key details like pricing, amenities, and guest ratings.',
    backstory='You are an experienced digital travel concierge with a keen eye for detail and a passion for finding the perfect accommodations.\
        With years of experience in the hospitality industry and a vast knowledge of global hotel chains and boutique properties, you\
        have honed your skills in navigating various booking platforms and hotel databases. \
        Your expertise lies in quickly sifting through numerous options to identify the most suitable choices for travelers, \
        taking into account factors such as location, price, amenities, and guest reviews.',
    verbose=True,
    allow_delegation=False,
    llm=llm,
    tools=tools,   
)

hotel_reviewer = Agent(
    role='Hotel Quality Analyst',
    goal='Thoroughly evaluate and compare hotel options to provide detailed, unbiased assessments that help travelers make informed decisions. \
        Analyze each hotel\'s amenities, location, value for money, and guest experiences to create comprehensive reviews that highlight strengths, weaknesses, and unique features.',
    backstory='You are a seasoned travel industry professional with over a decade of experience in hotel evaluation and critique. \
        Your background includes working as a luxury hotel inspector, a travel journalist for renowned publications, and a consultant for hotel rating systems. \
        This diverse experience has honed your ability to assess accommodations from multiple perspectives, considering both objective criteria and subjective guest experiences. \
        Your reviews are known for their depth, fairness, and ability to capture the essence of each property. You have a particular talent for identifying hidden gems and spotting potential issues that might affect a guest\'s stay. \
        Your expertise covers a wide range of accommodations, from budget-friendly options to ultra-luxury resorts, and you\'re adept at evaluating hotels in various cultural contexts around the world.',
    verbose=True,
    allow_delegation=False,
    llm=llm,
    tools=tools, 
)

In [14]:
# Define the Tasks
hotel_search_task = Task(
    description="Search for 5 hotels in {location} for {number_of_people} adult people, checking in on {check_in} and checking out on {check_out}.",
    agent=travel_expert,
    expected_output="All the details of a specifically chosen accommodation.")

hotel_review_task = Task(
    description="Based on the recommendations provided, pick the best options based on ratings, reviews, and facilities available. \
    Consider that Budget is {budget} CAD. Try to find accommodations in and around the Budget.",
    expected_output="All the details of a specifically chosen accommodation including the price, URL, and any image if available.",
    agent=hotel_reviewer,
)

In [15]:
from crewai.process import Process

# Create the Crew
travel_agent_crew = Crew(
    agents=[travel_expert, hotel_reviewer],
    tasks=[hotel_search_task, hotel_review_task],
    verbose=True,
    process=Process.sequential,
    # step_callback=[conversation_logger]
)

event_criteria = {
    'location': 'Brampton, Ontario',
    'check_in': '23rd August, 2024',
    'check_out': '25th August, 2024',
    'number_of_people': 2,
    'budget': 600
    }

result = travel_agent_crew.kickoff(inputs=event_criteria)

[1m[95m [2024-07-14 14:01:27][DEBUG]: == Working Agent: Hotel Researcher[00m
[1m[95m [2024-07-14 14:01:27][INFO]: == Starting Task: Search for 5 hotels in Brampton, Ontario for 2 adult people, checking in on 23rd August, 2024 and checking out on 25th August, 2024.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: I need to start by searching for hotels in Brampton, Ontario. Then, I'll narrow down the options based on check-in and check-out dates.

Action: Hotel Information Search Tool
Action Input: {'query': 'hotels in Brampton, Ontario[0m[32;1m[1;3mI see what's going on here! It seems like there's an issue with the input format for the Hotel Information Search Tool. Let me try again.

Thought:
Action: Hotel Information Search Tool
Action Input: {"query": "hotels in Brampton, Ontario[0m[95m 

Answer: Based on the provided context, there are two hotels mentioned:

1. Hilton Garden Inn Toronto/Brampton (https://www.booking.com/hotel/ca/brampton-2648

In [16]:
from IPython.display import Markdown

Markdown(result)

**Best Option:**

* **Hotel:** Homewood Suites by Hilton Toronto-Oakville
* **Price:** CAD 549 (Booking.com)
* **URL:** https://www.booking.com/hotel/ca/homewood-suites-by-hilton-r-toronto-oakville.html?checkin=2024-08-23&checkout=2024-08-25
* **Image:**

Note: The prices may vary based on the time of year, room type, and availability.

## Gradio UI

In [19]:
import gradio as gr
from crewai import Crew, Process
from langchain.chat_models import ChatOpenAI
from IPython.display import Markdown

# Import your agents and tasks here
# from your_module import travel_expert, hotel_reviewer, hotel_finalizer, hotel_search_task, hotel_review_task, hotel_finalize_task

def run_travel_agent_crew(location, check_in, check_out, number_of_people, budget):
    # Create the Crew
    travel_agent_crew = Crew(
    agents=[travel_expert, hotel_reviewer],
    tasks=[hotel_search_task, hotel_review_task],
    verbose=True,
    process=Process.sequential,
    )

    event_criteria = {
        'location': location,
        'check_in': check_in,
        'check_out': check_out,
        'number_of_people': int(number_of_people),
        'budget': float(budget)
    }

    result = travel_agent_crew.kickoff(inputs=event_criteria)
    
    # Convert the result to Markdown
    markdown_result = Markdown(result).data
    return markdown_result

# Create Gradio interface
iface = gr.Interface(
    fn=run_travel_agent_crew,
    inputs=[
        gr.Textbox(label="Location"),
        gr.Textbox(label="Check-in Date"),
        gr.Textbox(label="Check-out Date"),
        gr.Number(label="Number of People"),
        gr.Number(label="Budget")
    ],
    outputs=gr.Markdown(label="Result"),
    title="Travel Agent Crew",
    description="Enter your travel details to get hotel recommendations.",
    allow_flagging="never",
)

# Launch the app
iface.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.






[1m[95m [2024-07-14 17:42:11][DEBUG]: == Working Agent: Hotel Researcher[00m
[1m[95m [2024-07-14 17:42:11][INFO]: == Starting Task: Search for 5 hotels in Brampton, Ontario for 2 adult people, checking in on 2024-08-23 and checking out on 2024-08-25.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought:

Action: Hotel Information Search Tool
Action Input: {'query': 'Brampton, Ontario hotels 2024-08-23 2024-08-25[0m[32;1m[1;3mLet's try again!

Thought:
We need to search for hotel options in Brampton, Ontario for two adult people checking in on August 23rd and checking out on August 25th.

Action: Hotel Information Search Tool
Action Input: {'query': "hotels in Brampton, Ontario, check-in 2024-08-23, check-out 2024-08-25, adults=2[0m[32;1m[1;3mThought:
Let's try another approach. Since we need to search for hotel options in Brampton, Ontario, and also specify some details like dates and number of guests.

Action: google_places
Action Input: {'search_qu