# AI Chatbot for booking.com

In [64]:
# This project builds an AI chatbot that can scrape booking.com and look for hotel deals.

###############################################################################
#----------------------------------IMPORTS------------------------------------#
###############################################################################

import time
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

###############################################################################
#------------------------------GLOBAL VARIABLES-------------------------------#
###############################################################################

# Load variables from a local .env file (if present) into the process environment
# so that secrets such as the OpenAI key are not hard-coded in this file.
load_dotenv()
# NOTE(review): `openai_api_key` is not referenced anywhere else in the visible code;
# the client below is created without arguments and presumably picks the key up
# from the OPENAI_API_KEY environment variable itself — confirm before removing.
openai_api_key = os.getenv('OPENAI_API_KEY') 

# Module-level OpenAI client shared by every chat request handled below.
openai = OpenAI()

###############################################################################
#-----------------------------------CLASSES-----------------------------------#
###############################################################################

class HotelBooking:
    """A class to automate interactions with Booking.com using Selenium WebDriver.

    One instance owns one Chrome session. Call `close_driver()` when finished
    (ideally from a `finally` block) so the browser process is not leaked.
    """

    def __init__(self):
        """
        Initializes the HotelBooking class.

        - Sets up the Selenium WebDriver.
        """
        self.driver = self.get_driver()

    def get_driver(self):
        """
        Configures and initializes the Selenium WebDriver with necessary options.

        - Disables GPU usage for better compatibility.
        - Sets a fixed window size for consistency in webpage rendering.
        - Moves the window off-screen (useful for headless-like behavior without enabling headless mode).
        - Disables the sandbox mode (necessary for some environments like Docker).
        - Prevents excessive memory usage by disabling /dev/shm (useful in constrained environments).

        Returns:
            webdriver.Chrome: A configured instance of the Chrome WebDriver.
        """
        options = Options()
        options.add_argument("--disable-gpu")  # Disables GPU acceleration for improved stability
        # Chrome expects "width,height" here; the "1920x1080" spelling is not a valid size.
        options.add_argument("--window-size=1920,1080")
        options.add_argument("--window-position=-2000,0")  # Moves window off-screen
        options.add_argument("--no-sandbox")  # Required for running in some restricted environments
        options.add_argument("--disable-dev-shm-usage")  # Helps prevent memory overflow issues

        # Installs and initializes the Chrome WebDriver dynamically using WebDriverManager
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
        return driver

    def open_page(self, url="https://www.booking.com/"):
        """
        Opens the specified URL in the web browser controlled by the WebDriver.

        - Defaults to opening the Booking.com homepage if no URL is provided.
        - Calls a method to close any popups that might interfere with interactions.

        Args:
            url (str, optional): The webpage URL to be opened. Defaults to "https://www.booking.com/".
        """
        self.driver.get(url)  # Navigates to the specified webpage
        self.close_popups()  # Attempts to close any initial popups

    def close_popups(self):
        """
        Closes any pop-ups that may interfere with user interactions.

        - Attempts to dismiss a sign-in prompt if present.
        - Closes the cookie banner if found.

        Each pop-up is waited for up to 5 seconds; a missing pop-up is not an error.
        """
        try:
            # Wait until the dismiss button is actually clickable (not merely present
            # in the DOM), matching how the cookie banner is handled below.
            signin_button = WebDriverWait(self.driver, 5).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "[role='dialog'] button[aria-label='Dismiss sign-in info.']")
                )
            )
            signin_button.click()
            print("Sign-in pop-up window closed.")
        except TimeoutException:
            # If the sign-in pop-up is not found, continue execution
            print("Sign-in pop-up window did not appear, continuing...")

        try:
            # Wait for the "Accept" button on the cookie banner to become clickable
            cookie_banner = WebDriverWait(self.driver, 5).until(
                EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
            )
            cookie_banner.click()
            print("Cookie banner closed.")
        except TimeoutException:
            # If the cookie banner is not found or already closed, continue execution
            print("No cookie banner found or already closed.")

    def enter_destination(self, destination):
        """
        Inputs a destination into the search field and selects the first available suggestion.

        - Waits for the destination field to become interactable.
        - Sends the destination input and selects the first autocomplete result.

        Args:
            destination (str): The location to be searched for booking options.

        Raises:
            TimeoutException: If the search field or the first suggestion does not
                become clickable in time.
        """
        # Wait for the field instead of assuming it is already rendered.
        destination_field = WebDriverWait(self.driver, 5).until(
            EC.element_to_be_clickable((By.NAME, "ss"))
        )
        destination_field.send_keys(destination)

        # Fixed pause kept on purpose: a suggestion list may already be showing before
        # typing, so waiting for the element alone could click an outdated entry.
        time.sleep(3)

        # Select the first result from the autocomplete list
        first_result = WebDriverWait(self.driver, 5).until(
            EC.element_to_be_clickable((By.ID, "autocomplete-result-0"))
        )
        first_result.click()

    def select_dates(self, checkin_date, checkout_date):
        """
        Selects the check-in and check-out dates on the website's calendar.

        - Calls `pick_date()` for both check-in and check-out dates.

        Args:
            checkin_date (str): The check-in date to be selected.
            checkout_date (str): The check-out date to be selected.
        """
        self.pick_date(checkin_date)
        self.pick_date(checkout_date)
        print(f"Check-in and check-out dates have been selected: {checkin_date} - {checkout_date}")

    def pick_date(self, date, max_months=24):
        """
        Picks a specific date from the calendar. If the date is not immediately visible,
        it navigates through the calendar until it becomes available.

        - Clicks the date if found in the current calendar view.
        - If the date is not found, navigates to the next month and retries.
        - Gives up after `max_months` forward steps so a malformed or past date
          cannot keep the browser paging through the calendar indefinitely.

        Args:
            date (str): The date to be selected; it is matched against the
                `aria-label` of the calendar's day cells.
            max_months (int, optional): Maximum number of "next month" clicks. Defaults to 24.

        Raises:
            TimeoutException: If the date is not found within `max_months` months, or
                the "Next month" button is not clickable.
        """
        months_advanced = 0
        while True:
            try:
                day_cell = WebDriverWait(self.driver, 2).until(
                    EC.element_to_be_clickable((By.XPATH, f"//span[@aria-label='{date}']"))
                )
            except TimeoutException:
                if months_advanced >= max_months:
                    raise TimeoutException(
                        f"Date '{date}' was not found after advancing {max_months} months."
                    ) from None
                # The date is not in the current view: move one month forward and retry.
                next_month_button = WebDriverWait(self.driver, 2).until(
                    EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Next month']"))
                )
                next_month_button.click()
                months_advanced += 1
            else:
                day_cell.click()
                return

    def set_occupancy(self, adults, children, kids_ages, rooms):
        """
        Configures the number of guests and rooms for the search.

        - Adjusts the number of adults, children, and rooms.
        - Sets the ages of children using dropdown selectors.

        Args:
            adults (int): Number of adults.
            children (int): Number of children.
            kids_ages (list[int] | None): List of children's ages; `None` is treated as empty.
            rooms (int): Number of rooms required.

        Raises:
            ValueError: If a counter cannot reach its requested value (see `adjust_counter`).
        """
        # Open the occupancy selection menu
        open_occupancy = WebDriverWait(self.driver, 2).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-testid='occupancy-config']"))
        )
        open_occupancy.click()

        # Adjust the number of adults and children
        self.adjust_counter("group_adults", adults)
        self.adjust_counter("group_children", children)

        # Set the ages of the children
        for index, age in enumerate(kids_ages or []):
            # Re-query on every pass: the dropdown list is looked up fresh rather than cached.
            age_dropdowns = self.driver.find_elements(By.CSS_SELECTOR, "select[name='age']")
            select_obj = Select(age_dropdowns[index])
            select_obj.select_by_value(str(age))
            # NOTE(review): the occupancy button is clicked again after each age is set;
            # presumably this works around a site-specific quirk — confirm before changing.
            open_occupancy.click()

        # Adjust the number of rooms
        self.adjust_counter("no_rooms", rooms)

        print("Occupancy settings updated successfully.")

    def adjust_counter(self, element_id, target_value):
        """
        Adjusts the plus/minus buttons for a given counter (e.g., adults, children, rooms).

        - Retrieves the current counter value.
        - Clicks the appropriate button until the target value is reached.
        - Stops with an error instead of looping forever when the target is unreachable.

        Args:
            element_id (str): The ID of the element representing the counter.
            target_value (int): The desired value for the counter.

        Raises:
            ValueError: If the required button is disabled or the target is still not
                reached after a bounded number of clicks.
        """
        # Locate the counter container
        counter_input = WebDriverWait(self.driver, 2).until(
            EC.presence_of_element_located((By.ID, element_id))
        )
        counter_div = counter_input.find_element(By.XPATH, "./parent::div[@class]")

        # The first button in the container decrements, the second increments.
        buttons = counter_div.find_elements(By.TAG_NAME, "button")
        decrease_button, increase_button = buttons[0], buttons[1]

        max_clicks = 100  # Safety cap; far more clicks than any realistic guest or room count.
        for _ in range(max_clicks):
            # Re-read the value from the live DOM after every click.
            current_value = int(
                self.driver.find_element(By.ID, element_id).get_attribute("aria-valuenow")
            )
            if current_value == target_value:
                return

            button = decrease_button if current_value > target_value else increase_button
            if not button.is_enabled():
                # A disabled button means the site's limit was hit; clicking it again
                # would never change the value.
                raise ValueError(
                    f"Cannot set '{element_id}' to {target_value}: "
                    f"the site does not allow moving past {current_value}."
                )
            button.click()

        raise ValueError(
            f"Could not set '{element_id}' to {target_value} after {max_clicks} clicks."
        )

    def submit_search(self):
        """
        Clicks the search button to initiate the search based on selected criteria.

        The first `button[type="submit"]` on the page is used.
        """
        search_button = self.driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]')
        search_button.click()
        print("Search initiated.")

    def select_filter(self, user_filter):
        """
        Selects a sorting filter from the dropdown menu based on user preference.

        - Opens the sorting dropdown.
        - Waits for the requested sorting option to become clickable.
        - Clicks the user-specified sorting filter.

        Args:
            user_filter (str): The filter name to be applied to the search results;
                it must equal the option's `aria-label`.

        Raises:
            TimeoutException: If the dropdown or the requested option does not
                become clickable within 10 seconds.
        """
        # Locate and click the dropdown menu to reveal sorting options
        dropdown = WebDriverWait(self.driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//button[@data-testid='sorters-dropdown-trigger']"))
        )
        dropdown.click()

        # Explicit wait instead of a fixed sleep: continues as soon as the option is
        # ready and tolerates a menu that takes longer than two seconds to render.
        sort_option = WebDriverWait(self.driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, f'button[aria-label="{user_filter}"]'))
        )
        sort_option.click()

    @staticmethod
    def _extract_or_default(extractor, default):
        """Return `extractor()`, or `default` if it raises an ordinary exception."""
        try:
            return extractor()
        except Exception:
            # Deliberate best-effort: a missing or stale element must not abort the whole
            # scrape. Unlike a bare `except:`, KeyboardInterrupt/SystemExit still propagate.
            return default

    @staticmethod
    def _clean_price(raw_text):
        """Normalize a scraped price: drop `&nbsp;` remnants and turn no-break spaces into spaces."""
        return raw_text.replace("&nbsp;", "").replace("\xa0", " ").strip()

    @staticmethod
    def _clean_review_score(raw_text):
        """Keep only the first line of the review-score text, without the "Scored" prefix."""
        return raw_text.replace("Scored", "").split("\n")[0].strip()

    def report_data(self, hotel_no):
        """
        Extracts hotel details from search results.

        - Retrieves the first `hotel_no` hotel listings.
        - Extracts details such as name, rating, address, room type, average rating, total price, and booking link.
        - Any field that cannot be read is replaced by a "No ... available" placeholder.

        Args:
            hotel_no (int | None): The number of hotels to extract details from.
                Negative values yield no results; `None` returns every card found.

        Returns:
            list[dict]: A list of dictionaries, each containing hotel details.

        Raises:
            TimeoutException: If no property card appears within 10 seconds.
        """
        # Locate all property cards in search results
        property_cards = WebDriverWait(self.driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@data-testid='property-card']"))
        )
        if hotel_no is not None:
            # Clamp at zero: a negative slice bound would drop cards from the end instead.
            property_cards = property_cards[:max(hotel_no, 0)]

        extracted_data = []
        for property_card in property_cards:
            find = property_card.find_element
            safe = self._extract_or_default

            # Each lambda runs immediately inside this iteration, so it always sees
            # the current card.
            property_details = {
                "Hotel name": safe(
                    lambda: find(By.XPATH, ".//div[@data-testid='title']").text.strip(),
                    "No hotel name available",
                ),
                # The star rating is the number of <span> elements inside the rating container.
                "Star rating": safe(
                    lambda: str(len(
                        find(By.XPATH, ".//div[starts-with(@data-testid, 'rating-')]")
                        .find_elements(By.XPATH, ".//span")
                    )),
                    "No star rating available",
                ),
                "Address": safe(
                    lambda: find(By.XPATH, ".//span[@data-testid='address']").text.strip(),
                    "No address available",
                ),
                "Room type": safe(
                    lambda: find(By.XPATH, ".//h4").text.strip(),
                    "No room type available",
                ),
                "Average rating": safe(
                    lambda: self._clean_review_score(
                        find(By.XPATH, ".//div[@data-testid='review-score']/div").text
                    ),
                    "No rating available",
                ),
                "Total price": safe(
                    lambda: self._clean_price(
                        find(By.XPATH, ".//span[@data-testid='price-and-discounted-price']").text
                    ),
                    "No price available",
                ),
                "Link": safe(
                    lambda: find(By.XPATH, ".//a[@data-testid='title-link']").get_attribute("href"),
                    "No link available",
                ),
            }
            extracted_data.append(property_details)

        return extracted_data

    def close_driver(self):
        """
        Closes the WebDriver session properly.

        - Ensures that the browser session is properly terminated.
        - Safe to call more than once: the driver reference is cleared after quitting.
        """
        if self.driver:
            try:
                self.driver.quit()
            finally:
                # Drop the reference even if quit() fails so a second call is a no-op.
                self.driver = None
            print("WebDriver closed.")

###############################################################################
#----------------------------------FUNCTIONS----------------------------------#
###############################################################################

def get_hotel_deals(destination, checkin_date, checkout_date, adults, children, kids_ages, rooms, user_filter, hotel_no):  
    """
    Retrieves hotel deals for a given destination, check-in and check-out dates, and occupancy details.
    
    - Opens the booking website and searches for hotels matching the user's criteria.
    - Filters the search results based on the specified sorting preference.
    - Extracts relevant hotel details and returns them in a structured format.
    - Always closes the browser, even when one of the scraping steps fails.
    
    Args:
        destination (str): The city where the user wants to book a hotel.
        checkin_date (str): The check-in date formatted as '9 August 2025'.
        checkout_date (str): The check-out date formatted as '9 August 2025'.
        adults (int): The number of adult guests.
        children (int): The number of children guests.
        kids_ages (list[int]): The ages of children.
        rooms (int): The total number of rooms required.
        user_filter (str): The filter applied to search results.
        hotel_no (int): The number of hotel results to retrieve (max 25).
    
    Returns:
        list[dict]: A list of dictionaries, each containing hotel details.

    Raises:
        Exception: Any error raised by a scraping step is propagated unchanged
            after the browser session has been closed.
    """
    hotel_booking = HotelBooking()

    # Every step below can raise (timeouts, missing elements). The finally clause
    # guarantees the Chrome process is shut down instead of leaking on each failure.
    try:
        # Open the booking website
        hotel_booking.open_page()

        # Enter the destination where the user wants to book a hotel
        hotel_booking.enter_destination(destination)

        # Select the check-in and check-out dates
        hotel_booking.select_dates(checkin_date, checkout_date)

        # Set the number of adults, children, their ages, and rooms required
        hotel_booking.set_occupancy(adults, children, kids_ages, rooms)

        # Submit the search request
        hotel_booking.submit_search()

        # Close any popups that might interfere with the search results
        hotel_booking.close_popups()

        # Apply the specified filter to refine search results
        hotel_booking.select_filter(user_filter)

        # Retrieve and extract data from the search results
        return hotel_booking.report_data(hotel_no)
    finally:
        # Close the WebDriver session
        hotel_booking.close_driver()

def _run_hotel_tool_call(tool_call):
    """
    Executes a single `get_hotel_deals` tool call requested by the model.

    Failures are reported back as a JSON error payload rather than raised, so the
    model can explain the problem to the user instead of the chat handler crashing.

    Parameters:
    - tool_call: One entry of the assistant message's `tool_calls` list.

    Returns:
    - str: JSON text with either the hotel results or an `{"error": ...}` object.
    """
    if tool_call.function.name != "get_hotel_deals":
        return json.dumps({"error": f"Unknown tool: {tool_call.function.name}"})

    try:
        arguments = json.loads(tool_call.function.arguments)
    except json.JSONDecodeError as exc:
        print(f"Could not parse tool arguments: {exc}")
        return json.dumps({"error": f"Invalid tool arguments: {exc}"})

    # Extract required parameters from the tool call.
    destination = arguments.get("destination")
    checkin_date = arguments.get("checkin_date")
    checkout_date = arguments.get("checkout_date")
    adults = arguments.get("adults", 1)
    children = arguments.get("children", 0)
    kids_ages = arguments.get("kids_ages") or []  # also covers an explicit null
    rooms = arguments.get("rooms", 1)
    user_filter = arguments.get("user_filter", "Top picks for long stays")
    hotel_no = arguments.get("hotel_no", 5)

    # Log extracted parameters for debugging.
    print(f"Destination: {destination}")
    print(f"Check-in date: {checkin_date}")
    print(f"Check-out date: {checkout_date}")
    print(f"Adults: {adults}")
    print(f"Children: {children}")
    print(f"Children's ages: {kids_ages}")
    print(f"Rooms: {rooms}")
    print(f"Filter: {user_filter}")
    print(f"Number of results: {hotel_no}")

    try:
        search_results = get_hotel_deals(
            destination, checkin_date, checkout_date, adults, children,
            kids_ages, rooms, user_filter, hotel_no
        )
    except Exception as exc:
        # Boundary handler: scraping a live site is inherently flaky. Log the failure
        # and hand it to the model as the tool result.
        print(f"Hotel search failed: {exc!r}")
        return json.dumps({"error": f"Hotel search failed: {exc}"})

    return json.dumps(search_results, indent=2)


def chat_with_gpt(message, history):
    """
    Handles a conversation related to hotel booking by interacting with GPT.
    The function processes user queries, calls the appropriate tool when necessary,
    and returns hotel search results.
    
    Parameters:
    - message (str): The latest user input message.
    - history (list): A list of past conversation messages.

    Yields:
    - str: The reply text so far, growing by one character per yield. The full reply
      is already available; this only simulates streaming. Nothing is yielded when
      the model returns no text.
    """

    # Define the tool (function) that the AI can use for hotel booking.
    hotel_booking_function = {
        "name": "get_hotel_deals",
        "description": "Retrieve hotel deals for a given destination, check-in and check-out dates and occupancy details. \
        Always ask me for missing details",
        "parameters": {
            "type": "object",
            "properties": {
                "destination": {
                    "type": "string",
                    "description": "The city where the user wants to book a hotel."
                },
                "checkin_date": {
                    "type": "string",
                    "description": "The check-in date, formatted as '9 August 2025' (day month year)."
                },
                "checkout_date": {
                    "type": "string",
                    "description": "The check-out date, formatted as '9 August 2025' (day month year)."
                },
                "adults": {
                    "type": "integer",
                    "description": "The number of adults staying at the hotel, with a minimum requirement of at least one guest."
                },
                "children": {
                    "type": "integer",
                    "description": "The number of children staying at the hotel."
                },
                "kids_ages": {
                    "type": "array",
                    "items": {"type": "integer"},
                    "description": "A list of ages for children staying at the hotel. \
                    This should only be provided if the user specifies they are bringing children. \
                    In that case, prompt the user to provide the ages of the children. \
                    If no children are included in the booking, this should be an empty array."
                },
                "rooms": {
                    "type": "integer",
                    "description": "The total number of rooms needed, with a minimum requirement of one room."
                },
                "user_filter": {
                    "type": "string",
                    "description": "The filter applied to search results. Always ask me for a filter. The only available options are: \
                    'Top picks for long stays', 'Homes & apartments first', 'Price (lowest first)', \
                    'Price (highest first)', 'Best reviewed and lowest price', 'Property rating (high to low)', \
                    'Property rating (low to high)', 'Property rating and price', 'Distance from city centre', 'Top reviewed'. \
                    Select the most appropriate filter based on user preferences and input. \
                    If no filter is specified by the user, default to 'Top picks for long stays'."
                },
                "hotel_no": {
                    "type": "integer",
                    "description": "The number of hotel results to retrieve, with a maximum limit of 25. \
                    If the user requests more than 25 results, inform them of the limit and prompt them to enter a lower number. \
                    Always ask the user how many results to display"
                }
            },
            "required": ["destination", "checkin_date", "checkout_date", "adults", "children", "rooms", "user_filter", "hotel_no"],
            "additionalProperties": False
        }
    }

    # Register the tool with the GPT model.
    tools = [{"type": "function", "function": hotel_booking_function}]

    # Define the system message to guide GPT's behavior.
    system_message = (
        "You are a helpful assistant specialized in finding hotel deals based on user preferences. \
        Always be accurate. If you don't know the answer, say so. "
    )
    
    # Construct the conversation history including the latest user message.
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    
    # Request a response from OpenAI's GPT model.
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        tools=tools
    )
    first_choice = response.choices[0]

    # Handle tool invocation if the AI determines that a function call is needed.
    if first_choice.finish_reason == "tool_calls":
        assistant_message = first_choice.message
        messages.append(assistant_message)

        # The model may request several tool calls in one turn, and every one of them
        # needs its own "tool" reply, otherwise the follow-up request is rejected.
        for tool_call in assistant_message.tool_calls or []:
            tool_output = _run_hotel_tool_call(tool_call)
            messages.append({
                "role": "tool",
                "content": [{"type": "text", "text": tool_output}],
                "tool_call_id": tool_call.id
            })

        # Re-query the AI model with the tool results appended to the history.
        print("Fetching updated response from OpenAI...")
        response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)

    # `content` can be None (e.g. a reply with no text); treat it as empty text so
    # the loop below does not fail on a non-iterable value.
    final_text = response.choices[0].message.content or ""

    # Stream the final AI response back to the user.
    stream = ""
    for chunk in final_text:
        stream += chunk
        yield stream
    
###############################################################################
#----------------------------------MAIN CODE----------------------------------#
###############################################################################

# Wrap the chatbot handler in a Gradio chat UI (messages-style history),
# then start the local server and open it in the default browser.
chat_ui = gr.ChatInterface(fn=chat_with_gpt, type="messages")
chat_ui.launch(inbrowser=True)
        

* Running on local URL:  http://127.0.0.1:7914

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\fedem\anaconda3\envs\llms\Lib\site-packages\httpx\_transports\default.py", line 101, in map_httpcore_exceptions
    yield
  File "C:\Users\fedem\anaconda3\envs\llms\Lib\site-packages\httpx\_transports\default.py", line 250, in handle_request
    resp = self._pool.handle_request(req)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\fedem\anaconda3\envs\llms\Lib\site-packages\httpcore\_sync\connection_pool.py", line 256, in handle_request
    raise exc from None
  File "C:\Users\fedem\anaconda3\envs\llms\Lib\site-packages\httpcore\_sync\connection_pool.py", line 236, in handle_request
    response = connection.handle_request(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\fedem\anaconda3\envs\llms\Lib\site-packages\httpcore\_sync\connection.py", line 101, in handle_request
    raise exc
  File "C:\Users\fedem\anaconda3\envs\llms\Lib\site-packages\httpcore\_sync\connection.py", line 78, in handle_request
    stream 