In [4]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import time
import pandas as pd
import json
import os

# Initialize Chrome
# Creates the undetected_chromedriver Chrome instance and binds it to the
# module-level name `driver`; scroll_to_bottom() and scrape_booking() below
# read this name as a global rather than taking the driver as a parameter.
driver = uc.Chrome()

def scroll_to_bottom(pause_seconds=3, max_scrolls=100):
    """Scroll the global ``driver`` window down until the page stops growing.

    After every scroll the function waits, re-reads
    ``document.body.scrollHeight`` and stops as soon as two consecutive
    measurements are equal.

    Args:
        pause_seconds: Seconds to sleep after each scroll before the height
            is measured again (gives lazily loaded content time to render).
        max_scrolls: Upper bound on scroll attempts, so a page whose height
            never stops growing cannot keep this loop running forever.
            Pass ``None`` to scroll without a limit.

    Returns:
        None.
    """
    measure_height = "return document.body.scrollHeight"
    last_height = driver.execute_script(measure_height)

    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause_seconds)  # Wait for hotels to load
        new_height = driver.execute_script(measure_height)
        scrolls_done += 1

        if new_height == last_height:
            # Height unchanged since the previous scroll: nothing new loaded.
            return
        last_height = new_height

    # Only reached when the cap was hit while the page was still growing.
    print(f"Stopped scrolling after {max_scrolls} attempts; the page may not be fully loaded.")

def _card_text(card, selector, default="N/A"):
    """Return the text of the first ``selector`` match inside ``card``.

    Args:
        card: Element to search within (must expose ``find_element``).
        selector: CSS selector identifying the wanted field.
        default: Value returned when the lookup fails.

    Returns:
        The matched element's ``text``, or ``default`` if the lookup raised.
    """
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text
    except Exception:
        # Best-effort: a card that lacks one field must not abort the run.
        # Catching Exception (not a bare except) lets Ctrl+C / SystemExit through.
        return default


def scrape_booking(city, checkin_date, checkout_date):
    """Scrape hotel cards from a Booking.com search and save them as JSON.

    Loads the search-results page for the given city and dates in the global
    ``driver``, calls ``scroll_to_bottom()``, then reads name, price, rating
    and address from every ``property-card`` element. Fields that cannot be
    read are recorded as ``"N/A"``. When at least one card was found, the
    records are written to ``./data/{city}_{checkin_date}_{checkout_date}.json``.

    Args:
        city: Search term sent as the ``ss`` query parameter.
        checkin_date: Value sent as the ``checkin`` query parameter.
        checkout_date: Value sent as the ``checkout`` query parameter.

    Returns:
        The list of hotel dicts that was collected (empty if none were found).
    """
    # Local import keeps this block self-contained within the cell.
    from urllib.parse import urlencode

    # urlencode escapes spaces, '&', non-ASCII etc. so the city name cannot
    # corrupt the query string.
    query = urlencode({"ss": city, "checkin": checkin_date, "checkout": checkout_date})
    search_url = f"https://www.booking.com/searchresults.html?{query}"
    driver.get(search_url)
    time.sleep(6)

    print("Scrolling to load all hotels...")
    scroll_to_bottom()

    hotel_elements = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']")
    print(f"Total Hotels Found: {len(hotel_elements)}")

    # Output key -> CSS selector, in the order the keys appear in each record.
    field_selectors = {
        "name": "div[data-testid='title']",
        "price": "span[data-testid='price-and-discounted-price']",
        "rating": "div[data-testid='review-score']",
        "address": "span[data-testid='address']",
    }

    hotels = []
    for hotel in hotel_elements:
        record = {
            field: _card_text(hotel, selector)
            for field, selector in field_selectors.items()
        }
        record.update(city=city, checkin=checkin_date, checkout=checkout_date)
        hotels.append(record)

    # Folder Path
    folder_path = "./data/"
    os.makedirs(folder_path, exist_ok=True)

    # Save to JSON
    if hotels:
        json_path = os.path.join(folder_path, f"{city}_{checkin_date}_{checkout_date}.json")
        with open(json_path, "w", encoding="utf-8") as json_file:
            json.dump(hotels, json_file, indent=4)

        print(f"✅ Data Saved in JSON: {json_path}")
    else:
        print("❌ No Data Found!")

    return hotels

# Example Call
# try/finally guarantees the browser is shut down even when scrape_booking
# raises; otherwise the Chrome process started above would be left running.
try:
    scrape_booking("Chandigarh", "2025-03-10", "2025-03-15")
finally:
    # Close Driver
    driver.quit()


Scrolling to load all hotels...
Total Hotels Found: 75
✅ Data Saved in JSON: ./data/Chandigarh_2025-03-10_2025-03-15.json


In [3]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import time
import pandas as pd
import json
import os

# Initialize Chrome
# Creates the undetected_chromedriver Chrome instance and binds it to the
# module-level name `driver`; scroll_to_bottom(), get_reviews() and
# scrape_booking() below read this name as a global.
driver = uc.Chrome()

def scroll_to_bottom(pause_seconds=3, max_scrolls=100):
    """Scroll the global ``driver`` window down until the page stops growing.

    After every scroll the function waits, re-reads
    ``document.body.scrollHeight`` and stops as soon as two consecutive
    measurements are equal.

    Args:
        pause_seconds: Seconds to sleep after each scroll before the height
            is measured again (gives lazily loaded content time to render).
        max_scrolls: Upper bound on scroll attempts, so a page whose height
            never stops growing cannot keep this loop running forever.
            Pass ``None`` to scroll without a limit.

    Returns:
        None.
    """
    measure_height = "return document.body.scrollHeight"
    last_height = driver.execute_script(measure_height)

    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause_seconds)  # Wait for hotels to load
        new_height = driver.execute_script(measure_height)
        scrolls_done += 1

        if new_height == last_height:
            # Height unchanged since the previous scroll: nothing new loaded.
            return
        last_height = new_height

    # Only reached when the cap was hit while the page was still growing.
    print(f"Stopped scrolling after {max_scrolls} attempts; the page may not be fully loaded.")

def get_reviews(hotel):
    """Collect review comments from a hotel's own page, using a temporary tab.

    Reads the ``href`` of the card's availability button, opens that URL in a
    new browser tab, gathers the text of every review comment found there,
    then closes the tab and returns focus to the search-results window.

    Why not simply click the button: Selenium keeps driving the window it is
    already focused on, so after a click the review lookup ran against the
    search-results page and found nothing (the recorded run prints
    "No Reviews Section Found" for every hotel). Switching to the hotel tab
    explicitly makes the lookup target the right page, and closing it
    afterwards keeps the results-page elements usable for the next hotel.

    NOTE(review): the ``review`` / ``review-comment`` selectors are kept from
    the original code and have not been verified against the hotel page.

    Args:
        hotel: Search-result card element containing the availability link.

    Returns:
        A list of review comment strings; an empty list when none were found
        or when any step failed (the error is printed, not raised).
    """
    results_handle = None
    detail_tab_open = False
    try:
        results_handle = driver.current_window_handle

        link = hotel.find_element(By.CSS_SELECTOR, "a[data-testid='availability-cta-btn']")
        hotel_url = link.get_attribute("href")
        if not hotel_url:
            raise ValueError("hotel card link has no href")

        # Open the hotel page in its own tab; new_window also switches focus.
        driver.switch_to.new_window("tab")
        detail_tab_open = True
        driver.get(hotel_url)
        time.sleep(6)

        # Scroll to reviews section (best-effort; a failed scroll is not fatal)
        try:
            driver.execute_script("window.scrollBy(0, 800);")
            time.sleep(2)
        except Exception as scroll_error:
            print(f"Could not scroll to reviews: {scroll_error}")

        review_elements = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='review']")

        if review_elements:
            print(f"✅ Found {len(review_elements)} Reviews")
        else:
            print("⚠️ No Reviews Section Found")

        reviews = []
        for review in review_elements:
            try:
                comment = review.find_element(By.CSS_SELECTOR, "span[data-testid='review-comment']").text
            except Exception:
                # A review block without a comment element is skipped.
                continue
            reviews.append(comment)

        return reviews
    except Exception as e:
        print(f"Error while fetching reviews: {str(e)}")
        return []
    finally:
        # Always leave the driver on the results window, whatever happened.
        if results_handle is not None:
            try:
                if detail_tab_open:
                    driver.close()
                driver.switch_to.window(results_handle)
            except Exception as restore_error:
                print(f"Error while returning to search results: {restore_error}")

def _card_text(card, selector, default="N/A"):
    """Return the text of the first ``selector`` match inside ``card``.

    Args:
        card: Element to search within (must expose ``find_element``).
        selector: CSS selector identifying the wanted field.
        default: Value returned when the lookup fails.

    Returns:
        The matched element's ``text``, or ``default`` if the lookup raised.
    """
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text
    except Exception:
        # Best-effort: a card that lacks one field must not abort the run.
        # Catching Exception (not a bare except) lets Ctrl+C / SystemExit through.
        return default


def scrape_booking(city, checkin_date, checkout_date):
    """Scrape hotel cards and their reviews from Booking.com and save as JSON.

    Loads the search-results page for the given city and dates in the global
    ``driver``, calls ``scroll_to_bottom()``, then for every ``property-card``
    element reads name, price, rating and address (``"N/A"`` when a field
    cannot be read), prints a progress line and calls ``get_reviews`` for it.
    When at least one card was found, the records are written to
    ``./data/{city}_{checkin_date}_{checkout_date}.json``.

    Args:
        city: Search term sent as the ``ss`` query parameter.
        checkin_date: Value sent as the ``checkin`` query parameter.
        checkout_date: Value sent as the ``checkout`` query parameter.

    Returns:
        The list of hotel dicts that was collected (empty if none were found).
    """
    # Local import keeps this block self-contained within the cell.
    from urllib.parse import urlencode

    # urlencode escapes spaces, '&', non-ASCII etc. so the city name cannot
    # corrupt the query string.
    query = urlencode({"ss": city, "checkin": checkin_date, "checkout": checkout_date})
    search_url = f"https://www.booking.com/searchresults.html?{query}"
    driver.get(search_url)
    time.sleep(6)

    print("Scrolling to load all hotels...")
    scroll_to_bottom()

    hotel_elements = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']")
    print(f"Total Hotels Found: {len(hotel_elements)}")

    hotels = []
    for position, hotel in enumerate(hotel_elements, start=1):
        # Read every card field before get_reviews touches the browser.
        name = _card_text(hotel, "div[data-testid='title']")
        price = _card_text(hotel, "span[data-testid='price-and-discounted-price']")
        rating = _card_text(hotel, "div[data-testid='review-score']")
        address = _card_text(hotel, "span[data-testid='address']")

        print(f"{position}. {name} | {rating}")

        reviews = get_reviews(hotel)
        print(f"Total Reviews Collected: {len(reviews)}")

        hotels.append({
            "name": name,
            "price": price,
            "rating": rating,
            "address": address,
            "reviews": reviews,
            "city": city,
            "checkin": checkin_date,
            "checkout": checkout_date
        })

    folder_path = "./data/"
    os.makedirs(folder_path, exist_ok=True)

    if hotels:
        json_path = os.path.join(folder_path, f"{city}_{checkin_date}_{checkout_date}.json")
        with open(json_path, "w", encoding="utf-8") as json_file:
            json.dump(hotels, json_file, indent=4)
        print(f"✅ Data Saved in JSON: {json_path}")
    else:
        print("❌ No Data Found!")

    return hotels

# Example Call
# try/finally guarantees the browser is shut down even when scrape_booking
# raises; otherwise the Chrome process started above would be left running.
try:
    scrape_booking("Chandigarh", "2025-03-10", "2025-03-15")
finally:
    # Close Driver
    driver.quit()


Scrolling to load all hotels...
Total Hotels Found: 75
1. Mercure Chandigarh Tribune Chowk | Scored 7.8
7.8
Good
58 reviews
⚠️ No Reviews Section Found
Total Reviews Collected: 0
2. Novotel Chandigarh Tribune Chowk | Scored 8.4
8.4
Very good
648 reviews
⚠️ No Reviews Section Found
Total Reviews Collected: 0
3. Taj Chandigarh | Scored 8.3
8.3
Very good
880 reviews
⚠️ No Reviews Section Found
Total Reviews Collected: 0
4. Hyatt Regency Chandigarh | Scored 8.5
8.5
Very good
1,037 reviews
⚠️ No Reviews Section Found
Total Reviews Collected: 0
5. Hotel The Pearl Grand - Top Rated ! Most Awarded ! Parking ! Lift ! Luxury Rooms ! Best Selling | Scored 6.8
6.8
Review score
49 reviews
⚠️ No Reviews Section Found
Total Reviews Collected: 0
6. Radisson Chandigarh Zirakpur | Scored 6.8
6.8
Review score
954 reviews
⚠️ No Reviews Section Found
Total Reviews Collected: 0
7. Hyatt Centric Sector17 Chandigarh | Scored 8.5
8.5
Very good
680 reviews
⚠️ No Reviews Section Found
Total Reviews Collected: 0
