In [1]:
# ./scraper/notebooks/scraper_test.ipynb

import sys
import os
import json
import random
from datetime import datetime, timedelta
import traceback

# Add the src directory to the path
sys.path.append('../src')

from hotel_scraper import KayakHotelScraper

# Make sure the output directory is present before anything tries to use it.
os.makedirs('/app/data', exist_ok=True)

# Pick a randomized stay: check-in lands 30-60 days from now and the stay
# lasts 2-5 days. The two draws happen in this order (lead time, then length).
lead_days = random.randint(30, 60)
check_in = datetime.now() + timedelta(days=lead_days)
stay_days = random.randint(2, 5)
check_out = check_in + timedelta(days=stay_days)

# Run one scrape for "New York" between check_in and check_out, call
# save_results() when scrape_hotels() returned something truthy, and always
# attempt to close the scraper afterwards.
#
# `scraper` and `hotels` are bound before the try block so that:
#   * a constructor failure does not leave `scraper` undefined in `finally`
#     (fresh kernel) or pointing at an object from an earlier execution of
#     this cell (re-run), and
#   * `hotels` never carries over results from a previous run.
scraper = None
hotels = []

print("Initializing scraper...")
try:
    # Initialize scraper
    scraper = KayakHotelScraper(
        city="New York",
        check_in_date=check_in,
        check_out_date=check_out
    )
    print("Scraper initialized successfully")

    # Run the scraper
    print("Starting scraping process...")
    hotels = scraper.scrape_hotels()

    if hotels:
        scraper.save_results()
        print(f"Successfully scraped {len(hotels)} hotels")
    else:
        print("No hotels were scraped")

except Exception as e:
    # Deliberately best-effort: report the failure with a full traceback
    # instead of aborting the rest of the notebook.
    print("An error occurred during scraping:")
    print(str(e))
    print("\nFull traceback:")
    traceback.print_exc()
finally:
    # Only clean up an object that this run actually created.
    if scraper is not None:
        try:
            scraper.close()
            print("Scraper closed successfully")
        except Exception as close_error:
            # Cleanup stays non-fatal, but the failure is surfaced rather than
            # silently discarded. Catching Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit propagate.
            print(f"Warning: failed to close scraper cleanly: {close_error}")

# Read the results file back and print a short preview: the number of entries
# and the first entry pretty-printed as JSON. A missing or unparseable file is
# reported with a message instead of raising.
try:
    with open('/app/data/hotel_data.json', 'r') as results_file:
        data = json.load(results_file)
except json.JSONDecodeError:
    print("Error reading the JSON file. The file might be empty or corrupted.")
except FileNotFoundError:
    print("No data file found. Make sure the scraping was successful.")
else:
    if not data:
        print("\nNo data found in the JSON file")
    else:
        hotel_count = len(data)
        print(f"\nNumber of hotels scraped: {hotel_count}")
        print("\nFirst hotel data:")
        first_entry = json.dumps(data[0], indent=2)
        print(first_entry)

Initializing scraper...
Webdriver initialized successfully
Scraper initialized successfully
Starting scraping process...
Attempting to load URL: https://www.kayak.com/hotels/new-york/2025-02-10/2025-02-14/2adults
Page loaded successfully
Accessed URL: https://www.kayak.com/hotels/new-york-c15830/2025-02-10/2025-02-14/2adults?sort=rank_a
Initial page load complete. Looking for hotel elements...
Found 28 hotels
Processing 28 hotels...
Processed hotel 1/28: Virgin Hotels New York City
Processed hotel 2/28: NH Collection New York Madison Avenue
Processed hotel 3/28: The Gotham Hotel
Processed hotel 4/28: DoubleTree by Hilton New York Times Square South
Processed hotel 5/28: Hotel Belleclaire
Processed hotel 6/28: Hyatt Place New York Midtown South
Processed hotel 7/28: Renaissance New York Harlem Hotel
Processed hotel 8/28: Artezen Hotel
Processed hotel 9/28: Untitled at 3 Freeman Alley
Processed hotel 10/28: The Jewel Hotel, New York
Processed hotel 11/28: Luma Hotel - Times Square
Proces