In [None]:
import sys
import os
import json
import random
from datetime import datetime, timedelta
import traceback
from IPython.display import display, HTML

# Locate the notebook directory and the project root so that `src.*` imports
# resolve. os.path.abspath('') is the current working directory, i.e. the
# directory the notebook is executed from; the project root is its parent.
# NOTE(review): this assumes the notebook lives one level below the project
# root (e.g. <project>/notebooks) - confirm if the layout changes.
notebook_dir = os.path.abspath('')
project_root = os.path.dirname(notebook_dir)

# Print paths for debugging
print("Notebook directory:", notebook_dir)
print("Project root:", project_root)
print("Current working directory:", os.getcwd())

# Add project root to Python path (only once, so re-running the cell does not
# keep growing sys.path)
if project_root not in sys.path:
    sys.path.append(project_root)

try:
    # Try importing the scraper; any failure is reported instead of raised so
    # the rest of the cell (helper definitions) still gets defined.
    from src.scrapers.kayak import KayakHotelScraper
    print("✅ Successfully imported KayakHotelScraper")
except Exception as e:
    print("❌ Error importing KayakHotelScraper:")
    print(str(e))
    traceback.print_exc()

def print_header(text, level=1):
    """Print a header line decorated with '=' bars and a leading emoji.

    The emoji is chosen from the first word of ``text`` (case-insensitive).
    An exact keyword match wins; otherwise the first keyword that is a prefix
    of that word is used (so "Initialize Scraper" picks the 'init' emoji).
    When nothing matches, or ``text`` is empty/whitespace, a pin emoji is used.

    Args:
        text: Header text to print.
        level: Header level; the bar on each side is ``level * 2`` '='
            characters wide.
    """
    emoji_map = {
        'start': '🚀',
        'init': '⚙️',
        'progress': '📊',
        'success': '✅',
        'error': '❌',
        'info': 'ℹ️',
        'warning': '⚠️',
        'save': '💾',
        'complete': '🏁'
    }
    default_emoji = '📌'

    prefix = '=' * (level * 2)

    # Guard against empty / whitespace-only text: split() then yields no words
    # and indexing [0] would raise IndexError.
    words = str(text).lower().split()
    first_word = words[0] if words else ''

    emoji = emoji_map.get(first_word)
    if emoji is None:
        # Fall back to prefix matching so longer forms of a keyword
        # ("initialize", "starting", ...) still get the intended emoji.
        emoji = next(
            (symbol for key, symbol in emoji_map.items()
             if first_word.startswith(key)),
            default_emoji,
        )

    print(f"\n{prefix} {emoji} {text} {prefix}")

def print_section(text):
    """Print ``text`` centred between two 20-character '=' rules.

    A blank line is emitted first so the separator stands apart from any
    preceding output.
    """
    rule = '=' * 20
    print(f"\n{rule} {text} {rule}")

def print_hotel_details(hotel):
    """Print detailed hotel information"""
    print("\n📍 Basic Information:")
    print(f"  • Name: {hotel.get('hotel_name', 'N/A')}")
    print(f"  • Location: {hotel.get('location', 'N/A')}")
    print(f"  • Price: {hotel.get('price', 'N/A')}")

    if isinstance(hotel.get('review_scores'), dict):
        print("\n⭐ Ratings & Reviews:")
        scores = hotel['review_scores']
        print(f"  • Rating: {scores.get('rating', 'N/A')}")
        print(f"  • Review Count: {scores.get('count', 'N/A')}")
    
    if hotel.get('rooms'):
        print("\n🛏️ Room Options:")
        for i, room in enumerate(hotel['rooms'], 1):
            print(f"\n  Room Type {i}:")
            print(f"    • Type: {room.get('room_type', 'N/A')}")
            price = room.get('price')
            if isinstance(price, (int, float)):
                print(f"    • Price: ${price:.2f}")
            else:
                print(f"    • Price: {price or 'N/A'}")
            print(f"    • Beds: {room.get('bed_configuration', 'N/A')}")
            print(f"    • Cancellation: {room.get('cancellation_policy', 'N/A')}")
            print(f"    • Board: {room.get('board_type', 'N/A')}")
            if room.get('special_conditions'):
                print("    • Special Conditions:")
                for condition in room['special_conditions']:
                    print(f"      - {condition}")

    if hotel.get('amenities'):
        print("\n🏨 Amenities:")
        for amenity in hotel['amenities'][:10]:  # Show first 10 amenities
            print(f"  • {amenity}")
        if len(hotel['amenities']) > 10:
            print(f"  ... and {len(hotel['amenities']) - 10} more")

    if hotel.get('detail_url'):
        print("\n🔗 Hotel URL:")
        print(f"  {hotel['detail_url']}")
    
    print("\n" + "-"*50)

def test_scraper(limit=5, city="Errachidia", check_in=None, check_out=None):
    """Run an end-to-end smoke test of KayakHotelScraper with verbose logging.

    Creates the ``data`` directory, builds a scraper for the given search,
    scrapes up to ``limit`` hotels, asks the scraper to save its results,
    prints every hotel, and finally re-reads ``data/hotel_data.json`` to
    report how many hotels it contains. All failures are reported on stdout
    rather than raised, and the scraper is always closed if it was created.

    Args:
        limit: Maximum number of hotels passed to ``scrape_hotels``.
        city: City name to search for.
        check_in: Check-in ``datetime``; defaults to 2025-01-10.
        check_out: Check-out ``datetime``; defaults to 2025-01-14.
    """
    print_header("Start Scraper Test")

    # Create data directory if it doesn't exist
    os.makedirs('data', exist_ok=True)

    # NOTE(review): the path is assumed to be where scraper.save_results()
    # writes; it is not passed to the scraper - confirm against its source.
    results_path = 'data/hotel_data.json'

    # Set search parameters (fall back to the original fixed dates)
    if check_in is None:
        check_in = datetime(2025, 1, 10)
    if check_out is None:
        check_out = datetime(2025, 1, 14)

    # Print test configuration
    print("\n🌍 Testing configuration:")
    print(f"  • City: {city}")
    print(f"  • Check-in: {check_in.date()}")
    print(f"  • Check-out: {check_out.date()}")
    print(f"  • Hotel limit: {limit}")

    # Bound before the try so the finally block can tell "never created"
    # apart from "created but failed to close".
    scraper = None

    try:
        print_header("Initialize Scraper", level=2)
        scraper = KayakHotelScraper(
            city=city,
            check_in_date=check_in,
            check_out_date=check_out
        )

        print_header("Start Scraping", level=2)
        result = scraper.scrape_hotels(limit=limit)

        if result and isinstance(result, dict):
            hotels = result.get('hotels', [])
            if hotels:
                print_header("Success", level=2)
                print(f"Successfully scraped {len(hotels)} hotels")

                # Save results
                scraper.save_results()
                print(f"\n💾 Results saved to: {results_path}")

                # Print detailed results
                print_header("Detailed Results", level=2)
                for idx, hotel in enumerate(hotels, 1):
                    print_section(f"Hotel {idx}: {hotel.get('hotel_name', 'Unknown')}")
                    print_hotel_details(hotel)

                # Data file verification: a failure here is only a warning,
                # the scrape itself already succeeded.
                try:
                    with open(results_path, 'r', encoding='utf-8') as f:
                        saved_data = json.load(f)
                    print(f"\n✅ Verified saved data: {len(saved_data['hotels'])} hotels in JSON file")
                except Exception as e:
                    print(f"\n⚠️ Error verifying saved data: {str(e)}")

            else:
                print_header("Error", level=2)
                print("No hotels were found in the results")
        else:
            print_header("Error", level=2)
            print("No valid results returned from scraper")

    except Exception as e:
        print_header("Error", level=2)
        print("An error occurred during scraping:")
        print(str(e))
        print("\nFull traceback:")
        traceback.print_exc()

    finally:
        if scraper is None:
            # Construction failed (or the class was never imported); there is
            # nothing to close, so don't report a bogus close error.
            print("\n⚠️ Scraper was never initialized; nothing to close")
        else:
            try:
                scraper.close()
                print("\n🔒 Scraper closed successfully")
            except Exception as close_error:
                # Exception (not a bare except) so Ctrl-C / SystemExit still
                # propagate.
                print(f"\n⚠️ Error closing scraper: {close_error}")
        print_header("Complete", level=2)

if __name__ == "__main__":
    # Entry point: run the scraper smoke test with its default arguments when
    # this code is executed directly (not when it is imported as a module).
    test_scraper()

Notebook directory: /app
Project root: /
Current working directory: /app/notebooks
✅ Successfully imported KayakHotelScraper

== 🚀 Start Scraper Test ==

🌍 Testing configuration:
  • City: Errachidia
  • Check-in: 2025-01-10
  • Check-out: 2025-01-14
  • Hotel limit: 5

==== 📌 Initialize Scraper ====


2025-01-01 22:43:28,729 [INFO] WebDriver initialized successfully
2025-01-01 22:43:28,733 [INFO] Loading URL (attempt 1): https://www.kayak.com/hotels/Errachidia-c52508/2025-01-10/2025-01-14/2adults?sort=rank_a



==== 🚀 Start Scraping ====


2025-01-01 22:44:22,760 [INFO] Found 28 hotels
2025-01-01 22:44:35,264 [INFO] Loading detail page: https://www.kayak.com/hotels/Ighiz-Inn-Resort,Errachidia-p170319-h9063437-details/2025-01-10/2025-01-14/2adults?psid=ejEkK9dOl6&pm=daybase#overview
2025-01-01 22:44:35,271 [INFO] Loading URL (attempt 1): https://www.kayak.com/hotels/Ighiz-Inn-Resort,Errachidia-p170319-h9063437-details/2025-01-10/2025-01-14/2adults?psid=ejEkK9dOl6&pm=daybase#overview
2025-01-01 22:45:09,456 [INFO] Found 2 room elements
2025-01-01 22:45:10,721 [INFO] Successfully extracted room: Double or Twin Room with Garden View
2025-01-01 22:45:12,658 [INFO] Successfully extracted room: Two-Bedroom Bungalow
2025-01-01 22:45:12,661 [INFO] Found 2 room types
2025-01-01 22:47:05,112 [INFO] Loading URL (attempt 1): https://www.kayak.com/hotels/Errachidia-c52508/2025-01-10/2025-01-14/2adults?sort=rank_a
2025-01-01 22:47:51,805 [INFO] Loading detail page: https://www.kayak.com/hotels/Kasbah-Hotel-Camping-Jurassique,Errachidia