In [1]:
import sys
import os
import json
import random
from datetime import datetime, timedelta
import traceback
from IPython.display import display, HTML

# Add the project root to Python path
notebook_dir = os.path.dirname(os.path.abspath(''))
project_root = os.path.dirname(notebook_dir)

# Print paths for debugging
print("Notebook directory:", notebook_dir)
print("Project root:", project_root)
print("Current working directory:", os.getcwd())

# Add project root to Python path
if project_root not in sys.path:
    sys.path.append(project_root)

try:
    # Try importing the scraper
    from src.scrapers.kayak import KayakHotelScraper
    print("‚úÖ Successfully imported KayakHotelScraper")
except Exception as e:
    print("‚ùå Error importing KayakHotelScraper:")
    print(str(e))
    traceback.print_exc()

def print_header(text, level=1):
    """Print formatted header with emojis"""
    emoji_map = {
        'start': 'üöÄ',
        'init': '‚öôÔ∏è',
        'progress': 'üìä',
        'success': '‚úÖ',
        'error': '‚ùå',
        'info': '‚ÑπÔ∏è',
        'warning': '‚ö†Ô∏è',
        'save': 'üíæ',
        'complete': 'üèÅ'
    }
    
    prefix = '=' * (level * 2)
    emoji = emoji_map.get(text.lower().split()[0], 'üìå')
    print(f"\n{prefix} {emoji} {text} {prefix}")

def print_section(text):
    """Print section separator with text"""
    print(f"\n{'='*20} {text} {'='*20}")

def print_hotel_details(hotel):
    """Print detailed hotel information"""
    print("\nüìç Basic Information:")
    print(f"  ‚Ä¢ Name: {hotel.get('hotel_name', 'N/A')}")
    print(f"  ‚Ä¢ Location: {hotel.get('location', 'N/A')}")
    print(f"  ‚Ä¢ Price: {hotel.get('price', 'N/A')}")

    if isinstance(hotel.get('review_scores'), dict):
        print("\n‚≠ê Ratings & Reviews:")
        scores = hotel['review_scores']
        print(f"  ‚Ä¢ Rating: {scores.get('rating', 'N/A')}")
        print(f"  ‚Ä¢ Review Count: {scores.get('count', 'N/A')}")
    
    if hotel.get('rooms'):
        print("\nüõèÔ∏è Room Options:")
        for i, room in enumerate(hotel['rooms'], 1):
            print(f"\n  Room Type {i}:")
            print(f"    ‚Ä¢ Type: {room.get('room_type', 'N/A')}")
            price = room.get('price')
            if isinstance(price, (int, float)):
                print(f"    ‚Ä¢ Price: ${price:.2f}")
            else:
                print(f"    ‚Ä¢ Price: {price or 'N/A'}")
            print(f"    ‚Ä¢ Beds: {room.get('bed_configuration', 'N/A')}")
            print(f"    ‚Ä¢ Cancellation: {room.get('cancellation_policy', 'N/A')}")
            print(f"    ‚Ä¢ Board: {room.get('board_type', 'N/A')}")
            if room.get('special_conditions'):
                print("    ‚Ä¢ Special Conditions:")
                for condition in room['special_conditions']:
                    print(f"      - {condition}")

    if hotel.get('amenities'):
        print("\nüè® Amenities:")
        for amenity in hotel['amenities'][:10]:  # Show first 10 amenities
            print(f"  ‚Ä¢ {amenity}")
        if len(hotel['amenities']) > 10:
            print(f"  ... and {len(hotel['amenities']) - 10} more")

    if hotel.get('detail_url'):
        print("\nüîó Hotel URL:")
        print(f"  {hotel['detail_url']}")
    
    print("\n" + "-"*50)

def test_scraper(limit=5):
    """Test the hotel scraper with detailed logging"""
    print_header("Start Scraper Test")
    
    # Create data directory if it doesn't exist
    os.makedirs('data', exist_ok=True)

    # Set search parameters
    city = "Errachidia"
    check_in = datetime(2025, 1, 10)
    check_out = datetime(2025, 1, 14)

    # Print test configuration
    print(f"\nüåç Testing configuration:")
    print(f"  ‚Ä¢ City: {city}")
    print(f"  ‚Ä¢ Check-in: {check_in.date()}")
    print(f"  ‚Ä¢ Check-out: {check_out.date()}")
    print(f"  ‚Ä¢ Hotel limit: {limit}")
    
    try:
        print_header("Initialize Scraper", level=2)
        scraper = KayakHotelScraper(
            city=city,
            check_in_date=check_in,
            check_out_date=check_out
        )

        print_header("Start Scraping", level=2)
        result = scraper.scrape_hotels(limit=limit)
        
        if result and isinstance(result, dict):
            hotels = result.get('hotels', [])
            if hotels:
                print_header("Success", level=2)
                print(f"Successfully scraped {len(hotels)} hotels")
                
                # Save results
                scraper.save_results()
                print("\nüíæ Results saved to: data/hotel_data.json")
                
                # Print detailed results
                print_header("Detailed Results", level=2)
                for idx, hotel in enumerate(hotels, 1):
                    print_section(f"Hotel {idx}: {hotel.get('hotel_name', 'Unknown')}")
                    print_hotel_details(hotel)
                    
                # Data file verification
                try:
                    with open('data/hotel_data.json', 'r', encoding='utf-8') as f:
                        saved_data = json.load(f)
                    print(f"\n‚úÖ Verified saved data: {len(saved_data['hotels'])} hotels in JSON file")
                except Exception as e:
                    print(f"\n‚ö†Ô∏è Error verifying saved data: {str(e)}")
                    
            else:
                print_header("Error", level=2)
                print("No hotels were found in the results")
        else:
            print_header("Error", level=2)
            print("No valid results returned from scraper")
        
    except Exception as e:
        print_header("Error", level=2)
        print(f"An error occurred during scraping:")
        print(str(e))
        print("\nFull traceback:")
        traceback.print_exc()
        
    finally:
        try:
            scraper.close()
            print("\nüîí Scraper closed successfully")
        except:
            print("\n‚ö†Ô∏è Error closing scraper")
        print_header("Complete", level=2)

if __name__ == "__main__":
    # Run the test
    test_scraper()

Notebook directory: /app
Project root: /
Current working directory: /app/notebooks
‚úÖ Successfully imported KayakHotelScraper

== üöÄ Start Scraper Test ==

üåç Testing configuration:
  ‚Ä¢ City: Errachidia
  ‚Ä¢ Check-in: 2025-01-10
  ‚Ä¢ Check-out: 2025-01-14
  ‚Ä¢ Hotel limit: 5

==== üìå Initialize Scraper ====


2024-12-31 22:30:23,308 [INFO] WebDriver initialized successfully
2024-12-31 22:30:23,319 [INFO] Loading URL (attempt 1): https://www.kayak.com/hotels/Errachidia-c52508/2025-01-10/2025-01-14/2adults?sort=rank_a



==== üöÄ Start Scraping ====


2024-12-31 22:31:08,586 [INFO] Found 28 hotels
2024-12-31 22:31:19,707 [ERROR] Error extracting basic hotel info: name 'NoSuchElementException' is not defined
2024-12-31 22:31:30,819 [ERROR] Error extracting basic hotel info: name 'NoSuchElementException' is not defined
2024-12-31 22:31:41,914 [ERROR] Error extracting basic hotel info: name 'NoSuchElementException' is not defined
2024-12-31 22:31:52,796 [ERROR] Error extracting basic hotel info: name 'NoSuchElementException' is not defined
2024-12-31 22:32:03,997 [ERROR] Error extracting basic hotel info: name 'NoSuchElementException' is not defined
2024-12-31 22:32:04,040 [INFO] Loading detail page: https://www.kayak.com/hotels/Kasbah-Hotel-Camping-Jurassique,Errachidia-p170319-h542701-details/2025-01-10/2025-01-14/2adults?psid=ejHEPnA3Lr&pm=daybase#overview
2024-12-31 22:32:04,043 [INFO] Loading URL (attempt 1): https://www.kayak.com/hotels/Kasbah-Hotel-Camping-Jurassique,Errachidia-p170319-h542701-details/2025-01-10/2025-01-14/2adul


==== ‚úÖ Success ====
Successfully scraped 5 hotels

üíæ Results saved to: data/hotel_data.json

==== üìå Detailed Results ====


üìç Basic Information:
  ‚Ä¢ Name: Kasbah Hotel Camping Jurassique
  ‚Ä¢ Location: Nearby - Errachidia Chamber of Commerce, Garden Boutalamine
  ‚Ä¢ Price: $22

‚≠ê Ratings & Reviews:
  ‚Ä¢ Rating: 8.5
  ‚Ä¢ Review Count: 924

üõèÔ∏è Room Options:

  Room Type 1:
    ‚Ä¢ Type: Standard Double Room
    ‚Ä¢ Price: $22.00
    ‚Ä¢ Beds: 1 double bed
    ‚Ä¢ Cancellation: Free cancellation
    ‚Ä¢ Board: None

  Room Type 2:
    ‚Ä¢ Type: Twin Room with Garden View
    ‚Ä¢ Price: $24.00
    ‚Ä¢ Beds: 2 twin beds
    ‚Ä¢ Cancellation: Free cancellation
    ‚Ä¢ Board: None

  Room Type 3:
    ‚Ä¢ Type: Standard Triple Room
    ‚Ä¢ Price: $27.00
    ‚Ä¢ Beds: None
    ‚Ä¢ Cancellation: Free cancellation
    ‚Ä¢ Board: None

  Room Type 4:
    ‚Ä¢ Type: Classic Quadruple Room
    ‚Ä¢ Price: $33.00
    ‚Ä¢ Beds: None
    ‚Ä¢ Cancellation: None
    ‚Ä¢ Board: None

2024-12-31 22:47:26,320 [INFO] WebDriver closed successfully



üîí Scraper closed successfully

==== üèÅ Complete ====
