In [None]:
# Notebook shell command: install the Playwright Python package.
!pip install playwright

# Notebook shell command: download the browser binaries that Playwright drives.
!playwright install

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import csv

In [2]:
import pandas as pd
from playwright.async_api import async_playwright
import asyncio

# User agent sent by every browser context created by the scraper.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)


async def _card_text(card, selector):
    """Return the inner text of the first match of *selector* in *card*.

    Returns an empty string when nothing matches or the lookup fails, so a
    card with a missing field never aborts the scrape.
    """
    target = card.locator(selector)
    try:
        # count() does not wait, so a card without this element is skipped
        # immediately instead of blocking until the default timeout expires.
        if await target.count() == 0:
            return ''
        return await target.first.inner_text()
    except Exception:
        return ''


async def _fetch_coordinates(browser, detail_url):
    """Open *detail_url* in a fresh context and return its coordinates.

    The value returned is the raw ``data-atlas-latlng`` attribute of the first
    element carrying it, or an empty string when no such element exists or
    reading it fails. Navigation errors propagate to the caller. The
    temporary context (and its page) is always closed.
    """
    # A separate context keeps the detail page from interfering with the
    # search-results page.
    detail_context = await browser.new_context(user_agent=_USER_AGENT)
    try:
        detail_page = await detail_context.new_page()
        await detail_page.goto(detail_url, timeout=60000)
        try:
            coords_locator = detail_page.locator('[data-atlas-latlng]')
            # A Locator object is always truthy; ask the page how many
            # elements actually match.
            if await coords_locator.count() == 0:
                print("Coordinates element not found on detail page.")
                return ''
            # `.first` is a plain property returning a Locator; it must not
            # be awaited.
            coords = await coords_locator.first.get_attribute('data-atlas-latlng')
            print(f"Extracted coordinates: {coords}")
            return coords or ''
        except Exception as e:
            print(f"Error extracting coordinates: {e}")
            return ''
    finally:
        # Closing the context also closes the page opened in it, even when
        # navigation above failed.
        await detail_context.close()


async def _hotel_coordinates(browser, card, base_url):
    """Return the coordinates string for one result *card*, or ``''``.

    Finds the card's detail link, resolves it against *base_url* and delegates
    to ``_fetch_coordinates``. Every failure is reported with ``print`` and
    yields an empty string.
    """
    from urllib.parse import urljoin

    try:
        # Accept either an anchor that itself carries data-testid="title" or
        # an anchor wrapping the title element.
        link = card.locator(
            'a[data-testid="title"], a:has([data-testid="title"])'
        )
        if await link.count() == 0:
            print("Detail link element not found.")
            return ''
        href = await link.first.get_attribute('href')
        if not href:
            print("Detail URL not found.")
            return ''
        # Resolve relative links so goto() always receives an absolute URL.
        detail_url = urljoin(base_url, href)
        print(f"Navigating to detail page: {detail_url}")
        return await _fetch_coordinates(browser, detail_url)
    except Exception as e:
        print(f"Error navigating to detail page or finding link: {e}")
        return ''


async def scrape_almeria():
    """Scrape the Booking.com search results for the hard-coded query.

    Loads the results page in headless Chromium, dismisses the cookie banner
    if one appears, scrolls to trigger lazy loading, and for every property
    card collects the name ('Hotel'), the price text ('Precio') and the
    coordinates read from the hotel's detail page ('Coordenadas'). The rows
    are written to ``hotels_almeria.csv`` in the working directory.

    Returns None. If no property card shows up within the timeout, a
    diagnostic HTML fragment is printed and no file is written.
    """
    url = "https://www.booking.com/searchresults.es.html?lang=es&selected_currency=EUR&dest_id=1363&dest_type=region&ac_langcode=es&checkin=2025-05-07&checkout=2025-05-08&nflt=ht_id%3D204&shw_aparth=0"

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(user_agent=_USER_AGENT)
            page = await context.new_page()
            await page.goto(url, timeout=60000)

            # Accept cookies if the banner appears.
            try:
                await page.click("button:has-text('Aceptar')", timeout=5000)
                print("Cookies aceptadas.")
            except Exception:
                print("No se mostró banner de cookies.")

            # Scroll slowly so lazily loaded results get rendered.
            for _ in range(15):
                await page.mouse.wheel(0, 1500)
                await page.wait_for_timeout(1000)

            # Wait for the results to show up.
            try:
                await page.wait_for_selector('[data-testid="property-card"]', timeout=30000)
            except Exception:
                print("❌ No se encontró ningún hotel. Posible bloqueo o la página está vacía.")
                content = await page.content()
                print("Fragmento del HTML para diagnóstico:\n", content[:2000])
                return

            hotels = await page.locator('[data-testid="property-card"]').all()
            print(f"✅ Se encontraron {len(hotels)} hoteles.")

            hotels_data = []
            for hotel in hotels:
                hotels_data.append({
                    'Hotel': await _card_text(hotel, '[data-testid="title"]'),
                    'Precio': await _card_text(
                        hotel, '[data-testid="price-and-discounted-price"]'
                    ),
                    'Coordenadas': await _hotel_coordinates(browser, hotel, page.url),
                })

            df = pd.DataFrame(hotels_data)
            df.to_csv("hotels_almeria.csv", index=False)
            print("✅ Archivo 'hotels_almeria.csv' guardado con éxito.")
        finally:
            # Always release the browser, including on early return or error.
            await browser.close()

# Run the scraper in Colab. This relies on the notebook's support for
# top-level `await`; it is not valid in a plain Python script.
await scrape_almeria()

