In [1]:
# Requiere: pip install selenium webdriver-manager pandas
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# ------------ Driver setup ------------
# Chrome is launched maximized; ChromeDriverManager().install() supplies the
# chromedriver path handed to the Service.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
# Implicit wait of 3 seconds for element lookups.
# NOTE(review): this notebook also uses explicit WebDriverWait calls; the
# Selenium docs advise against mixing both wait styles -- confirm it is intended.
driver.implicitly_wait(3)

# ------------ Helpers ------------
def safe_click(element):
    """Click ``element``, retrying through JavaScript if the native click fails.

    Args:
        element: Object exposing ``click()``; on failure it is passed to the
            module-level ``driver`` as ``arguments[0]`` of a JS ``click()`` call.

    Returns:
        None.
    """
    try:
        element.click()
    except Exception:
        # Any failure of the native click falls back to a script-driven click.
        driver.execute_script("arguments[0].click();", element)

def close_popups():
    """Best-effort click on a button whose inner span text contains "Aceptar".

    Waits up to 3 seconds for the button to become clickable and clicks it via
    ``safe_click``. Any exception (including the wait timing out) is ignored so
    that a missing popup never interrupts the caller.
    Presumably this targets a consent/cookie dialog -- TODO confirm.
    """
    try:
        accept_locator = (
            By.XPATH,
            '//button[.//span[contains(text(),"Aceptar")]]',
        )
        accept_button = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable(accept_locator)
        )
        safe_click(accept_button)
    except Exception:
        # Deliberately silent: the popup is optional.
        pass

def scroll_incremental(pause=1.5, max_loops=12):
    """Scroll the page down step by step until its height stops growing.

    Each step scrolls by 90% of ``document.body.clientHeight``, sleeps, and
    then compares ``document.body.scrollHeight`` with the previous reading.

    Args:
        pause: Seconds to sleep after each scroll step.
        max_loops: Upper bound on the number of scroll steps.
    """
    height_script = "return document.body.scrollHeight"
    previous_height = driver.execute_script(height_script)
    for _ in range(max_loops):
        driver.execute_script("window.scrollBy(0, Math.floor(document.body.clientHeight * 0.9));")
        time.sleep(pause)
        current_height = driver.execute_script(height_script)
        if current_height == previous_height:
            # Height unchanged after scrolling: stop early.
            break
        previous_height = current_height

# ------------ Start ------------
start_url = "https://www.booking.com/searchresults.es.html?ss=Ibagué"

# Defined before the try block so partial results stay inspectable in the
# kernel even if the scrape is interrupted.
all_hoteles = []  # one dict per accommodation, in first-seen order
seen = set()      # names already recorded (the page is re-read on every pass)

try:
    driver.get(start_url)
    time.sleep(4)
    close_popups()

    scroll_incremental(pause=2, max_loops=8)

    while True:
        # Collect name, score and review count for every title currently in the DOM.
        title_elems = driver.find_elements(By.CSS_SELECTOR, 'div[data-testid="title"], div[data-testid="property-title"]')
        for t in title_elems:
            name = t.text.strip()
            if not name or name in seen:
                continue
            seen.add(name)

            # Climb to the enclosing property card. A title without such an
            # ancestor is still recorded (with no score/reviews) instead of
            # aborting the whole run.
            try:
                container = t.find_element(By.XPATH, "./ancestor::div[@data-testid='property-card']")
            except Exception:
                container = None

            rating = None
            reviews = None
            if container is not None:
                # Score figure (e.g. 8,3).
                # NOTE(review): 'dff2e52086' looks like a generated class name
                # and may change without notice -- verify periodically.
                try:
                    rating = container.find_element(By.XPATH, ".//div[@data-testid='review-score']//div[contains(@class,'dff2e52086')]").text.strip()
                except Exception:
                    rating = None

                # Review count text (e.g. "40 comentarios").
                try:
                    reviews = container.find_element(By.XPATH, ".//div[@data-testid='review-score']//div[contains(text(),'comentario')]").text.strip()
                except Exception:
                    reviews = None

            all_hoteles.append({
                "Nombre": name,
                "Puntuacion": rating,
                "Comentarios": reviews
            })

        print(f"Hoteles recogidos hasta ahora: {len(all_hoteles)}")

        # Try the "load more results" button; stop when it is no longer available.
        # `except Exception` (not a bare except) lets a kernel interrupt propagate.
        try:
            boton = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable((By.XPATH, "//button//span[contains(text(),'Cargar más')]"))
            )
            ActionChains(driver).move_to_element(boton).perform()
            time.sleep(0.5)
            safe_click(boton)
            time.sleep(2)
        except Exception:
            print("No se encontró más botón de 'Cargar más resultados'.")
            break
finally:
    # Always release the browser, even when the scrape fails or is interrupted.
    driver.quit()

# ------------ Save final result ------------
df = pd.DataFrame(all_hoteles)
df.to_csv("hoteles_ibague.csv", index=False, encoding="utf-8-sig")

print("\n=== TOTAL ALOJAMIENTOS EXTRAÍDOS ===")
print(df)
print(f"\nTotal único: {len(all_hoteles)}")


Hoteles recogidos hasta ahora: 69
Hoteles recogidos hasta ahora: 94
Hoteles recogidos hasta ahora: 119
Hoteles recogidos hasta ahora: 143
Hoteles recogidos hasta ahora: 168
Hoteles recogidos hasta ahora: 193
Hoteles recogidos hasta ahora: 218
Hoteles recogidos hasta ahora: 242
Hoteles recogidos hasta ahora: 260
No se encontró más botón de 'Cargar más resultados'.

=== TOTAL ALOJAMIENTOS EXTRAÍDOS ===
                                                Nombre Puntuacion  \
0                                 Sonesta Hotel Ibagué        9,0   
1                               Hotel Estelar Altamira        8,9   
2                                            Hotel F25        8,8   
3                                             FR Hotel        8,8   
4    Casa Morales Hotel Internacional y Centro de C...        7,8   
..                                                 ...        ...   
255                             El portal de Bariloche         10   
256                                   Hotel 

In [2]:

# ------------ Driver setup ------------
# A fresh Chrome session for this cell, launched maximized;
# ChromeDriverManager().install() supplies the chromedriver path for the Service.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
# Implicit wait of 3 seconds for element lookups.
driver.implicitly_wait(3)

# ------------ Helpers ------------
# NOTE(review): same definition as the safe_click in the first cell; consider
# defining the helpers once and reusing them.
def safe_click(element):
    """Click ``element``; if the native click raises, click it through JavaScript.

    Args:
        element: Object exposing ``click()``; on failure it is handed to the
            module-level ``driver`` as ``arguments[0]`` of a JS ``click()`` call.

    Returns:
        None.
    """
    try:
        element.click()
    except Exception:
        # Fallback path: script-driven click on the same element.
        driver.execute_script("arguments[0].click();", element)

# NOTE(review): same definition as the close_popups in the first cell.
def close_popups():
    """Best-effort click on a button whose inner span text contains "Aceptar".

    Waits up to 3 seconds for the button to be clickable, then clicks it with
    ``safe_click``. Every exception (including a wait timeout) is ignored, so
    the absence of the popup is not an error.
    """
    try:
        accept_locator = (
            By.XPATH,
            '//button[.//span[contains(text(),"Aceptar")]]',
        )
        accept_button = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable(accept_locator)
        )
        safe_click(accept_button)
    except Exception:
        # Deliberately silent: the popup is optional.
        pass

# NOTE(review): redefines the first cell's scroll_incremental; the only
# difference is the default max_loops (8 here, 12 there).
def scroll_incremental(pause=1.5, max_loops=8):
    """Scroll the page down in steps until its height stops growing.

    Each step scrolls by 90% of ``document.body.clientHeight``, sleeps, and
    compares ``document.body.scrollHeight`` against the previous reading.

    Args:
        pause: Seconds to sleep after each scroll step.
        max_loops: Upper bound on the number of scroll steps.
    """
    height_script = "return document.body.scrollHeight"
    previous_height = driver.execute_script(height_script)
    for _ in range(max_loops):
        driver.execute_script("window.scrollBy(0, Math.floor(document.body.clientHeight * 0.9));")
        time.sleep(pause)
        current_height = driver.execute_script(height_script)
        if current_height == previous_height:
            # No growth after this step: stop early.
            break
        previous_height = current_height

# ------------ Start ------------
start_url = "https://www.booking.com/searchresults.es.html?ss=Murillo%2C+Tolima"

# Defined before the try block so partial results survive an interrupted run.
hoteles = []  # one dict per accommodation, in first-seen order
seen = set()  # names already recorded

try:
    driver.get(start_url)
    time.sleep(4)
    close_popups()

    # Initial scroll
    scroll_incremental()

    while True:
        # Read every property card currently in the DOM.
        cards = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']")
        for c in cards:
            try:
                name = c.find_element(By.CSS_SELECTOR, "div[data-testid='title']").text.strip()
            except Exception:
                name = None

            # Skip nameless or already-recorded cards *before* the score/review
            # lookups: with the 3 s implicit wait, each missing element on a
            # re-visited card would otherwise cost seconds on every pass.
            if not name or name in seen:
                continue
            seen.add(name)

            # Score
            try:
                rating = c.find_element(By.XPATH, ".//div[@data-testid='review-score']//div[contains(@class,'dff2e52086')]").text.strip()
            except Exception:
                rating = None

            # Review count text
            try:
                reviews = c.find_element(By.XPATH, ".//div[@data-testid='review-score']//div[contains(text(),'comentario')]").text.strip()
            except Exception:
                reviews = None

            hoteles.append({
                "Nombre": name,
                "Puntuacion": rating,
                "Comentarios": reviews
            })

        # Look for the "load more results" button; stop when it is unavailable.
        # `except Exception` (not a bare except) lets a kernel interrupt propagate.
        try:
            boton = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable((By.XPATH, "//button//span[contains(text(),'Cargar más')]"))
            )
            ActionChains(driver).move_to_element(boton).perform()
            safe_click(boton)
            time.sleep(3)
            scroll_incremental()
        except Exception:
            break
finally:
    # Always release the browser, even when the scrape fails or is interrupted.
    driver.quit()

# ------------ Export to CSV ------------
df = pd.DataFrame(hoteles)
df.to_csv("hoteles_murillo.csv", index=False, encoding="utf-8-sig")

print("Total hoteles extraídos:", len(hoteles))


Total hoteles extraídos: 26


In [3]:
pip install beautifulsoup4


Collecting beautifulsoup4
  Downloading beautifulsoup4-4.13.5-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4)
  Downloading soupsieve-2.7-py3-none-any.whl.metadata (4.6 kB)
Downloading beautifulsoup4-4.13.5-py3-none-any.whl (105 kB)
Downloading soupsieve-2.7-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.13.5 soupsieve-2.7
Note: you may need to restart the kernel to use updated packages.


In [None]:
import csv
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

# Start Selenium
driver = webdriver.Chrome()

# Example URL (can be swapped for Prado, Murillo, etc.)
url = "https://www.booking.com/searchresults.es.html?ss=El+Prado%2C+Tolima%2C+Colombia"

# Defined before the try block so rows collected so far survive a failure.
data = []

try:
    driver.get(url)
    time.sleep(5)

    # Scroll to the bottom repeatedly until the page height stops growing.
    # NOTE(review): the earlier cells also click a "Cargar más" button to load
    # further results; this loop only scrolls -- confirm all results are loaded.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Extract every hotel link from the results page.
    soup = BeautifulSoup(driver.page_source, "html.parser")
    hotels = soup.find_all("a", {"data-testid": "title-link"})

    for hotel in hotels:
        hotel_name = hotel.text.strip()
        # .get() instead of ["href"]: an anchor without href is skipped rather
        # than raising KeyError and losing the whole run.
        hotel_url = hotel.get("href")
        if not hotel_url:
            continue

        # Open the hotel page. A navigation failure (e.g. a DNS error) is
        # reported and the row is kept with empty details instead of aborting.
        try:
            driver.get(hotel_url)
            time.sleep(4)
            soup_hotel = BeautifulSoup(driver.page_source, "html.parser")
        except WebDriverException as exc:
            print(f"No se pudo abrir {hotel_url}: {exc.msg}")
            soup_hotel = BeautifulSoup("", "html.parser")

        # 1. Description
        description_tag = soup_hotel.find("p", {"data-testid": "property-description"})
        description = description_tag.text.strip() if description_tag else "No disponible"

        # 2. Surroundings (poi-block): one "title: item (distance), ..." entry per block.
        # NOTE(review): the class names below look generated and may change.
        poi_blocks = soup_hotel.find_all("div", {"data-testid": "poi-block"})
        surroundings = []
        for block in poi_blocks:
            title = block.find("div", class_="e7addce19e")
            items = block.find_all("li")
            block_items = []
            for li in items:
                name = li.find("div", class_="aa225776f2")
                dist = li.find("div", class_="b99b6ef58f")
                if name and dist:
                    block_items.append(f"{name.text.strip()} ({dist.text.strip()})")
            if title:
                surroundings.append(f"{title.text.strip()}: " + ", ".join(block_items))

        # Store the row
        data.append({
            "Nombre": hotel_name,
            "URL": hotel_url,
            "Descripción": description,
            "Alrededores": " | ".join(surroundings)
        })
finally:
    # Always release the browser, even when the scrape fails or is interrupted.
    driver.quit()

# Save to CSV
with open("hoteles_prado.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["Nombre", "URL", "Descripción", "Alrededores"])
    writer.writeheader()
    writer.writerows(data)

print("Scraping finalizado. Datos guardados en hoteles_prado.csv")


WebDriverException: Message: unknown error: net::ERR_NAME_NOT_RESOLVED
  (Session info: chrome=140.0.7339.128)
Stacktrace:
	GetHandleVerifier [0x0x7ff627fc1eb5+80197]
	GetHandleVerifier [0x0x7ff627fc1f10+80288]
	(No symbol) [0x0x7ff627d402fa]
	(No symbol) [0x0x7ff627d3d001]
	(No symbol) [0x0x7ff627d2da49]
	(No symbol) [0x0x7ff627d2f821]
	(No symbol) [0x0x7ff627d2dd66]
	(No symbol) [0x0x7ff627d2d7b7]
	(No symbol) [0x0x7ff627d2d47b]
	(No symbol) [0x0x7ff627d2afec]
	(No symbol) [0x0x7ff627d2b87c]
	(No symbol) [0x0x7ff627d443ca]
	(No symbol) [0x0x7ff627de95de]
	(No symbol) [0x0x7ff627dc037a]
	(No symbol) [0x0x7ff627de87fb]
	(No symbol) [0x0x7ff627dc0153]
	(No symbol) [0x0x7ff627d88b02]
	(No symbol) [0x0x7ff627d898d3]
	GetHandleVerifier [0x0x7ff62827e83d+2949837]
	GetHandleVerifier [0x0x7ff628278c6a+2926330]
	GetHandleVerifier [0x0x7ff6282986c7+3055959]
	GetHandleVerifier [0x0x7ff627fdcfee+191102]
	GetHandleVerifier [0x0x7ff627fe50af+224063]
	GetHandleVerifier [0x0x7ff627fcaf64+117236]
	GetHandleVerifier [0x0x7ff627fcb119+117673]
	GetHandleVerifier [0x0x7ff627fb10a8+11064]
	BaseThreadInitThunk [0x0x7ffe8040e8d7+23]
	RtlUserThreadStart [0x0x7ffe813e8d9c+44]
