In [12]:
# Requiere: pip install selenium webdriver-manager pandas
import time
import re
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# ---------- Configuración ----------
# Chrome starts maximized; ChromeDriverManager().install() supplies the driver
# executable path that is handed to Service.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(
    service=Service(chromedriver_path),
    options=options,
)
# Element lookups wait up to 3 s before failing.
# NOTE(review): this notebook also uses WebDriverWait; Selenium's documentation
# advises against mixing implicit and explicit waits - confirm the combined
# timeouts are acceptable.
driver.implicitly_wait(3)

def safe_click(el):
    """Click ``el``, falling back to a JavaScript click when the native click fails.

    Args:
        el: Element object exposing ``click()``.

    Any ``Exception`` raised by ``el.click()`` triggers the fallback, which runs
    ``arguments[0].click();`` through the module-level ``driver``. Errors raised
    by the fallback itself propagate to the caller.
    """
    try:
        el.click()
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate so the run can still be interrupted.
        driver.execute_script("arguments[0].click();", el)

def close_popups():
    """Best-effort dismissal of the consent banner and any visible close buttons.

    Waits up to 2 s for an "Aceptar" / OneTrust accept button and clicks it,
    then clicks every displayed button labelled "Cerrar" or "Close". Failures
    are ignored on purpose: these overlays are optional.
    """
    try:
        btn = WebDriverWait(driver, 2).until(
            EC.element_to_be_clickable((By.XPATH,
                '//button[.//span[contains(text(),"Aceptar")]] | //button[contains(@id,"onetrust-accept")]'
            ))
        )
        safe_click(btn)
    except Exception:
        # No accept button within the timeout (or the click failed): carry on.
        pass
    try:
        close_btns = driver.find_elements(By.XPATH, '//button[@aria-label="Cerrar" or @aria-label="Close"]')
    except Exception:
        close_btns = []
    for b in close_btns:
        # Each button is handled on its own so that one failing element
        # (e.g. gone stale after an earlier click) does not skip the rest.
        try:
            if b.is_displayed():
                safe_click(b)
                time.sleep(0.2)
        except Exception:
            continue

def scroll_incremental(pause=1.0, max_loops=5):
    """Scroll the page down step by step until its height stops growing.

    Args:
        pause: Seconds to sleep after each scroll step.
        max_loops: Upper bound on the number of scroll steps.
    """
    height_script = "return document.body.scrollHeight"
    previous = driver.execute_script(height_script)
    for _ in range(max_loops):
        driver.execute_script("window.scrollBy(0, Math.floor(document.body.clientHeight * 0.9));")
        time.sleep(pause)
        current = driver.execute_script(height_script)
        if current == previous:
            # Height unchanged after this step: stop scrolling.
            return
        previous = current

# ---------- Search URL ----------
# Search-results page to scrape; presumably the `ss` query parameter carries the search text.
start_url = "https://www.booking.com/searchresults.es.html?ss=Ibagué"
driver.get(start_url)
time.sleep(3)  # fixed 3 s pause after navigation, before touching the page
close_popups()  # best-effort dismissal of consent banner / close buttons
scroll_incremental()  # scroll down using the default pause and loop limit

# ---------- Collect every result card, loading more results until none remain ----------
def _first_text(node, locators):
    """Return the stripped text of the first locator that resolves under ``node``, else None."""
    for by, selector in locators:
        try:
            return node.find_element(by, selector).text.strip()
        except Exception:
            continue
    return None


listings = []
seen = set()

while True:
    cards = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']")
    print("Cards visibles en esta página:", len(cards))

    for c in cards:
        name = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='title']")])
        # All cards are re-read on every pass, so already-processed names are skipped.
        # NOTE(review): de-duplicating by name drops distinct properties that share a title.
        if not name or name in seen:
            continue
        seen.add(name)

        # URL: prefer the title link, fall back to the first anchor inside the card.
        url = None
        try:
            url = c.find_element(By.CSS_SELECTOR, "a[data-testid='title-link']").get_attribute("href")
        except Exception:
            try:
                url = c.find_element(By.TAG_NAME, "a").get_attribute("href")
            except Exception:
                url = None
        if url and url.startswith("/"):
            url = "https://www.booking.com" + url

        # Rating: two element-level locators, then a regex over the whole score block.
        rating = _first_text(c, [
            (By.XPATH, ".//div[@data-testid='review-score']//div[contains(@class,'dff2e52086')]"),
            (By.XPATH, ".//div[@data-testid='review-score']//div[@aria-hidden='true']"),
        ])
        if rating is None:
            score_text = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='review-score']")])
            m = re.search(r'(\d+[.,]\d+)', score_text or "")
            if m:
                rating = m.group(1).replace(",", ".")

        # Review count: element whose text mentions "comentario", then a regex fallback.
        reviews = _first_text(c, [
            (By.XPATH, ".//div[@data-testid='review-score']//div[contains(translate(text(),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'comentario')]"),
        ])
        if reviews is None:
            score_text = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='review-score']")])
            # Accept thousands separators ("1.500") so the count is not truncated to "500".
            m2 = re.search(
                r'(\d{1,3}(?:[.,]\d{3})+|\d+)\s*(comentari|opinione|opiniones|reviews|reseñ)',
                score_text or "",
                re.I,
            )
            if m2:
                reviews = re.sub(r'[.,]', '', m2.group(1))

        # Municipality / address label shown on the card.
        municipio = _first_text(c, [(By.CSS_SELECTOR, "span[data-testid='address']")])

        listings.append({
            "Nombre": name,
            "URL": url,
            "Puntuacion": rating,
            "Comentarios": reviews,
            "Municipio": municipio
        })
        print(f"> {name} | rating={rating} | reviews={reviews} | municipio={municipio}")

    # Try the "Cargar más" (load more) button; when it cannot be found or clicked, stop.
    try:
        boton = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable((By.XPATH, "//button//span[contains(text(),'Cargar más')]"))
        )
        ActionChains(driver).move_to_element(boton).perform()
        safe_click(boton)
        time.sleep(3)
        scroll_incremental()
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C still interrupts the loop.
        break

print("Total hoteles encontrados:", len(listings))

# ---------- Detail-page scraping ----------
# Opens each listing URL in a new tab, reads its description and surroundings,
# then closes the tab and returns to the search tab.
results = []
main_tab = driver.current_window_handle

for idx, item in enumerate(listings, start=1):
    hotel_url = item["URL"]
    hotel_name = item["Nombre"]
    print(f"[DETALLE {idx}/{len(listings)}] {hotel_name} -> {hotel_url}")

    if not hotel_url:
        # No link was captured for this card: keep the listing data and skip the browser round-trip.
        results.append({**item, "Descripcion": None, "Alrededores": ""})
        continue

    try:
        driver.execute_script("window.open(arguments[0], '_blank');", hotel_url)
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(2)
        close_popups()

        try:
            WebDriverWait(driver, 6).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "p[data-testid='property-description'], div[data-testid='poi-block']"))
            )
        except Exception:
            # Neither element appeared in time; scrape whatever is present.
            pass

        # Description: current markup first, then a legacy-looking class selector.
        descripcion = None
        try:
            descripcion = driver.find_element(By.CSS_SELECTOR, "p[data-testid='property-description']").text.strip()
        except Exception:
            try:
                descripcion = driver.find_element(By.CSS_SELECTOR, "div.hp_desc_main_content").text.strip()
            except Exception:
                descripcion = None

        # Surroundings: one "title: item; item" entry per poi-block.
        surroundings_list = []
        try:
            for block in driver.find_elements(By.CSS_SELECTOR, "div[data-testid='poi-block']"):
                try:
                    title = block.find_element(By.XPATH, ".//h3").text.strip()
                except Exception:
                    title = ""
                items = [
                    txt
                    for txt in (
                        li.text.strip().replace("\n", " - ")
                        for li in block.find_elements(By.XPATH, ".//li")
                    )
                    if txt
                ]
                if title:
                    surroundings_list.append(f"{title}: " + "; ".join(items))
                elif items:
                    surroundings_list.append("; ".join(items))
        except Exception:
            pass

        results.append({
            **item,
            "Descripcion": descripcion,
            "Alrededores": " || ".join(surroundings_list)
        })

    except Exception as e:
        print("Error detalle:", e)
    finally:
        try:
            # Close only a detail tab. If opening/switching failed we are still on
            # the search tab, and closing it would break every later iteration.
            if driver.current_window_handle != main_tab:
                driver.close()
            driver.switch_to.window(main_tab)
        except Exception:
            handles = driver.window_handles
            if handles:
                driver.switch_to.window(handles[0])
        time.sleep(0.8)

# ---------- Save CSV ----------
# NOTE(review): the merge cell in this notebook reads "data/hoteles_ibague.csv",
# while this writes to the working directory - confirm the file is moved into data/.
try:
    df = pd.DataFrame(results)
    df.to_csv("hoteles_ibague.csv", index=False, encoding="utf-8-sig")
    print("Guardado hoteles_ibague.csv — total:", len(df))
finally:
    # Always shut the browser down, even if building or writing the CSV fails.
    driver.quit()


Cards visibles en esta página: 70
> Sonesta Hotel Ibagué | rating=9,0 | reviews=1.500 comentarios | municipio=Ibagué
> Hotel Estelar Altamira | rating=8,9 | reviews=1.321 comentarios | municipio=Ibagué
> Hotel F25 | rating=8,8 | reviews=889 comentarios | municipio=Ibagué
> FR Hotel | rating=8,8 | reviews=989 comentarios | municipio=Ibagué
> Casa Morales Hotel Internacional y Centro de Convenciones | rating=7,8 | reviews=773 comentarios | municipio=Ibagué
> Eco Star Hotel | rating=8,4 | reviews=798 comentarios | municipio=Ibagué
> Apartamento Luxury ubicado en la mejor zona comercial de la ciudad | rating=9,9 | reviews=9 comentarios | municipio=Ibagué
> Hotel Dann Combeima | rating=8,5 | reviews=585 comentarios | municipio=Ibagué
> Star AV 37 HOTEL | rating=8,7 | reviews=240 comentarios | municipio=Ibagué
> Hotel Doral Inn | rating=8,2 | reviews=648 comentarios | municipio=Ibagué
> Bunde Haus Hotel EXPRESS BOUTIQUE | rating=8,3 | reviews=534 comentarios | municipio=Ibagué
> Espectacular

In [11]:
# Requiere: pip install selenium webdriver-manager pandas
import time
import re
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# ---------- Configuración ----------
# Chrome starts maximized; ChromeDriverManager().install() supplies the driver
# executable path that is handed to Service.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(
    service=Service(chromedriver_path),
    options=options,
)
# Element lookups wait up to 3 s before failing.
# NOTE(review): this notebook also uses WebDriverWait; Selenium's documentation
# advises against mixing implicit and explicit waits - confirm the combined
# timeouts are acceptable.
driver.implicitly_wait(3)

def safe_click(el):
    """Click ``el``, falling back to a JavaScript click when the native click fails.

    Args:
        el: Element object exposing ``click()``.

    Any ``Exception`` raised by ``el.click()`` triggers the fallback, which runs
    ``arguments[0].click();`` through the module-level ``driver``. Errors raised
    by the fallback itself propagate to the caller.
    """
    try:
        el.click()
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate so the run can still be interrupted.
        driver.execute_script("arguments[0].click();", el)

def close_popups():
    """Best-effort dismissal of the consent banner and any visible close buttons.

    Waits up to 2 s for an "Aceptar" / OneTrust accept button and clicks it,
    then clicks every displayed button labelled "Cerrar" or "Close". Failures
    are ignored on purpose: these overlays are optional.
    """
    try:
        btn = WebDriverWait(driver, 2).until(
            EC.element_to_be_clickable((By.XPATH,
                '//button[.//span[contains(text(),"Aceptar")]] | //button[contains(@id,"onetrust-accept")]'
            ))
        )
        safe_click(btn)
    except Exception:
        # No accept button within the timeout (or the click failed): carry on.
        pass
    try:
        close_btns = driver.find_elements(By.XPATH, '//button[@aria-label="Cerrar" or @aria-label="Close"]')
    except Exception:
        close_btns = []
    for b in close_btns:
        # Each button is handled on its own so that one failing element
        # (e.g. gone stale after an earlier click) does not skip the rest.
        try:
            if b.is_displayed():
                safe_click(b)
                time.sleep(0.2)
        except Exception:
            continue

def scroll_incremental(pause=1.0, max_loops=5):
    """Scroll the page down step by step until its height stops growing.

    Args:
        pause: Seconds to sleep after each scroll step.
        max_loops: Upper bound on the number of scroll steps.
    """
    height_script = "return document.body.scrollHeight"
    previous = driver.execute_script(height_script)
    for _ in range(max_loops):
        driver.execute_script("window.scrollBy(0, Math.floor(document.body.clientHeight * 0.9));")
        time.sleep(pause)
        current = driver.execute_script(height_script)
        if current == previous:
            # Height unchanged after this step: stop scrolling.
            return
        previous = current

# ---------- Search URL ----------
# Search-results page to scrape; presumably the `ss` query parameter carries the search text.
start_url = "https://www.booking.com/searchresults.es.html?ss=Murillo%2C+Tolima"
driver.get(start_url)
time.sleep(3)  # fixed 3 s pause after navigation, before touching the page
close_popups()  # best-effort dismissal of consent banner / close buttons
scroll_incremental()  # scroll down using the default pause and loop limit

# ---------- Collect every result card, loading more results until none remain ----------
def _first_text(node, locators):
    """Return the stripped text of the first locator that resolves under ``node``, else None."""
    for by, selector in locators:
        try:
            return node.find_element(by, selector).text.strip()
        except Exception:
            continue
    return None


listings = []
seen = set()

while True:
    cards = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']")
    print("Cards visibles en esta página:", len(cards))

    for c in cards:
        name = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='title']")])
        # All cards are re-read on every pass, so already-processed names are skipped.
        # NOTE(review): de-duplicating by name drops distinct properties that share a title.
        if not name or name in seen:
            continue
        seen.add(name)

        # URL: prefer the title link, fall back to the first anchor inside the card.
        url = None
        try:
            url = c.find_element(By.CSS_SELECTOR, "a[data-testid='title-link']").get_attribute("href")
        except Exception:
            try:
                url = c.find_element(By.TAG_NAME, "a").get_attribute("href")
            except Exception:
                url = None
        if url and url.startswith("/"):
            url = "https://www.booking.com" + url

        # Rating: two element-level locators, then a regex over the whole score block.
        rating = _first_text(c, [
            (By.XPATH, ".//div[@data-testid='review-score']//div[contains(@class,'dff2e52086')]"),
            (By.XPATH, ".//div[@data-testid='review-score']//div[@aria-hidden='true']"),
        ])
        if rating is None:
            score_text = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='review-score']")])
            m = re.search(r'(\d+[.,]\d+)', score_text or "")
            if m:
                rating = m.group(1).replace(",", ".")

        # Review count: element whose text mentions "comentario", then a regex fallback.
        reviews = _first_text(c, [
            (By.XPATH, ".//div[@data-testid='review-score']//div[contains(translate(text(),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'comentario')]"),
        ])
        if reviews is None:
            score_text = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='review-score']")])
            # Accept thousands separators ("1.500") so the count is not truncated to "500".
            m2 = re.search(
                r'(\d{1,3}(?:[.,]\d{3})+|\d+)\s*(comentari|opinione|opiniones|reviews|reseñ)',
                score_text or "",
                re.I,
            )
            if m2:
                reviews = re.sub(r'[.,]', '', m2.group(1))

        # Municipality / address label shown on the card.
        municipio = _first_text(c, [(By.CSS_SELECTOR, "span[data-testid='address']")])

        listings.append({
            "Nombre": name,
            "URL": url,
            "Puntuacion": rating,
            "Comentarios": reviews,
            "Municipio": municipio
        })
        print(f"> {name} | rating={rating} | reviews={reviews} | municipio={municipio}")

    # Try the "Cargar más" (load more) button; when it cannot be found or clicked, stop.
    try:
        boton = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable((By.XPATH, "//button//span[contains(text(),'Cargar más')]"))
        )
        ActionChains(driver).move_to_element(boton).perform()
        safe_click(boton)
        time.sleep(3)
        scroll_incremental()
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C still interrupts the loop.
        break

print("Total hoteles encontrados:", len(listings))

# ---------- Detail-page scraping ----------
# Opens each listing URL in a new tab, reads its description and surroundings,
# then closes the tab and returns to the search tab.
results = []
main_tab = driver.current_window_handle

for idx, item in enumerate(listings, start=1):
    hotel_url = item["URL"]
    hotel_name = item["Nombre"]
    print(f"[DETALLE {idx}/{len(listings)}] {hotel_name} -> {hotel_url}")

    if not hotel_url:
        # No link was captured for this card: keep the listing data and skip the browser round-trip.
        results.append({**item, "Descripcion": None, "Alrededores": ""})
        continue

    try:
        driver.execute_script("window.open(arguments[0], '_blank');", hotel_url)
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(2)
        close_popups()

        try:
            WebDriverWait(driver, 6).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "p[data-testid='property-description'], div[data-testid='poi-block']"))
            )
        except Exception:
            # Neither element appeared in time; scrape whatever is present.
            pass

        # Description: current markup first, then a legacy-looking class selector.
        descripcion = None
        try:
            descripcion = driver.find_element(By.CSS_SELECTOR, "p[data-testid='property-description']").text.strip()
        except Exception:
            try:
                descripcion = driver.find_element(By.CSS_SELECTOR, "div.hp_desc_main_content").text.strip()
            except Exception:
                descripcion = None

        # Surroundings: one "title: item; item" entry per poi-block.
        surroundings_list = []
        try:
            for block in driver.find_elements(By.CSS_SELECTOR, "div[data-testid='poi-block']"):
                try:
                    title = block.find_element(By.XPATH, ".//h3").text.strip()
                except Exception:
                    title = ""
                items = [
                    txt
                    for txt in (
                        li.text.strip().replace("\n", " - ")
                        for li in block.find_elements(By.XPATH, ".//li")
                    )
                    if txt
                ]
                if title:
                    surroundings_list.append(f"{title}: " + "; ".join(items))
                elif items:
                    surroundings_list.append("; ".join(items))
        except Exception:
            pass

        results.append({
            **item,
            "Descripcion": descripcion,
            "Alrededores": " || ".join(surroundings_list)
        })

    except Exception as e:
        print("Error detalle:", e)
    finally:
        try:
            # Close only a detail tab. If opening/switching failed we are still on
            # the search tab, and closing it would break every later iteration.
            if driver.current_window_handle != main_tab:
                driver.close()
            driver.switch_to.window(main_tab)
        except Exception:
            handles = driver.window_handles
            if handles:
                driver.switch_to.window(handles[0])
        time.sleep(0.8)

# ---------- Save CSV ----------
# NOTE(review): the merge cell in this notebook reads "data/hoteles_murillo.csv",
# while this writes to the working directory - confirm the file is moved into data/.
try:
    df = pd.DataFrame(results)
    df.to_csv("hoteles_murillo.csv", index=False, encoding="utf-8-sig")
    print("Guardado hoteles_murillo.csv — total:", len(df))
finally:
    # Always shut the browser down, even if building or writing the CSV fails.
    driver.quit()


Cards visibles en esta página: 26
> Hostal Pachanuna | rating=9,1 | reviews=179 comentarios | municipio=Murillo
> Cabañas el condor del Ruíz | rating=9,2 | reviews=55 comentarios | municipio=Murillo
> Hostal Casa Celeste | rating=9,2 | reviews=49 comentarios | municipio=Murillo
> Hotel Areca | rating=9,4 | reviews=45 comentarios | municipio=Murillo
> Stellarium Glamping | rating=9,5 | reviews=102 comentarios | municipio=Murillo
> Ecoparque Nevado Del Ruiz | rating=8,8 | reviews=88 comentarios | municipio=Murillo
> La Vaca Fifí | rating=9,0 | reviews=11 comentarios | municipio=Murillo
> Hostal Juan Páramo | rating=10 | reviews=3 comentarios | municipio=Murillo
> El Hostal del Abuelo | rating=9,4 | reviews=37 comentarios | municipio=Murillo
> Cabañas y Restaurante El Jardin | rating=8,7 | reviews=11 comentarios | municipio=Murillo
> HOSTAL RANCHO JOTA Murillo Tolima | rating=8,6 | reviews=55 comentarios | municipio=Murillo
> Refugio Paramo Trek Murillo | rating=9,0 | reviews=29 comentari

In [None]:
"https://www.booking.com/searchresults.es.html?ss=Ibagué"


Collecting beautifulsoup4
  Downloading beautifulsoup4-4.13.5-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4)
  Downloading soupsieve-2.7-py3-none-any.whl.metadata (4.6 kB)
Downloading beautifulsoup4-4.13.5-py3-none-any.whl (105 kB)
Downloading soupsieve-2.7-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.13.5 soupsieve-2.7
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Requiere: pip install selenium webdriver-manager pandas
import time
import re
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# ---------- Configuración ----------
# Chrome starts maximized; ChromeDriverManager().install() supplies the driver
# executable path that is handed to Service.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(
    service=Service(chromedriver_path),
    options=options,
)
# Element lookups wait up to 3 s before failing.
# NOTE(review): this notebook also uses WebDriverWait; Selenium's documentation
# advises against mixing implicit and explicit waits - confirm the combined
# timeouts are acceptable.
driver.implicitly_wait(3)

def safe_click(el):
    """Click ``el``, falling back to a JavaScript click when the native click fails.

    Args:
        el: Element object exposing ``click()``.

    Any ``Exception`` raised by ``el.click()`` triggers the fallback, which runs
    ``arguments[0].click();`` through the module-level ``driver``. Errors raised
    by the fallback itself propagate to the caller.
    """
    try:
        el.click()
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate so the run can still be interrupted.
        driver.execute_script("arguments[0].click();", el)

def close_popups():
    """Best-effort dismissal of the consent banner and any visible close buttons.

    Waits up to 2 s for an "Aceptar" / OneTrust accept button and clicks it,
    then clicks every displayed button labelled "Cerrar" or "Close". Failures
    are ignored on purpose: these overlays are optional.
    """
    try:
        btn = WebDriverWait(driver, 2).until(
            EC.element_to_be_clickable((By.XPATH,
                '//button[.//span[contains(text(),"Aceptar")]] | //button[contains(@id,"onetrust-accept")]'
            ))
        )
        safe_click(btn)
    except Exception:
        # No accept button within the timeout (or the click failed): carry on.
        pass
    try:
        close_btns = driver.find_elements(By.XPATH, '//button[@aria-label="Cerrar" or @aria-label="Close"]')
    except Exception:
        close_btns = []
    for b in close_btns:
        # Each button is handled on its own so that one failing element
        # (e.g. gone stale after an earlier click) does not skip the rest.
        try:
            if b.is_displayed():
                safe_click(b)
                time.sleep(0.2)
        except Exception:
            continue

def scroll_incremental(pause=1.0, max_loops=5):
    """Scroll the page down step by step until its height stops growing.

    Args:
        pause: Seconds to sleep after each scroll step.
        max_loops: Upper bound on the number of scroll steps.
    """
    height_script = "return document.body.scrollHeight"
    previous = driver.execute_script(height_script)
    for _ in range(max_loops):
        driver.execute_script("window.scrollBy(0, Math.floor(document.body.clientHeight * 0.9));")
        time.sleep(pause)
        current = driver.execute_script(height_script)
        if current == previous:
            # Height unchanged after this step: stop scrolling.
            return
        previous = current

# ---------- Search URL ----------
# Search-results page to scrape; presumably the `ss` query parameter carries the search text.
start_url = "https://www.booking.com/searchresults.es.html?ss=Prado%2C+Tolima"
driver.get(start_url)
time.sleep(3)  # fixed 3 s pause after navigation, before touching the page
close_popups()  # best-effort dismissal of consent banner / close buttons
scroll_incremental()  # scroll down using the default pause and loop limit

# ---------- Collect every result card, loading more results until none remain ----------
def _first_text(node, locators):
    """Return the stripped text of the first locator that resolves under ``node``, else None."""
    for by, selector in locators:
        try:
            return node.find_element(by, selector).text.strip()
        except Exception:
            continue
    return None


listings = []
seen = set()

while True:
    cards = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']")
    print("Cards visibles en esta página:", len(cards))

    for c in cards:
        name = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='title']")])
        # All cards are re-read on every pass, so already-processed names are skipped.
        # NOTE(review): de-duplicating by name drops distinct properties that share a title.
        if not name or name in seen:
            continue
        seen.add(name)

        # URL: prefer the title link, fall back to the first anchor inside the card.
        url = None
        try:
            url = c.find_element(By.CSS_SELECTOR, "a[data-testid='title-link']").get_attribute("href")
        except Exception:
            try:
                url = c.find_element(By.TAG_NAME, "a").get_attribute("href")
            except Exception:
                url = None
        if url and url.startswith("/"):
            url = "https://www.booking.com" + url

        # Rating: two element-level locators, then a regex over the whole score block.
        rating = _first_text(c, [
            (By.XPATH, ".//div[@data-testid='review-score']//div[contains(@class,'dff2e52086')]"),
            (By.XPATH, ".//div[@data-testid='review-score']//div[@aria-hidden='true']"),
        ])
        if rating is None:
            score_text = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='review-score']")])
            m = re.search(r'(\d+[.,]\d+)', score_text or "")
            if m:
                rating = m.group(1).replace(",", ".")

        # Review count: element whose text mentions "comentario", then a regex fallback.
        reviews = _first_text(c, [
            (By.XPATH, ".//div[@data-testid='review-score']//div[contains(translate(text(),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'comentario')]"),
        ])
        if reviews is None:
            score_text = _first_text(c, [(By.CSS_SELECTOR, "div[data-testid='review-score']")])
            # Accept thousands separators ("1.500") so the count is not truncated to "500".
            m2 = re.search(
                r'(\d{1,3}(?:[.,]\d{3})+|\d+)\s*(comentari|opinione|opiniones|reviews|reseñ)',
                score_text or "",
                re.I,
            )
            if m2:
                reviews = re.sub(r'[.,]', '', m2.group(1))

        # Municipality / address label shown on the card.
        municipio = _first_text(c, [(By.CSS_SELECTOR, "span[data-testid='address']")])

        listings.append({
            "Nombre": name,
            "URL": url,
            "Puntuacion": rating,
            "Comentarios": reviews,
            "Municipio": municipio
        })
        print(f"> {name} | rating={rating} | reviews={reviews} | municipio={municipio}")

    # Try the "Cargar más" (load more) button; when it cannot be found or clicked, stop.
    try:
        boton = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable((By.XPATH, "//button//span[contains(text(),'Cargar más')]"))
        )
        ActionChains(driver).move_to_element(boton).perform()
        safe_click(boton)
        time.sleep(3)
        scroll_incremental()
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C still interrupts the loop.
        break

print("Total hoteles encontrados:", len(listings))

# ---------- Detail-page scraping ----------
# Opens each listing URL in a new tab, reads its description and surroundings,
# then closes the tab and returns to the search tab.
results = []
main_tab = driver.current_window_handle

for idx, item in enumerate(listings, start=1):
    hotel_url = item["URL"]
    hotel_name = item["Nombre"]
    print(f"[DETALLE {idx}/{len(listings)}] {hotel_name} -> {hotel_url}")

    if not hotel_url:
        # No link was captured for this card: keep the listing data and skip the browser round-trip.
        results.append({**item, "Descripcion": None, "Alrededores": ""})
        continue

    try:
        driver.execute_script("window.open(arguments[0], '_blank');", hotel_url)
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(2)
        close_popups()

        try:
            WebDriverWait(driver, 6).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "p[data-testid='property-description'], div[data-testid='poi-block']"))
            )
        except Exception:
            # Neither element appeared in time; scrape whatever is present.
            pass

        # Description: current markup first, then a legacy-looking class selector.
        descripcion = None
        try:
            descripcion = driver.find_element(By.CSS_SELECTOR, "p[data-testid='property-description']").text.strip()
        except Exception:
            try:
                descripcion = driver.find_element(By.CSS_SELECTOR, "div.hp_desc_main_content").text.strip()
            except Exception:
                descripcion = None

        # Surroundings: one "title: item; item" entry per poi-block.
        surroundings_list = []
        try:
            for block in driver.find_elements(By.CSS_SELECTOR, "div[data-testid='poi-block']"):
                try:
                    title = block.find_element(By.XPATH, ".//h3").text.strip()
                except Exception:
                    title = ""
                items = [
                    txt
                    for txt in (
                        li.text.strip().replace("\n", " - ")
                        for li in block.find_elements(By.XPATH, ".//li")
                    )
                    if txt
                ]
                if title:
                    surroundings_list.append(f"{title}: " + "; ".join(items))
                elif items:
                    surroundings_list.append("; ".join(items))
        except Exception:
            pass

        results.append({
            **item,
            "Descripcion": descripcion,
            "Alrededores": " || ".join(surroundings_list)
        })

    except Exception as e:
        print("Error detalle:", e)
    finally:
        try:
            # Close only a detail tab. If opening/switching failed we are still on
            # the search tab, and closing it would break every later iteration.
            if driver.current_window_handle != main_tab:
                driver.close()
            driver.switch_to.window(main_tab)
        except Exception:
            handles = driver.window_handles
            if handles:
                driver.switch_to.window(handles[0])
        time.sleep(0.8)

# ---------- Save CSV ----------
# NOTE(review): the merge cell in this notebook reads "data/hoteles_prado.csv",
# while this writes to the working directory - confirm the file is moved into data/.
try:
    df = pd.DataFrame(results)
    df.to_csv("hoteles_prado.csv", index=False, encoding="utf-8-sig")
    print("Guardado hoteles_prado.csv — total:", len(df))
finally:
    # Always shut the browser down, even if building or writing the CSV fails.
    driver.quit()


Cards visibles en esta página: 34
> Hotel Bellavista Isla del Sol | rating=8,4 | reviews=113 comentarios | municipio=Prado
> Casa de las Guacamayas | rating=9,4 | reviews=36 comentarios | municipio=Prado
> Fincasa del Mar-La Casa del Arbol | rating=9,0 | reviews=48 comentarios | municipio=Prado
> Buena Aventura parejas | rating=10 | reviews=2 comentarios | municipio=Prado
> Cabaña el diviso | rating=10 | reviews=2 comentarios | municipio=Prado
> Hotel y Hostal Mi Reino Represa Prado Tolima | rating=10 | reviews=1 comentario | municipio=Prado
> Cabaña en Prado Tolima | rating=9,0 | reviews=21 comentarios | municipio=Prado
> Casa Isla en la Represa de Prado | rating=9,4 | reviews=6 comentarios | municipio=Prado
> Tana - Casa & Cabaña de Montaña y Lago | rating=8,7 | reviews=6 comentarios | municipio=Prado
> cabaña vacacional prado tolima | rating=None | reviews=None | municipio=Prado
> Las Palmas - Espectacular casa a borde de lago con piscina | rating=9,0 | reviews=2 comentarios | munic

In [16]:
import pandas as pd

# Per-search CSV files to merge into one dataset
archivos = [
    "data/hoteles_prado.csv",
    "data/hoteles_murillo.csv",
    "data/hoteles_ibague.csv",
]

# Load each file into its own DataFrame
dfs = list(map(pd.read_csv, archivos))

# Stack the frames with a fresh index, then drop rows whose Nombre AND URL
# both repeat an earlier row (the first occurrence is kept)
df_final = (
    pd.concat(dfs, ignore_index=True)
    .drop_duplicates(subset=["Nombre", "URL"], keep="first")
)

# Write the merged dataset
df_final.to_csv("data/hoteles_tolima_booking.csv", index=False, encoding="utf-8-sig")

print("✅ Archivo final generado: hoteles_tolima_booking.csv")
print("Total de registros:", len(df_final))


✅ Archivo final generado: hoteles_tolima_booking.csv
Total de registros: 321
