In [1]:

# -*- coding: utf-8 -*-
"""
People Management ‚Üí Generar 'PDA Report' por fila con Selenium (Chrome ya abierto)
- Basado en el flujo que ya funcionaba en tu entorno
- XPaths robustos para 'PDA Report/Reporte PDA'
- Deduplicaci√≥n multi-equipo:
  * √çndice compartido (processed_index.json)
  * B√∫squeda de PDFs existentes en SHARED_DIR por prefijo
  * Claves: email | name|doc | name | name|doc|gender
- Paginaci√≥n y aumento de "Items per page"
- Mueve PDFs a carpeta compartida con renombrado

Lanza Chrome ANTES de correr:
  "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" --remote-debugging-port=9222 --user-data-dir="C:\\selenium\\chrome-profile"
"""

import os, time, json, socket, uuid, shutil, re, unicodedata, glob
from typing import Optional, Tuple, List, Dict
from contextlib import contextmanager

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
# ===================== CONFIG =====================
# List page the script navigates to and iterates over.
LIST_URL = "https://hrtech.pdaprofile.com/app/people-managment"

# Local download folder (specific to each PC)
DOWNLOAD_DIR = r"C:\Users\juan_garnicac\OneDrive - Corporaci√≥n Unificada Nacional de Educaci√≥n Superior - CUN\Documentos\pda_reports"

# Folder SHARED between the machines (network/OneDrive/Drive). SAME PATH ON BOTH PCS.
# NOTE(review): as configured here this is the same path as DOWNLOAD_DIR, so
# downloads land directly in the shared folder — confirm that is intended.
SHARED_DIR = r"C:\Users\juan_garnicac\OneDrive - Corporaci√≥n Unificada Nacional de Educaci√≥n Superior - CUN\Documentos\pda_reports"    # <--- ADJUST THIS

MAX_ROWS: Optional[int] = None          # None = all rows/pages
WAIT = 25                               # default timeout passed to WebDriverWait in wait_present
PAUSE_BETWEEN_ROWS = 1.2                # sleep after each row in iterate_pages
CLICK_GAP_SEC = 2.2                     # default pause after a click in click_then_pause
FINAL_HOLD_SEC = 10.0                   # sleep after the final 'Generate' click, before watching for the download
PREFERRED_PAGE_SIZE = 200               # "Items per page" option tried first by set_items_per_page

# Shared index (file names are joined onto SHARED_DIR by index_paths)
INDEX_FILENAME = "processed_index.json"
LOCK_FILENAME  = "index.lock"
# ==================================================
# ---------- Table / row XPaths ----------
# Matches <tr> rows of a table body, or div-based rows whose role contains
# 'row' and whose class contains 'body'.
TABLE_ROWS_XPATH = (
    "//table//tbody//tr"
    " | //div[contains(@class,'table')]//div[contains(@role,'row') and contains(@class,'body')]"
)

# Actions inside a row (both are relative XPaths, evaluated from the row element)
ROW_THREE_DOTS_XPATH = (
    ".//button[contains(@class,'mat-menu-trigger') or @aria-haspopup='menu' or contains(@class,'menu')]"
    "[.//mat-icon[normalize-space()='more_vert'] or .//*[normalize-space()='more_vert']]"
)
ROW_CARET_XPATH = ".//button[.//mat-icon[normalize-space()='keyboard_arrow_down']]"

# Overlays / Drawer
OVERLAY_PANE_CSS = ".cdk-overlay-pane"
OVERLAY_BACKDROP_CSS = ".cdk-overlay-backdrop"
DRAWER_OPEN_XPATH = "//div[contains(@class,'mat-drawer') and contains(@class,'mat-drawer-end') and contains(@class,'mat-drawer-opened')]"

# Context menu -> 'Generate/Generar' (relative: evaluated from an overlay pane)
MENU_GENERATE_ITEM_REL_XPATH = (
    ".//button[contains(@class,'mat-menu-item')]"
    "[.//span[normalize-space()='Generate'] or contains(normalize-space(.),'Generate')"
    " or .//span[normalize-space()='Generar'] or contains(normalize-space(.),'Generar')]"
)

# Right-hand panel -> 'PDA Report' / 'Reporte PDA' (robust + fallback inside the open drawer)
PDA_REPORT_BTN_XPATH = (
    "//span[contains(@class,'mat-button-wrapper') and normalize-space()='PDA Report']/ancestor::button[1]"
    " | //span[contains(@class,'mat-button-wrapper') and normalize-space()='Reporte PDA']/ancestor::button[1]"
    f" | {DRAWER_OPEN_XPATH}//button[.//span[normalize-space()='PDA Report'] or .//span[normalize-space()='Reporte PDA']]"
    f" | {DRAWER_OPEN_XPATH}//button[contains(.,'PDA Report') or contains(.,'Reporte PDA')]"
)

# 'Generate/Generar' button inside the drawer.
# NOTE: PANEL_GENERATE_BTN_XPATH is not anchored to the drawer; it matches any
# button on the page with that text. FINAL_GENERATE_IN_DRAWER_XPATH is the
# drawer-scoped variant.
PANEL_GENERATE_BTN_XPATH = (
    "//button[.//span[normalize-space()='Generate'] or contains(normalize-space(.),'Generate')"
    " or .//span[normalize-space()='Generar'] or contains(normalize-space(.),'Generar')]"
)
FINAL_GENERATE_IN_DRAWER_XPATH = (
    f"{DRAWER_OPEN_XPATH}//button[contains(@class,'mat-flat-button')]"
    "[.//span[normalize-space()='Generate'] or .//span[normalize-space()='Generar']]"
)

# Paginator: 'next page' button, only while it is not disabled
NEXT_PAGE_BTN_XPATH = (
    "//button[contains(@class,'mat-paginator-navigation-next') and not(@disabled)]"
    " | //mat-paginator//button[contains(@aria-label,'Next') and not(@disabled)]"
    " | //button[(contains(normalize-space(.),'Siguiente') or contains(normalize-space(.),'Next')) and not(@disabled)]"
)

# Page size (Items per page)
PAGE_SIZE_SELECT_XPATH = (
    "//mat-paginator//mat-select[contains(@class,'mat-paginator-page-size-select') or @aria-label='Items per page:']"
    " | //mat-paginator//*[contains(normalize-space(.),'Items per page')]/following::*[self::mat-select or self::div or self::button][1]"
)
# Template: '{}' is filled with the desired page size via str.format
PAGE_SIZE_OPTION_XPATH_TPL = (
    "//div[contains(@class,'cdk-overlay-pane')]//mat-option//span[normalize-space()='{}']"
)

# --- People cell (name/email if present) ---
ROW_PERSON_TD_XPATH       = ".//td[contains(@class,'mat-column-person') and contains(@class,'cdk-column-person')]"
ROW_NAME_IN_PERSON_XPATH  = ".//span[contains(@class,'font-medium')][1]"
ROW_EMAIL_IN_PERSON_XPATH = ".//a[starts-with(@href,'mailto:')] | .//*[contains(text(),'@')]"

# ---------- utils (shared folder / index) ----------
def ensure_dirs():
    """Create the local download folder and the shared folder if they are missing."""
    for folder in (DOWNLOAD_DIR, SHARED_DIR):
        os.makedirs(folder, exist_ok=True)

def slugify(s: str) -> str:
    """Turn *s* into a filename-friendly token.

    Alphanumeric characters and '-', '_', '.' are kept; every other character
    acts as a separator. Runs of separators collapse into a single underscore
    and leading/trailing separators are dropped.
    """
    kept_punctuation = "-_."
    spaced = "".join(
        ch if (ch.isalnum() or ch in kept_punctuation) else " "
        for ch in s
    )
    # split() with no argument both trims and collapses whitespace runs.
    return "_".join(spaced.split())

def _normalize_ascii(s: str) -> str:
    import unicodedata
    s = unicodedata.normalize("NFKD", s)
    return "".join(ch for ch in s if not unicodedata.combining(ch))

def _stripped_text(root, xpath) -> str:
    """Return the stripped text of the first element matching *xpath* under *root*; '' on any failure."""
    try:
        return (root.find_element(By.XPATH, xpath).text or "").strip()
    except Exception:
        return ""


def _first_line_of(element) -> str:
    """Return the first non-blank line of *element*'s text, stripped; '' if there is none or on failure."""
    try:
        for line in (element.text or "").split("\n"):
            if line.strip():
                return line.strip()
    except Exception:
        pass
    return ""


def extract_name_email_from_row(row_el) -> Tuple[str, str]:
    """Best-effort extraction of the person's name and e-mail from a table row.

    Name lookup order: the name span inside the 'person' cell, the first
    'font-medium' span anywhere in the row, the first non-blank line of the
    person cell, and finally the first non-blank line of the whole row.
    E-mail lookup order: text (or the mailto: href) of the e-mail element in
    the person cell, then the text of any element in the row containing '@'.

    Every lookup failure is swallowed; a missing value is returned as ''.

    Returns:
        (name, email)
    """
    try:
        person_td = row_el.find_element(By.XPATH, ROW_PERSON_TD_XPATH)
    except Exception:
        person_td = None

    # --- name ---
    name = ""
    if person_td:
        name = (
            _stripped_text(person_td, ROW_NAME_IN_PERSON_XPATH)
            or _stripped_text(row_el, ".//span[contains(@class,'font-medium')][1]")
            or _first_line_of(person_td)
        )
    if not name:
        name = _first_line_of(row_el)

    # --- e-mail ---
    email = ""
    if person_td:
        try:
            mail_el = person_td.find_element(By.XPATH, ROW_EMAIL_IN_PERSON_XPATH)
            email = (mail_el.text or "").strip()
            if not email:
                # The link may show no text; fall back to its mailto: target.
                href = mail_el.get_attribute("href") or ""
                if href.startswith("mailto:"):
                    email = href.replace("mailto:", "").strip()
        except Exception:
            pass
    if not email:
        email = _stripped_text(row_el, ".//*[contains(text(),'@')]")

    return name, email

def extract_row_signals(row_el) -> Dict[str, str]:
    """Collect identifying signals from a row.

    Returns a dict with keys 'name', 'doc' (first run of 7+ digits), 'date'
    (first dd/mm/yyyy-shaped token) and 'gender' ('Female' or 'Male'), each
    '' when not found. 'doc', 'date' and 'gender' are searched in the joined
    text of the row's <td> cells.
    """
    name, _email = extract_name_email_from_row(row_el)

    try:
        cells = row_el.find_elements(By.XPATH, ".//td")
    except Exception:
        cells = []

    cell_texts = []
    for cell in cells:
        try:
            value = (cell.text or "").strip()
        except Exception:
            continue
        if value:
            cell_texts.append(value)
    blob = " | ".join(cell_texts)

    doc_match = re.search(r"\b(\d{7,})\b", blob)
    date_match = re.search(r"\b(\d{2}/\d{2}/\d{4})\b", blob)

    if re.search(r"\bFemale\b", blob, re.I):
        gender = "Female"
    elif re.search(r"\bMale\b", blob, re.I):
        gender = "Male"
    else:
        gender = ""

    return {
        "name": name,
        "doc": doc_match.group(1) if doc_match else "",
        "date": date_match.group(1) if date_match else "",
        "gender": gender,
    }

def build_candidate_keys(name: str, email: str, doc: str, gender: str) -> List[str]:
    """Build the de-duplication keys for one person, in priority order.

    Order: lower-cased e-mail, 'name|doc', 'name', 'name|doc|gender', where
    name is accent-stripped, trimmed and lower-cased. Keys whose ingredients
    are missing are left out; empty and repeated keys are removed while
    keeping first-seen order.
    """
    person = _normalize_ascii(name or "").strip().lower()

    candidates = []
    if email:
        candidates.append(email.strip().lower())
    if person:
        if doc:
            candidates.append(f"{person}|{doc}")
        candidates.append(person)
        if doc and gender:
            candidates.append(f"{person}|{doc}|{gender.strip().lower()}")

    # dict preserves insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(key for key in candidates if key))

def index_paths():
    """Return (index_file_path, lock_file_path), both located in SHARED_DIR."""
    index_file = os.path.join(SHARED_DIR, INDEX_FILENAME)
    lock_file = os.path.join(SHARED_DIR, LOCK_FILENAME)
    return index_file, lock_file

@contextmanager
def index_lock(timeout=15):
    """Best-effort cross-process lock around the shared index.

    The lock is a file created exclusively (O_CREAT | O_EXCL) next to the
    index. If it cannot be created within *timeout* seconds, a warning is
    printed and the body runs WITHOUT the lock (deliberate best-effort).

    The lock file is removed on exit only if this call created it. Removing
    it unconditionally would delete a lock that is still held by another
    process whenever this call had timed out.

    NOTE(review): a lock file left behind by a crashed process is never
    cleaned up here; every later call then waits *timeout* seconds and
    proceeds unlocked until the file is deleted manually.

    Args:
        timeout: seconds to keep retrying before giving up on the lock.
    """
    _, lock_path = index_paths()
    # Written into the lock file purely as a diagnostic (who holds it).
    token = f"{socket.gethostname()}-{os.getpid()}-{uuid.uuid4().hex}"
    start = time.time()
    acquired = False
    try:
        while True:
            try:
                fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            except FileExistsError:
                if time.time() - start > timeout:
                    print("‚ö†Ô∏è  No pude obtener lock del √≠ndice; sigo sin lock.")
                    break
                time.sleep(0.3)
            else:
                # Flag ownership before writing so a failed write still
                # releases the file in the finally clause below.
                acquired = True
                with os.fdopen(fd, "w", encoding="utf-8") as f:
                    f.write(token)
                break
        yield
    finally:
        if acquired:
            try:
                os.remove(lock_path)
            except OSError:
                pass

def load_index() -> set:
    """Load the set of processed keys from the shared index file.

    Accepts either a bare JSON list or an object with a "hashes" list.
    Returns an empty set when the file does not exist yet, has an unexpected
    shape, or cannot be read/parsed.

    A missing file is the normal first-run case and stays silent. Any other
    failure is reported, because callers would otherwise treat a corrupt
    index as empty and overwrite it on the next save.
    """
    idx_path, _ = index_paths()
    try:
        with open(idx_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if isinstance(data, list):
            return set(data)
        if isinstance(data, dict) and "hashes" in data:
            return set(data["hashes"])
    except FileNotFoundError:
        pass
    except Exception as exc:
        # Still best-effort (callers rely on getting a set back), but no longer silent.
        print(f"[WARN] No pude leer el indice compartido ({idx_path}): {exc}")
    return set()

def save_index(hashes: set):
    """Write *hashes* to the shared index as {"hashes": [...sorted...]}.

    The JSON is written to '<index>.tmp' first and then moved over the real
    file with os.replace, so readers never see a half-written index.
    """
    idx_path, _ = index_paths()
    tmp_path = idx_path + ".tmp"
    payload = {"hashes": sorted(hashes)}
    with open(tmp_path, "w", encoding="utf-8") as out:
        json.dump(payload, out, ensure_ascii=False, indent=2)
    os.replace(tmp_path, idx_path)

def already_processed_any(keys: List[str]) -> bool:
    """Return True if at least one of *keys* is present in the shared index.

    The index is read from disk once per call. Reading it inside the
    membership loop would re-open and re-parse the JSON file for every key
    while holding the lock. An empty key list returns False without touching
    the lock or the file.
    """
    if not keys:
        return False
    with index_lock():
        known = load_index()
    return any(k in known for k in keys)

def mark_processed_all(keys: List[str]):
    """Add every non-empty key in *keys* to the shared index.

    Runs under the index lock; the file is rewritten only when at least one
    key was not already present.
    """
    with index_lock():
        known = load_index()
        fresh = [key for key in keys if key and key not in known]
        if fresh:
            known.update(fresh)
            save_index(known)

def report_exists_in_shared(name: str, email: str, doc: str) -> bool:
    """Return True if SHARED_DIR already holds a report file for *name*.

    A file counts as a match when its name starts with 'ReportePDA_<slug>',
    where <slug> is built exactly as suggested_filename_from_row builds it
    (accent-stripped, slugified, truncated to 90 characters). Without the
    truncation a long name would never match its own saved file and the
    report would be generated again on every run.

    *email* and *doc* are accepted for interface compatibility but do not
    narrow the search: any 'ReportePDA_<slug>_<email>...' file is already
    covered by the name prefix, and doc is not part of the file name.

    NOTE(review): prefix matching means a person whose slug is a prefix of
    another person's slug (e.g. 'Ana_Maria' vs 'Ana_Maria_Lopez') is treated
    as already done. Confirm this trade-off is acceptable.
    """
    if not name:
        return False
    base = slugify(_normalize_ascii(name).strip())[:90]
    if not base:
        return False
    # Escape the literal part so glob metacharacters in the folder path
    # (e.g. '[' or ']') cannot change the meaning of the pattern.
    pattern = glob.escape(os.path.join(SHARED_DIR, f"ReportePDA_{base}")) + "*"
    return bool(glob.glob(pattern))

def suggested_filename_from_row(row_el) -> str:
    """Build the target PDF name for a row.

    Format: 'ReportePDA_<name>[_<email-local-part>].pdf', where <name> is the
    accent-stripped, slugified name (or 'PDA_Report' when no name was found)
    cut to 90 characters, and the e-mail local part is slugified and cut to
    50 characters.
    """
    name, email = extract_name_email_from_row(row_el)

    stem = _normalize_ascii(name or "").strip() or "PDA_Report"
    parts = ["ReportePDA", slugify(stem)[:90]]
    if email and "@" in email:
        local_part = email.split("@", 1)[0]
        parts.append(slugify(local_part)[:50])
    return "_".join(parts) + ".pdf"

def move_to_shared_downloads(local_filename: str, suggested_name: str) -> str:
    """Move a downloaded file from DOWNLOAD_DIR into SHARED_DIR under *suggested_name*.

    '.pdf' is used when *suggested_name* has no extension. If the target
    already exists, '_2', '_3', ... is appended to the stem until a free
    name is found.

    Returns:
        The final destination path.
    """
    stem, ext = os.path.splitext(suggested_name)
    ext = ext or ".pdf"

    target = os.path.join(SHARED_DIR, stem + ext)
    counter = 2
    while os.path.exists(target):
        target = os.path.join(SHARED_DIR, f"{stem}_{counter}{ext}")
        counter += 1

    shutil.move(os.path.join(DOWNLOAD_DIR, local_filename), target)
    return target

# ---------- Selenium helpers ----------
def build_driver_remote(debug_addr: str = "127.0.0.1:9222") -> webdriver.Chrome:
    """Attach a Chrome driver to an already running Chrome at *debug_addr*.

    Also tries to resize the window to 1400x900, makes sure the working
    folders exist, and asks Chrome (via CDP) to download into DOWNLOAD_DIR.
    Failure of the resize is ignored; failure of the CDP call only prints a
    warning.
    """
    opts = Options()
    opts.debugger_address = debug_addr
    driver = webdriver.Chrome(options=opts)

    try:
        driver.set_window_rect(width=1400, height=900)
    except Exception:
        pass

    ensure_dirs()

    download_behavior = {"behavior": "allow", "downloadPath": DOWNLOAD_DIR}
    try:
        driver.execute_cdp_cmd("Page.setDownloadBehavior", download_behavior)
    except Exception:
        print("‚ö†Ô∏è  No se pudo fijar carpeta de descargas por CDP; Chrome usar√° la predeterminada.")
    return driver

def wait_present(driver, by, locator, timeout=WAIT):
    """Wait up to *timeout* for an element matching (by, locator) to be present and return it."""
    waiter = WebDriverWait(driver, timeout)
    condition = EC.presence_of_element_located((by, locator))
    return waiter.until(condition)

def js_click(driver, el):
    """Scroll *el* to the centre of the viewport and click it through JavaScript."""
    scroll_js = "arguments[0].scrollIntoView({block:'center'});"
    click_js = "arguments[0].click();"

    driver.execute_script(scroll_js, el)
    time.sleep(0.05)  # brief pause between the scroll and the click
    driver.execute_script(click_js, el)

def click_then_pause(driver, el, gap=CLICK_GAP_SEC):
    """Click *el* via js_click, then sleep *gap* seconds."""
    js_click(driver, el)
    # Give the UI time to react before the next interaction.
    time.sleep(gap)

def list_rows(driver) -> List:
    """Return the row elements currently matching TABLE_ROWS_XPATH (an empty list if none)."""
    found = driver.find_elements(By.XPATH, TABLE_ROWS_XPATH)
    return found if found else []

def close_drawer_and_overlays(driver):
    """Best-effort cleanup: close any open drawer, then dismiss overlay backdrops.

    Each open drawer gets its first close/back button clicked. After that, up
    to three visible backdrops are clicked (last one first). If the backdrop
    step raises, ESC is sent to the active element instead. No exception
    escapes this function.
    """
    close_btn_xpath = (
        ".//button[.//mat-icon[normalize-space()='close'] or .//mat-icon[normalize-space()='arrow_back']"
        " or contains(@aria-label,'Close') or contains(@aria-label,'Cerrar')]"
    )

    # 1) drawers
    try:
        for drawer in driver.find_elements(By.XPATH, DRAWER_OPEN_XPATH):
            buttons = drawer.find_elements(By.XPATH, close_btn_xpath)
            if not buttons:
                continue
            click_then_pause(driver, buttons[0], gap=0.3)
    except Exception:
        pass

    # 2) overlay backdrops
    try:
        remaining = 3
        while remaining > 0:
            remaining -= 1
            visible = [
                backdrop
                for backdrop in driver.find_elements(By.CSS_SELECTOR, OVERLAY_BACKDROP_CSS)
                if backdrop.is_displayed()
            ]
            if not visible:
                break
            js_click(driver, visible[-1])
            time.sleep(0.2)
    except Exception:
        try:
            driver.switch_to.active_element.send_keys(Keys.ESCAPE)
        except Exception:
            pass

def first_row_key(driver) -> str:
    """Return the stripped text of the first table row, or '' if it cannot be read."""
    first_row_xpath = f"({TABLE_ROWS_XPATH})[1]"
    try:
        text = driver.find_element(By.XPATH, first_row_xpath).text
    except Exception:
        return ""
    return (text or "").strip()

def go_next_page(driver, timeout=12) -> bool:
    """Click the paginator's 'next' button and wait for the table to change.

    The page counts as changed once the first row's text differs from the
    one seen before the click and at least one row is present.

    Returns:
        True when the next page loaded; False when no enabled 'next' button
        became clickable within 4 seconds or the table did not change within
        *timeout* seconds.
    """
    close_drawer_and_overlays(driver)
    previous_key = first_row_key(driver)

    try:
        next_btn = WebDriverWait(driver, 4).until(
            EC.element_to_be_clickable((By.XPATH, NEXT_PAGE_BTN_XPATH))
        )
    except TimeoutException:
        return False
    js_click(driver, next_btn)

    def page_changed(d):
        return first_row_key(d) != previous_key and len(list_rows(d)) > 0

    try:
        WebDriverWait(driver, timeout).until(page_changed)
    except TimeoutException:
        return False
    time.sleep(0.5)
    return True

def set_items_per_page(driver, preferred=PREFERRED_PAGE_SIZE):
    """Raise the paginator's 'Items per page' setting.

    Opens the page-size selector and picks the option whose label equals
    *preferred*. If that option is not offered, the largest numeric option in
    the overlay is picked instead, and drawers/overlays are closed afterwards.
    Does nothing when the selector does not become clickable.
    """
    # Open the selector.
    try:
        trigger = WebDriverWait(driver, 4).until(
            EC.element_to_be_clickable((By.XPATH, PAGE_SIZE_SELECT_XPATH))
        )
        js_click(driver, trigger)
        time.sleep(0.2)
    except TimeoutException:
        return

    # First choice: the exact preferred size.
    preferred_xpath = PAGE_SIZE_OPTION_XPATH_TPL.format(preferred)
    try:
        option = WebDriverWait(driver, 2).until(
            EC.element_to_be_clickable((By.XPATH, preferred_xpath))
        )
        js_click(driver, option)
        time.sleep(0.5)
        return
    except TimeoutException:
        pass

    # Fallback: the largest numeric option on offer.
    try:
        pane = WebDriverWait(driver, 2).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".cdk-overlay-pane"))
        )
        best = None
        for span in pane.find_elements(By.XPATH, ".//mat-option//span"):
            try:
                size = int((span.text or "").strip())
            except Exception:
                continue
            # '>=' keeps the last of several equal maxima.
            if best is None or size >= best[0]:
                best = (size, span)
        if best is not None:
            js_click(driver, best[1])
            time.sleep(0.5)
    except TimeoutException:
        pass
    finally:
        close_drawer_and_overlays(driver)

# ---------- per-row flow ----------
def open_actions_menu(driver, row_el) -> bool:
    """Open the actions menu of a row.

    Hovers over the row, then tries the three-dots button and, if that does
    not produce a visible overlay pane, the caret button. After each click
    the overlay is polled up to 8 times, 0.25 s apart.

    Returns:
        True as soon as a visible overlay pane is found, False otherwise.
    """
    hover = ActionChains(driver)
    try:
        hover.move_to_element(row_el).perform()
        time.sleep(0.15)
    except Exception:
        pass

    # Three dots first, then the caret variant.
    for trigger_xpath in (ROW_THREE_DOTS_XPATH, ROW_CARET_XPATH):
        try:
            trigger = row_el.find_element(By.XPATH, trigger_xpath)
            click_then_pause(driver, trigger)
            for _attempt in range(8):
                visible = [
                    pane
                    for pane in driver.find_elements(By.CSS_SELECTOR, OVERLAY_PANE_CSS)
                    if pane.is_displayed()
                ]
                if visible:
                    return True
                time.sleep(0.25)
        except Exception:
            continue
    return False

def click_menu_generate(driver) -> bool:
    """Click the 'Generate/Generar' item in the open context menu.

    Visible overlay panes are searched from the last one to the first; the
    first pane containing the item gets it clicked.

    Returns:
        True if an item was clicked, False otherwise (including any lookup error).
    """
    try:
        visible_panes = [
            pane
            for pane in driver.find_elements(By.CSS_SELECTOR, OVERLAY_PANE_CSS)
            if pane.is_displayed()
        ]
    except Exception:
        return False

    for pane in visible_panes[::-1]:
        try:
            menu_item = pane.find_element(By.XPATH, MENU_GENERATE_ITEM_REL_XPATH)
            click_then_pause(driver, menu_item)
        except Exception:
            continue
        return True
    return False

def select_pda_report(driver) -> bool:
    """Click the 'PDA Report' / 'Reporte PDA' button in the side panel.

    Tries the exact XPath variants for up to 12 seconds, then any button in
    the open drawer whose text contains 'PDA' for up to 6 seconds.

    Returns:
        True if a button was clicked; False (after printing a note) if
        neither lookup produced a clickable button in time.
    """
    attempts = (
        (12, PDA_REPORT_BTN_XPATH),
        # Fallback: any button in the open drawer that mentions 'PDA'.
        (6, f"{DRAWER_OPEN_XPATH}//button[contains(.,'PDA')]"),
    )
    for wait_sec, xpath in attempts:
        try:
            button = WebDriverWait(driver, wait_sec).until(
                EC.element_to_be_clickable((By.XPATH, xpath))
            )
            click_then_pause(driver, button)
            return True
        except TimeoutException:
            continue

    print("   ‚ÑπÔ∏è  No encontr√© 'PDA Report/Reporte PDA' en el panel.")
    return False

def click_final_generate(driver) -> bool:
    """Click the final 'Generate/Generar' button that starts the report.

    Tries the drawer-scoped flat button for up to 15 seconds, then any
    button matching PANEL_GENERATE_BTN_XPATH for up to 8 seconds.

    Returns:
        True if a button was clicked; False (after printing an error) if
        neither lookup produced a clickable button in time.
    """
    attempts = (
        (15, FINAL_GENERATE_IN_DRAWER_XPATH),
        # Fallback: the broader Generate/Generar button XPath.
        (8, PANEL_GENERATE_BTN_XPATH),
    )
    for wait_sec, xpath in attempts:
        try:
            button = WebDriverWait(driver, wait_sec).until(
                EC.element_to_be_clickable((By.XPATH, xpath))
            )
            click_then_pause(driver, button)
            return True
        except TimeoutException:
            continue

    print("   ‚ùå No pude clickear el 'Generate/Generar' final del drawer.")
    return False

def wait_for_download_finish(initial_files: set, timeout_sec: int = 180) -> Tuple[bool, str]:
    """Poll DOWNLOAD_DIR until a new, completed file appears.

    A file is a candidate when it was not in *initial_files* and is neither a
    browser temp file ('.crdownload' / '.tmp') nor the shared index or lock
    file. The latter can appear here because DOWNLOAD_DIR may be the same
    folder as SHARED_DIR. While any '.crdownload' file is present, the
    download is considered still in progress.

    Args:
        initial_files: directory listing taken before the download was triggered.
        timeout_sec: maximum number of seconds to wait.

    Returns:
        (True, filename) for the most recently created candidate, or
        (False, "") if nothing appeared before the timeout.
    """
    def _ctime_or_zero(fname: str) -> float:
        # The file may vanish between listdir() and getctime() (e.g. a sync
        # client moving it); treat that as "oldest" instead of crashing.
        try:
            return os.path.getctime(os.path.join(DOWNLOAD_DIR, fname))
        except OSError:
            return 0.0

    deadline = time.time() + timeout_sec
    baseline = set(initial_files)
    not_downloads = {INDEX_FILENAME, LOCK_FILENAME}
    temp_suffixes = (".crdownload", ".tmp")

    while time.time() < deadline:
        now = set(os.listdir(DOWNLOAD_DIR))
        if any(f.endswith(".crdownload") for f in now):
            time.sleep(0.9)
            continue
        new_files = [
            f for f in now - baseline
            if f not in not_downloads and not f.lower().endswith(temp_suffixes)
        ]
        if new_files:
            return True, max(new_files, key=_ctime_or_zero)
        time.sleep(0.7)
    return False, ""

def get_row_by_index(driver, idx: int):
    """Return the *idx*-th (1-based) row matching TABLE_ROWS_XPATH.

    Propagates whatever driver.find_element raises when there is no such row.
    """
    indexed_xpath = "({})[{}]".format(TABLE_ROWS_XPATH, idx)
    return driver.find_element(By.XPATH, indexed_xpath)

def _run_generate_flow(driver, idx, row_el, suggested_name: str) -> bool:
    """Drive the UI to generate one report and collect the downloaded file.

    Returns True once a new file shows up in DOWNLOAD_DIR (even if moving it
    to the shared folder then fails), False if any UI step or the download
    wait fails. Each failure prints its own message.
    """
    if not open_actions_menu(driver, row_el):
        print(f"   #{idx} ‚ùå No pude abrir el men√∫ de acciones (tres puntos/caret).")
        return False

    if not click_menu_generate(driver):
        print(f"   #{idx} ‚ùå No apareci√≥ 'Generate/Generar' dentro del men√∫.")
        return False

    if not select_pda_report(driver):
        print(f"   #{idx} ‚ùå No pude seleccionar 'PDA Report/Reporte PDA'.")
        return False

    # Snapshot taken right before the final click so the new file can be told apart.
    before = set(os.listdir(DOWNLOAD_DIR))

    if not click_final_generate(driver):
        print(f"   #{idx} ‚ùå No pude pulsar el 'Generate/Generar' final.")
        return False

    time.sleep(FINAL_HOLD_SEC)

    print(f"   #{idx} ‚úÖ Generate pulsado. Esperando descarga‚Ä¶")
    okd, fname = wait_for_download_finish(before, timeout_sec=180)
    if not okd:
        print(f"   #{idx} ‚ö†Ô∏è No detect√© archivo nuevo en {DOWNLOAD_DIR}.")
        close_drawer_and_overlays(driver)
        return False

    try:
        final_path = move_to_shared_downloads(fname, suggested_name)
        print(f"   #{idx} üìÑ Movido a compartida: {final_path}")
    except Exception as e:
        # The report was downloaded; only the move failed, so the row still counts as done.
        print(f"   #{idx} ‚ö†Ô∏è Descargado '{fname}', pero no pude mover a compartida: {e}")
    return True


def process_row_by_index(driver, idx, processed_cache: set) -> bool:
    """Generate and store the PDA report for the *idx*-th (1-based) row, unless already done.

    A row is skipped (returning True) when any of its candidate keys is in
    *processed_cache* or in the shared index, or when a matching PDF already
    exists in the shared folder. Otherwise the UI flow is run; on success the
    keys are written to the shared index.

    The keys are added to *processed_cache* before the UI flow starts and are
    removed again whenever the flow does not succeed, whether it returned
    False or raised. Without the rollback on exceptions, the retry performed
    by the caller after a StaleElementReferenceException would find the keys
    in the cache and skip the row as "already processed".

    Args:
        driver: the Selenium driver.
        idx: 1-based row position on the current page.
        processed_cache: in-memory set of keys handled in this run (mutated).

    Returns:
        True if the row was skipped as already done or a report was
        downloaded; False on any failure.
    """
    close_drawer_and_overlays(driver)
    try:
        row_el = get_row_by_index(driver, idx)
    except Exception:
        print(f"   #{idx} ‚ùå No pude localizar la fila #{idx}.")
        return False

    # Make sure the row is in view.
    try:
        driver.execute_script("arguments[0].scrollIntoView({block:'center'});", row_el)
        time.sleep(0.2)
    except Exception:
        pass

    # ----- De-duplicate before generating -----
    name_dbg, email_dbg = extract_name_email_from_row(row_el)
    sig = extract_row_signals(row_el)
    person_name = name_dbg or sig["name"]
    cand_keys = build_candidate_keys(person_name, email_dbg, sig["doc"], sig["gender"])

    # Check the in-memory cache first: it avoids the lock and the disk read.
    if any(k in processed_cache for k in cand_keys) or already_processed_any(cand_keys):
        print(f"   #{idx} ‚è≠Ô∏è  Ya procesado (√≠ndice). Saltando.")
        return True

    if report_exists_in_shared(person_name, email_dbg, sig["doc"]):
        print(f"   #{idx} ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.")
        processed_cache.update(cand_keys)
        mark_processed_all(cand_keys)
        return True

    processed_cache.update(cand_keys)

    suggested_name = suggested_filename_from_row(row_el)

    # Visual log of who is being processed.
    preview = f"{person_name.strip()} | {email_dbg}".strip(" |")
    if preview:
        print(preview)

    # ---- UI flow ----
    succeeded = False
    try:
        succeeded = _run_generate_flow(driver, idx, row_el, suggested_name)
    finally:
        if not succeeded:
            # Roll back on a False result AND on an exception so the row can be retried.
            processed_cache.difference_update(cand_keys)

    if succeeded:
        mark_processed_all(cand_keys)
        close_drawer_and_overlays(driver)
    return succeeded

# ---------- navigation with pagination ----------
def go_to_list(driver):
    """Navigate to LIST_URL (unless already there) and wait until table rows are present."""
    already_there = driver.current_url.startswith(LIST_URL)
    if not already_there:
        driver.get(LIST_URL)
    wait_present(driver, By.XPATH, TABLE_ROWS_XPATH, timeout=WAIT)

def iterate_pages(driver):
    """Process every row of every page of the list.

    Raises the page size first, then walks the rows of the current page by
    1-based index and moves to the next page until there is none. A row that
    raises StaleElementReferenceException is retried once after 0.5 s; any
    other error is printed and the loop moves on. Stops early (returning
    without the final message) once MAX_ROWS rows have been attempted.
    """
    set_items_per_page(driver, preferred=PREFERRED_PAGE_SIZE)

    page_no = 1
    attempted = 0        # rows attempted so far, whatever their outcome
    seen_keys = set()    # in-memory de-duplication cache shared by all rows

    while True:
        row_count = len(list_rows(driver))
        if row_count == 0:
            print("‚ö†Ô∏è  No se detectaron filas. Revisa TABLE_ROWS_XPATH.")
            break

        print(f"\n== P√°gina {page_no} | Filas visibles: {row_count} ==")

        for idx in range(1, row_count + 1):
            if MAX_ROWS is not None and attempted >= MAX_ROWS:
                print(f"\n‚èπ L√≠mite MAX_ROWS alcanzado: {MAX_ROWS}")
                return

            print(f"[{idx}]", end=" ", flush=True)
            try:
                process_row_by_index(driver, idx, seen_keys)
            except StaleElementReferenceException:
                # The table re-rendered under us: retry this row once.
                try:
                    time.sleep(0.5)
                    process_row_by_index(driver, idx, seen_keys)
                except Exception as e:
                    print(f"   ‚ùå Error inesperado en fila #{idx}: {e}")
            except Exception as e:
                print(f"   ‚ùå Error inesperado en fila #{idx}: {e}")

            attempted += 1
            time.sleep(PAUSE_BETWEEN_ROWS)

        if not go_next_page(driver):
            print("No hay m√°s p√°ginas (o bot√≥n 'Next' no disponible).")
            break
        page_no += 1

    print("\n‚úÖ Proceso terminado.")

# ---------- main ----------
def main():
    """Entry point: attach to Chrome, process the whole list, then release the driver.

    The driver is always quit at the end, even when processing fails; errors
    raised by quit() itself are ignored.
    """
    ensure_dirs()
    browser = build_driver_remote("127.0.0.1:9222")
    try:
        go_to_list(browser)
        iterate_pages(browser)
        summary = (
            f"\nüìÇ Descargas locales: {DOWNLOAD_DIR}",
            f"ü§ù Carpeta compartida: {SHARED_DIR}",
        )
        for line in summary:
            print(line)
    finally:
        try:
            browser.quit()
        except Exception:
            pass

# Run the automation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


== P√°gina 1 | Filas visibles: 100 ==
[1]    #1 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[2]    #2 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[3]    #3 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[4]    #4 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[5]    #5 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[6]    #6 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[7]    #7 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[8]    #8 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[9]    #9 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[10]    #10 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[11]    #11 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[12]    #12 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[13]    #13 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[14]    #14 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[15]    #15 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[16]    #16 ‚è≠Ô∏è  Ya existe PDF en compartida. Saltando.
[17]    #17 ‚è≠Ô∏è  Ya existe PDF e