# 1. Dependencias
Descargamos el chrome driver: https://sites.google.com/chromiun.org/driver/downloads

E instalamos las dependencias necesarias

In [None]:
# pip install selenium webdriver-manager

In [14]:
import time
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# 2. Configuración del Servicio y Driver

In [15]:
# Ruta del chrome driver
chrome_driver_path = "chromedriver.exe" #  El archivo que está en la carpeta


# Opcional: configurar opciones
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized") #  Iniciar en pantalla completa
# Hacer menos detectable el scraper
options.add_argument("--disable-blink-features=AutomationControlled")


# Crear el servicio y el driver
service = Service(executable_path=chrome_driver_path)
driver = webdriver.Chrome(service=service, options=options)



# 3. Definir URL



In [16]:
# abrir la url
url = "http://quotes.toscrape.com/scroll"
driver.get(url)

# Esperar un momento a que se renderizen las frases
time.sleep(3)

# 4. Manipular Renderizado

In [17]:
# Obtener HMTL renderizado
html = driver.page_source
driver.quit()

# Procesar el HTML con BeutifulSoup
soup = BeautifulSoup(html, "html.parser")
# buscar el elemento que contiene las frases
quotes = soup.select("div.quote")

print("Citas encontradas: ", len(quotes)) # obtenemos las citas

# guardar cada cita
for quote in quotes:
    text = quote.find("span", class_="text").get_text()
    author = quote.find("small", class_="author").get_text()
    print(f"{text} - {author}")


Citas encontradas:  10
“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.” - Albert Einstein
“It is our choices, Harry, that show what we truly are, far more than our abilities.” - J.K. Rowling
“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.” - Albert Einstein
“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.” - Jane Austen
“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.” - Marilyn Monroe
“Try not to become a man of success. Rather become a man of value.” - Albert Einstein
“It is better to be hated for what you are than to be loved for what you are not.” - André Gide
“I have not failed. I've just found 10,000 ways that won't work.” - Thomas A. Edison
“A woman is like a tea bag; you never know how strong it is until it's in hot

# 6. Scroll infinito

In [33]:
# Ruta del chrome driver
chrome_driver_path = "chromedriver.exe"  # El archivo que está en la carpeta


# Opcional: configurar opciones
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")  # Iniciar en pantalla completa
# Hacer menos detectable el scraper
options.add_argument("--disable-blink-features=AutomationControlled")


# Crear el servicio y el driver
service = Service(executable_path=chrome_driver_path)
driver = webdriver.Chrome(service=service, options=options)

In [34]:
# abrir la url
url = "http://quotes.toscrape.com/scroll"
driver.get(url)

# Esperar un momento a que se renderizen las frases
time.sleep(3)

In [35]:
# import necesario para la implementación
from selenium.webdriver.common.by import By


SCROLL_PAUSE_TIME = 2
# obtenemos la altura del sitio, indispensable para el scroll
last_height = driver.execute_script("return document.body.scrollHeight")
quotes_set = set()

# Desplazarse varias veces
for i in range(10):
    # hacer el scrool
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # tiempo de pausa para esperar que se renderice
    new_height = driver.execute_script("return document.body.scrollHeight")

    # Extraer las nuevas frases cargadas en está iteración
    quotes = driver.find_elements(By.CLASS_NAME, "quote")
    # Procesar los datos obtenidos
    for quote in quotes:
        # obtener cada frase
        text = quote.find_element(By.CLASS_NAME, "text").text
        # añadirla a la lista de frases
        quotes_set.add(text)

    # si llega al final se cierra
    if new_height == last_height:
        break

    # se obtiene la nueva altura
    last_height = new_height

driver.quit()

In [36]:
print(f"Total de frases únicas cargadas: {len(quotes_set)}")
for quote in quotes_set:
    print(quote)

Total de frases únicas cargadas: 80
“Life is what happens to us while we are making other plans.”
“I have always imagined that Paradise will be a kind of library.”
“A reader lives a thousand lives before he dies, said Jojen. The man who never reads lives only one.”
“If you can't explain it to a six year old, you don't understand it yourself.”
“Beauty is in the eye of the beholder and it may be necessary from time to time to give a stupid or misinformed beholder a black eye.”
“You don’t forget the face of the person who was your last hope.”
“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”
“To love at all is to be vulnerable. Love anything and your heart will be wrung and possibly broken. If you want to make sure of keeping it intact you must give it to no one, not even an animal. Wrap it carefully round with hobbies and little luxuries; avoid all entanglements. Lock it up safe in the casket or coffin of your selfishness. But