Notes:

1. If you had not, install selenium using `pip install selenium webdriver-manager`
2. instead of storing strings & numbers, or generic objects, it is wiser to store typed objects; two good choices are
    1. __[namedtuple](https://docs.python.org/3/library/collections.html#collections.namedtuple)__
    2. __[dataclass](https://docs.python.org/3/library/dataclasses.html)__

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from dataclasses import dataclass, field
from typing import Optional, List
import re
import time

url = 'https://www.imdb.com/search/title/?title_type=tv_movie,feature&release_date=2024-01-01,2024-12-31&country_of_origin=ES'

@dataclass
class MovieInfo:
    title: str
    url: str
    imdbRating: Optional[float] = None
    imdbVotes: Optional[int] = None
    metascore: Optional[int] = None
    directors: List[str] = field(default_factory=list)
    thespians: List[str] = field(default_factory=list)

def scrapeIMDbMoviesWithSlidingWindow(someURL):
    driver = webdriver.Chrome()
    driver.get(someURL)
    
    moviesList = []
    pageBatchSize = 50
    batchCounter = 0

    while True:
        # Wait until new batch is loaded
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "ipc-metadata-list-summary-item"))
        )
        time.sleep(0.5)  # slight buffer to ensure render

        # Parse and collect current visible 50 movies
        pageSource = driver.page_source
        soup = BeautifulSoup(pageSource, 'html.parser')
        movieItems = soup.select("ul.ipc-metadata-list > li")

        newMovies = []

        for li in movieItems:
            try:
                titleBlock = li.select_one("div.dli-parent h3")
                if not titleBlock:
                    continue
                title = titleBlock.text.strip()
                movieURL = "https://www.imdb.com" + li.select_one("a")["href"]
                imdbRating = li.select_one("span.ipc-rating-star--rating")
                voteCount = li.select_one("span.ipc-rating-star--voteCount")
                metascore = li.select_one("span.metacritic-score-box")

                movie = MovieInfo(title, movieURL, imdbRating.text if imdbRating else None,
                                    voteCount.text if voteCount else None, metascore.text if metascore else None, [], [])
                newMovies.append(movie)
            except Exception as e:
                print(f"Parse error: {e}")

        print(f"✅ Batch #{batchCounter+1}: {len(newMovies)} movies scraped")
        print(newMovies)
        moviesList.extend(newMovies)
        batchCounter += 1

        # Remove the first 50 <li> elements to avoid bloat
        driver.execute_script("""
            const ul = document.querySelector("ul.ipc-metadata-list");
            const lis = ul.querySelectorAll("li");
            for (let i = 0; i < 50 && i < lis.length; i++) {
                lis[i].remove();
            }
        """)

        time.sleep(0.5)  # Allow DOM to settle after deletion

        # Try clicking the “More” button
        try:
            button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "ipc-see-more__button"))
            )
            buttonText = driver.execute_script("return arguments[0].innerText;", button)
        except Exception as e:
            print("🛑 button retrieval failure")
            print(repr(e))
            break
        try:
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
        except Exception as e:
            print("🛑 scroll failure")
            print(repr(e))
            break
        try:
            time.sleep(0.5)
        except Exception as e:
            print("🛑 sleep failure")
            print(repr(e))
            break
        try:
            driver.execute_script("arguments[0].click();", button)
        except Exception as e:
            print("🛑 Clickety failure")
            print(repr(e))
            break
        try:
            print(f"🔁 Clicked ‘Load more’ (#{batchCounter})")
        except Exception as e:
            print("🛑 batchCounter print failure")
            print(repr(e))
            break
        try:
            match = re.search(r"(\d+)", buttonText)
            print(match)
            if match:
                pageBatchSize = int(match.group(1))
            else:
                pageBatchSize = 50  # fallback/default
            print(f"'{buttonText}' is parsed as announcing {pageBatchSize} more movies")
        except Exception as e:
            print("🛑 update of batchSize: failure")
            print(repr(e))
            break

            # Wait for at least one new <li> to load
        try:
            WebDriverWait(driver, 10).until(
                lambda d: len(d.find_elements(By.CLASS_NAME, "ipc-metadata-list-summary-item")) >= pageBatchSize
            )
        except Exception as e:
            print("🛑 Lambda failure in WebDriverWait")
            print(repr(e))
            break

    driver.quit()
    return moviesList

movies = scrapeIMDbMoviesWithSlidingWindow(url)

✅ Batch #1: 50 movies scraped
[MovieInfo(title='1. The Penguin Lessons', url='https://www.imdb.com/title/tt26677014/?ref_=sr_i_1', imdbRating='7.3', imdbVotes='\xa0(570)', metascore='57', directors=[], thespians=[]), MovieInfo(title='2. Rich Flu', url='https://www.imdb.com/title/tt17677434/?ref_=sr_i_2', imdbRating='5.5', imdbVotes='\xa0(5.3K)', metascore=None, directors=[], thespians=[]), MovieInfo(title="3. The Killer's Game", url='https://www.imdb.com/title/tt0327785/?ref_=sr_i_3', imdbRating='5.8', imdbVotes='\xa0(17K)', metascore='36', directors=[], thespians=[]), MovieInfo(title='4. The Room Next Door', url='https://www.imdb.com/title/tt29439114/?ref_=sr_i_4', imdbRating='6.8', imdbVotes='\xa0(19K)', metascore='70', directors=[], thespians=[]), MovieInfo(title='5. Spellbound', url='https://www.imdb.com/title/tt7215232/?ref_=sr_i_5', imdbRating='5.6', imdbVotes='\xa0(7K)', metascore='54', directors=[], thespians=[]), MovieInfo(title='6. Your Fault', url='https://www.imdb.com/title

In [2]:
print(movies)



In [3]:
print(len(movies))

473
