In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, ElementClickInterceptedException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager

# Genres to scrape; each entry is lower-cased into the IMDb search URL and
# ends up in its own CSV file.
genres = [
    "Action",
    "Adventure",
    "Horror",
    "Mystery",
    "Romance",
    "Thriller",
]

# Start Chrome with a maximized window; ChromeDriverManager().install()
# supplies the chromedriver path handed to the Service.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Shared explicit wait with a 10 second timeout.
wait = WebDriverWait(driver, 10)

# CSS selectors for the IMDb search-results page.
MOVIE_ITEM_SELECTOR = ".ipc-metadata-list-summary-item"
LOAD_MORE_SELECTOR = "button.ipc-see-more__button"
TITLE_SELECTOR = "h3.ipc-title__text"
# Match only the semantic class name. The "sc-15ac7568-7 cCsint" classes this
# selector used to include look build-generated, so they are likely to change
# and silently turn every duration into "N/A".
METADATA_SELECTOR = ".dli-title-metadata-item"
RATING_SELECTOR = "[data-testid='ratingGroup--container'] span.ipc-rating-star--rating"
VOTE_COUNT_SELECTOR = "[data-testid='ratingGroup--container'] span.ipc-rating-star--voteCount"


def _count_loaded(driver):
    """Return the number of movie list items currently present in the page."""
    return len(driver.find_elements(By.CSS_SELECTOR, MOVIE_ITEM_SELECTOR))


def _load_all_results(driver, wait):
    """Click "Load More" until the button no longer appears or adds nothing.

    Args:
        driver: The WebDriver showing an IMDb search-results page.
        wait: A WebDriverWait bound to ``driver``; its timeout bounds both the
            wait for the button and the wait for new items after a click.
    """
    while True:
        try:
            load_more_btn = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, LOAD_MORE_SELECTOR))
            )
        except TimeoutException:
            print("  No more movies found to load.")
            return

        loaded_before = _count_loaded(driver)
        driver.execute_script(
            "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
            load_more_btn,
        )
        time.sleep(2)  # Let the smooth scroll settle before clicking

        try:
            load_more_btn.click()
        except ElementClickInterceptedException:
            # An overlay covered the button. Click through JavaScript instead
            # of treating this as "no more results", which would silently
            # truncate the dataset.
            driver.execute_script("arguments[0].click();", load_more_btn)

        try:
            # Wait for the list to actually grow rather than sleeping a fixed
            # time; this also stops the loop if a click has no effect.
            wait.until(lambda d: _count_loaded(d) > loaded_before)
        except TimeoutException:
            print("  Load More did not add any movies; stopping.")
            return

        print(f"  Loaded {_count_loaded(driver)} movies so far")


def _optional_text(parent, selector):
    """Return the text of the first ``selector`` match under ``parent``.

    Returns "N/A" when no element matches.
    """
    try:
        return parent.find_element(By.CSS_SELECTOR, selector).text
    except NoSuchElementException:
        return "N/A"


def _parse_movie(movie_element, genre):
    """Build one output row (a dict) from a single movie list item.

    Raises:
        NoSuchElementException: If the item has no title element.
    """
    title = movie_element.find_element(By.CSS_SELECTOR, TITLE_SELECTOR).text.strip()

    # Metadata items; the script treats the second one as the duration.
    metadata_elements = movie_element.find_elements(By.CSS_SELECTOR, METADATA_SELECTOR)
    duration = metadata_elements[1].text if len(metadata_elements) > 1 else "N/A"

    return {
        "title": title,
        "duration": duration,
        "rating": _optional_text(movie_element, RATING_SELECTOR),
        # Strip the surrounding spaces/parentheses; "N/A" passes through unchanged.
        "voters": _optional_text(movie_element, VOTE_COUNT_SELECTOR).strip(" ()"),
        "genre": genre,
    }


try:
    for genre in genres:
        data = []
        print(f"\n--- Scraping {genre} movies ---")

        # IMDb search page for the genre (2024 movies only, count=200 per page)
        url = f"https://www.imdb.com/search/title/?title_type=feature&release_date=2024-01-01,2024-12-31&genres={genre.lower()}&count=200"
        driver.get(url)
        time.sleep(3)  # Wait for page to load

        # Keep loading until no "Load More" button
        print("  Loading all movies...")
        _load_all_results(driver, wait)

        # Now scrape all the loaded movies
        movie_elements = driver.find_elements(By.CSS_SELECTOR, MOVIE_ITEM_SELECTOR)
        total_movies = len(movie_elements)
        print(f"  Finished loading. Total movies available for {genre}: {total_movies}")

        for i, m in enumerate(movie_elements, start=1):
            try:
                data.append(_parse_movie(m, genre))
            except Exception as e:
                # Best effort: report the broken item and keep scraping the rest.
                print(f"  Error parsing movie {i}: {e}")
                continue

            if i % 50 == 0:
                print(f"  Scraped {i} movies")

        # Save results to CSV
        if data:
            df = pd.DataFrame(data)
            filename = f"imdb_{genre.lower()}_{len(df)}.csv"
            df.to_csv(filename, index=False)
            print(f"  ✅ Saved {len(df)} movies to {filename}")
        else:
            print(f"  ❌ No data collected for {genre}")
finally:
    # Always close the browser, even if a genre fails part-way through.
    driver.quit()

