In [1]:
%pip install selenium

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Action movies 

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time

# Initialize WebDriver
driver = webdriver.Chrome()
driver.get("https://www.imdb.com/search/title/?title_type=feature&genres=action&release_date=2024-01-01,2024-12-31")

time.sleep(3)  # Waiting for the page to load

genre = "Horror"

# Scroll Until No More New Data Loads
scrolling = True
movies_per_page = 50  # Assuming each load gives 50 movies
total_movies_needed = 500
current_movies = 0

while scrolling and current_movies < total_movies_needed:
    old_page_source = driver.page_source  # Save old page source
    
    # Scroll down to load more data
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # Allow time for new data to load
    
    new_page_source = driver.page_source  # Save new page source

    if new_page_source == old_page_source:
        try:
            # Locate and click the "See More" button if present
            see_more_button = driver.find_element(By.XPATH, "//span[contains(@class, 'ipc-see-more')]")
            ActionChains(driver).move_to_element(see_more_button).click().perform()
            time.sleep(2)  # Wait for new content to load
        except Exception:
            scrolling = False  # Stop scrolling if button isn't found
    else:
        current_movies += movies_per_page  # Increment count

# Extract movie containers
movie_blocks = driver.find_elements(By.CSS_SELECTOR, "li.ipc-metadata-list-summary-item")

movies_list = []

for movie in movie_blocks:
    try:
        title = movie.find_element(By.CSS_SELECTOR, "h3.ipc-title__text").text.strip()
        title = title.split(". ", 1)[-1]  # Removes index number if present
    except:
        title = "N/A"

    try:
        duration_element = movie.find_element(By.XPATH, ".//span[contains(@class, 'dli-title-metadata-item') and (contains(text(),'h') or contains(text(),'m'))]")
        duration = duration_element.text.strip() if duration_element.text.strip() else "N/A"
    except:
        duration = "N/A"

    try:
        rating = movie.find_element(By.CSS_SELECTOR, "span.ipc-rating-star--rating").text.strip()
    except:
        rating = "N/A"

    try:
        voting = movie.find_element(By.CSS_SELECTOR, "span.ipc-rating-star--voteCount").text.strip()
    except:
        voting = "N/A"

    movie_data = {
        "Title": title,
        "Genre": genre,
        "Duration": duration,
        "Rating": rating,
        "Votes": voting,
    }
    movies_list.append(movie_data)


# Print results
for movie in movies_list:
    print(movie)

# Close the driver
driver.quit()

{'Title': 'Dune: Part Two', 'Genre': 'Horror', 'Duration': '2h 46m', 'Rating': '8.5', 'Votes': '(610K)'}
{'Title': 'Venom: The Last Dance', 'Genre': 'Horror', 'Duration': '1h 50m', 'Rating': '6.0', 'Votes': '(105K)'}
{'Title': 'Gladiator II', 'Genre': 'Horror', 'Duration': '2h 28m', 'Rating': '6.6', 'Votes': '(213K)'}
{'Title': 'Sonic the Hedgehog 3', 'Genre': 'Horror', 'Duration': '1h 50m', 'Rating': '6.9', 'Votes': '(51K)'}
{'Title': 'The Lord of the Rings: The War of the Rohirrim', 'Genre': 'Horror', 'Duration': '2h 14m', 'Rating': '6.3', 'Votes': '(27K)'}
{'Title': 'William Tell', 'Genre': 'Horror', 'Duration': '2h 13m', 'Rating': '5.8', 'Votes': '(1.4K)'}
{'Title': 'Kraven the Hunter', 'Genre': 'Horror', 'Duration': '2h 7m', 'Rating': '5.4', 'Votes': '(42K)'}
{'Title': 'Pushpa: The Rule - Part 2', 'Genre': 'Horror', 'Duration': '3h 21m', 'Rating': '6.1', 'Votes': '(53K)'}
{'Title': 'Furiosa: A Mad Max Saga', 'Genre': 'Horror', 'Duration': '2h 28m', 'Rating': '7.5', 'Votes': '(278K

In [3]:
import pandas as pd

df=pd.DataFrame(movies_list)
df.to_csv("Horror.csv")