In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import time
import pandas as pd

In [2]:
# Scrape every episode listed on one IMDb season page and save it to
# 'episodes.csv'.
#
# Flow: open the page, click the "All" / "See More" buttons until the page
# stops growing, then parse the fully expanded page exactly once. Parsing once
# at the end (rather than after every click) avoids duplicate rows and also
# covers the case where a single click already loaded everything.

# Imported here so this cell is self-contained; raised by WebDriverWait.until()
# when the condition is not met within the timeout.
from selenium.common.exceptions import TimeoutException

# URL for the first season
url = "https://www.imdb.com/title/tt0409591/episodes/?season=1"

# The initial "All" button and the later "See More" buttons share this locator.
SEE_MORE_XPATH = "//button[contains(@class, 'ipc-see-more__button')]"

# Column order of the output CSV; also guarantees a header row when no
# episodes were found.
EPISODE_COLUMNS = ['Name', 'Air Date', 'Description', 'Rating', 'Votes']


def _text_or_none(parent, tag, class_name):
    """Return the stripped text of the first matching descendant, or None.

    Some episode tiles lack certain fields (e.g. no rating yet), in which
    case ``find`` returns None; this keeps one missing field from aborting
    the whole scrape.
    """
    node = parent.find(tag, class_=class_name)
    return node.text.strip() if node is not None else None


# Initialize Selenium WebDriver
driver = webdriver.Chrome()  # Ensure the correct WebDriver version

try:
    # Open the URL inside the try block so the browser is closed even if
    # navigation fails.
    driver.get(url)

    try:
        # Wait for the "All" button to be clickable
        all_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, SEE_MORE_XPATH))
        )
    except TimeoutException:
        # No expand button appeared; scrape whatever is already on the page
        # instead of failing without output.
        all_button = None

    if all_button is not None:
        # Scroll to the "All" button to ensure it's in view
        ActionChains(driver).move_to_element(all_button).perform()

        # Click via JavaScript so an overlapping element cannot intercept it
        driver.execute_script("arguments[0].click();", all_button)

        # Wait for the content to load
        time.sleep(5)  # Adjust if needed

        last_height = driver.execute_script("return document.body.scrollHeight")

        # Keep expanding until no button remains or the page stops growing
        while True:
            # Scroll to the bottom of the page
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # Wait for new content to load
            time.sleep(5)  # Adjust if needed

            try:
                see_more_button = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.XPATH, SEE_MORE_XPATH))
                )
            except TimeoutException:
                # No more "See More" buttons: everything is loaded. Only the
                # timeout is treated as "done"; other errors propagate.
                break

            # Click the "See More" button using JavaScript
            driver.execute_script("arguments[0].click();", see_more_button)
            time.sleep(5)  # Wait for new content to load

            # Guard against looping forever on a button that loads nothing
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

    # Parse the fully expanded page once
    season_page = BeautifulSoup(driver.page_source, 'html.parser')
    # NOTE(review): the 'sc-…' class names look auto-generated and may change
    # when the site is redeployed — confirm against the live markup.
    episode_tiles = season_page.find_all('div', class_='sc-ccd6e31b-1 ggXjkj')

    episodes = []
    for episode in episode_tiles:
        heading = episode.find('h4')
        title_link = heading.a if heading is not None else None

        episodes.append({
            'Name': title_link.text.strip() if title_link is not None else None,
            'Air Date': _text_or_none(episode, 'span', 'sc-ccd6e31b-10 dYquTu'),
            'Description': _text_or_none(episode, 'div', 'ipc-html-content-inner-div'),
            'Rating': _text_or_none(episode, 'span', 'ipc-rating-star--rating'),
            'Votes': _text_or_none(episode, 'span', 'ipc-rating-star--voteCount'),
        })

    # Create a DataFrame and save to CSV
    df = pd.DataFrame(episodes, columns=EPISODE_COLUMNS)
    df.to_csv('episodes.csv', index=False)

finally:
    # Close the browser
    driver.quit()