In [148]:
# all imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
from selenium.webdriver.support.wait import WebDriverWait
import os

In [162]:
# Start a Chrome session through the chromedriver binary given by a relative path,
# then maximize the browser window.
chromdrive = Service(executable_path=os.path.join('chromedriver.exe'))
opt = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=opt, service=chromdrive)
driver.maximize_window()

In [150]:
# URL of the IMDb list of Disney movies; open it in the browser session.
url = "https://www.imdb.com/list/ls026785255/"
driver.get(url)

In [151]:
# Scroll to bottom of page and load all elements
def load_full_page():
    """Scroll the current page to the bottom until its height stops growing.

    Uses the module-level ``driver``. After each scroll it waits up to 10
    seconds for ``document.body.scrollHeight`` to exceed the previously
    recorded height; when that wait times out the function returns.
    """
    height_script = "return document.body.scrollHeight"
    known_height = driver.execute_script(height_script)

    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            # The wait succeeds as soon as the page has grown past known_height.
            WebDriverWait(driver, 10).until(
                lambda _: driver.execute_script(height_script) > known_height
            )
        except TimeoutException:
            # No growth within the timeout: stop scrolling.
            return
        known_height = driver.execute_script(height_script)

In [152]:
# Scroll the list page to the end, then collect the href of every title link.
load_full_page()
movie_links = [
    anchor.get_attribute('href')
    for anchor in driver.find_elements(By.XPATH, "//a[@class='ipc-title-link-wrapper']")
]
len(movie_links)

136

In [153]:
# Accumulates one dict per scraped movie (filled by the scraping loop below).
disney_movies = []

In [154]:
# To dismiss any dialog box like ratings that open up while browsing
def dismiss_dialog_if_present(driver):
    """Close a pop-up dialog on the current page if its close icon shows up.

    Waits up to 5 seconds for the close icon to become clickable and clicks
    it. This is best-effort: the function never raises. A missing dialog
    (wait timeout) is silent; any other failure is printed.

    Args:
        driver: Selenium WebDriver whose current page is inspected.

    Returns:
        None.
    """
    # By.CLASS_NAME accepts a single class name only, so the compound value
    # 'ipc-icon ipc-icon--clear' could never match. A CSS selector with chained
    # classes matches an element that carries both classes.
    # NOTE(review): confirm this class pair only identifies the dialog's close
    # icon and not other "clear" icons on the page.
    close_locator = (By.CSS_SELECTOR, '.ipc-icon.ipc-icon--clear')
    try:
        close_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(close_locator)
        )
        close_button.click()  # Click to close the dialog
        print("Dialog box dismissed.")
    except TimeoutException:
        # No close icon became clickable within the wait: nothing to dismiss.
        pass
    except Exception as e:
        # Report instead of silently swallowing, but keep the call best-effort.
        print(f"Could not dismiss dialog box: {e}")

In [None]:
# Visit every movie page and collect one record (dict) per movie in disney_movies.
# A record is appended only when every field of that movie was scraped
# successfully; a failing page is reported and skipped, so values left over
# from the previous iteration can never be stored under the wrong movie.
for movie_link in movie_links:
    try:
        # Navigate to the movie link
        driver.get(movie_link)
        load_full_page()  # Scroll to the bottom so the page content is loaded

        # A dialog can appear at any time, so try to dismiss it before each lookup.
        dismiss_dialog_if_present(driver)

        # Movie title
        name = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CLASS_NAME, 'hero__primary-text'))
        ).text

        dismiss_dialog_if_present(driver)

        # Source URL of the video element
        video = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, '//video[@class="jw-video jw-reset"]'))
        ).get_attribute('src')

        dismiss_dialog_if_present(driver)

        # Source URL of the first visible image with class "ipc-image"
        cover_img = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, '//img[@class="ipc-image"]'))
        ).get_attribute('src')

        dismiss_dialog_if_present(driver)

        # Genre chips: visible text plus the link each chip points to
        genres = [{'name': genre.text, 'url': genre.get_attribute('href')} for genre in
                driver.find_elements(By.XPATH, '//a[@class="ipc-chip ipc-chip--on-baseAlt"]')]

        dismiss_dialog_if_present(driver)

        # NOTE(review): "sc-55855a9b-0 dAbouZ" looks like a build-generated class
        # name and may change between site releases - confirm it still matches.
        synopsis = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, '//span[@class="sc-55855a9b-0 dAbouZ"]'))
        ).text

        dismiss_dialog_if_present(driver)

        story = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, '//div[@class="ipc-html-content-inner-div"]'))
        ).text

    except Exception as e:
        # Scraping this movie failed part-way: report it and store nothing.
        print(f"An unexpected error occurred: {e}")

    else:
        # Reached only when the try block finished without an exception.
        disney_movies.append({
            'name': name,
            'video': video,
            'cover_img': cover_img,
            'genres': genres,
            'synopsis': synopsis,
            'story': story
        })

In [161]:
# Show the collected movie records as this cell's output.
disney_movies

[{'name': 'Snow White and the Seven Dwarfs',
  'video': 'https://imdb-video.media-imdb.com/vi854264089/1434659607842-pgv4ql-1616198319041.mp4?Expires=1728849484&Signature=JYIRqJM2dF36IE4JZR27UArXkQ0Rw1G3DVmnvozvlcLHXK-U8qB~BTAKaTKxtLQD1Wf7atV99JVmGGDCBMtvZOrTICGAJyNJq8slxsXPftqKGRX094RqYSHiUjzNAHbZwfFrbxoRtTJbNp7PgqJL4dTC-CoLespVrgziZueahAPJSSm3CF0I978pcvtjgJ3WcN-5enUqBjoxA~-0Apnj7Df91Kj7rqHLGQU~ij4oBE9TrOc8RF70ZiUsxxxeZspGG3yCiUKl~RA0S3tHX1lYusygnBMbRDKWr6MYAuU4mL8cA9SCGk4DmM2ex3fesPA9MYcodMeBY1ndCb5iJFGAPw__&Key-Pair-Id=APKAIFLZBVQZ24NQH3KA',
  'cover_img': 'https://m.media-amazon.com/images/M/MV5BMjAyNDM2MDA2NF5BMl5BanBnXkFtZTcwOTMxMDAxNA@@._V1_QL75_UX190_CR0,0,190,281_.jpg',
  'genres': [{'name': 'Computer Animation',
    'url': 'https://www.imdb.com/interest/in0000028/?ref_=tt_ov_in_1'},
   {'name': 'Jukebox Musical',
    'url': 'https://www.imdb.com/interest/in0000132/?ref_=tt_ov_in_2'},
   {'name': 'Adventure',
    'url': 'https://www.imdb.com/interest/in0000012/?ref_=tt_ov_in_3