Web scraping the NASA Mars 2020 raw-images page: https://mars.nasa.gov/mars2020/multimedia/raw-images/

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os

def safe_click(element, driver):
    """Scroll *element* into view, pause briefly, then click it via JavaScript.

    Both the scroll and the click are issued through
    ``driver.execute_script`` with *element* passed as ``arguments[0]``.

    Args:
        element: The page element to click.
        driver: The WebDriver instance used to run the scripts.
    """
    run_js = driver.execute_script
    run_js("arguments[0].scrollIntoView(true);", element)
    # One-second pause between the scroll and the click.
    time.sleep(1)
    run_js("arguments[0].click();", element)

# Downloads go into a local "mars_images" folder; create it if it is missing.
download_dir = "mars_images"
os.makedirs(download_dir, exist_ok=True)

# Chrome preferences: save into download_dir (as an absolute path) without
# prompting for a location.
prefs = {}
prefs["download.default_directory"] = os.path.abspath(download_dir)
prefs["download.prompt_for_download"] = False
prefs["download.directory_upgrade"] = True
prefs["safebrowsing.enabled"] = True

options = webdriver.ChromeOptions()
options.add_experimental_option("prefs", prefs)

# Launch Chrome with those preferences and open the raw-images gallery.
driver = webdriver.Chrome(options=options)
driver.get("https://mars.nasa.gov/mars2020/multimedia/raw-images/")

wait = WebDriverWait(driver, 20)
first_iteration = True

# How long to wait for each page's zip archive, and how often to re-check.
DOWNLOAD_TIMEOUT_SECONDS = 8 * 60
DOWNLOAD_POLL_SECONDS = 5

# Locator for the "Download selections" link, used on every page.
DOWNLOAD_LINK = (By.XPATH, "//a[contains(., 'Download selections')]")


def _zip_names(directory):
    """Return the set of ``.zip`` file names currently present in *directory*."""
    return {name for name in os.listdir(directory) if name.endswith(".zip")}


# Main loop: on each gallery page, trigger the zip download, wait for a NEW
# archive to appear in download_dir, then type the next page number into the
# pagination input. The loop ends on the first step that fails or times out.
# The try/finally guarantees the browser is closed even on an unexpected
# exception or Ctrl-C.
try:
    while True:
        time.sleep(3)  # Allow page to load

        # Snapshot the archives that already exist (from earlier pages or an
        # earlier run) so that only a zip created by THIS page's download
        # counts as completion below.
        zips_before = _zip_names(download_dir)

        if first_iteration:
            # STEP 1 (first page only): Activate "select multiple images" mode.
            # Failure here is tolerated: the mode may already be active.
            try:
                select_mode = wait.until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "a.selections_mode")))
                safe_click(select_mode, driver)
                print("Clicked 'select multiple images'")
            except Exception as e:
                print("Warning: 'select multiple images' already active or not clickable; proceeding. Error:", e)

            time.sleep(2)
            # STEP 2 (first page only): Click "Select all on this page" link.
            try:
                select_all = wait.until(EC.element_to_be_clickable(
                    (By.XPATH, "//a[normalize-space(text())='Select all on this page']")))
                safe_click(select_all, driver)
                print("Clicked 'Select all on this page'")
            except Exception as e:
                print("Error clicking 'Select all on this page':", e)
                break

            time.sleep(2)

        # STEP 3: Click "Download selections". On subsequent pages, images are
        # automatically selected, so this is the only click needed there.
        page_note = "" if first_iteration else " on subsequent page"
        try:
            download_sel = wait.until(EC.element_to_be_clickable(DOWNLOAD_LINK))
            safe_click(download_sel, driver)
            print(f"Clicked 'Download selections'{page_note}")
        except Exception as e:
            print(f"Error clicking 'Download selections'{page_note}:", e)
            break

        # STEP 4: Wait for the zip file download to complete, i.e. for a .zip
        # name that was not present before the click. A monotonic clock keeps
        # the deadline immune to system clock adjustments.
        zip_downloaded = False
        deadline = time.monotonic() + DOWNLOAD_TIMEOUT_SECONDS
        while time.monotonic() < deadline:
            if _zip_names(download_dir) - zips_before:
                zip_downloaded = True
                print("Zip file download completed.")
                break
            time.sleep(DOWNLOAD_POLL_SECONDS)

        if not zip_downloaded:
            print("Zip download did not complete within timeout.")
            break

        # STEP 5: Navigate to the next page using the page selector input.
        try:
            page_input = wait.until(EC.presence_of_element_located((By.ID, "footer_pagination")))
            # Get the current page number from the input's value attribute.
            current_page = int(page_input.get_attribute("value"))
            next_page = current_page + 1
            # Clear the input and enter the next page number.
            page_input.clear()
            page_input.send_keys(str(next_page))
            # Simulate pressing ENTER to trigger the page change.
            page_input.send_keys(Keys.RETURN)
            # Wait until the page number in the input shows the new value; the
            # element is looked up again on every poll.
            wait.until(lambda d: d.find_element(By.ID, "footer_pagination").get_attribute("value") == str(next_page))
            print(f"Navigated to next page: {next_page}")
        except Exception as e:
            print("Error navigating to next page:", e)
            break

        # After the first page, set the flag so subsequent pages only trigger the download.
        first_iteration = False
finally:
    driver.quit()
