In [28]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
import os

In [29]:
# Scraper settings read by the download cell.
output_folder = 'captchas/recaptcha/'  # root directory the images are saved under
num_images = 10  # number of challenge images to download in one run
captcha_site = 'https://www.google.com/recaptcha/api2/demo'  # page the browser opens

In [31]:
# Download `num_images` reCAPTCHA challenge images from `captcha_site`.
# Files are written to `output_folder/<img class>/captcha_<i>.png`, or to
# `output_folder/<img class>/<challenge label>/captcha_<i>.png` when the image
# has the class 'rc-image-tile-44'.

# create a new instance of the Chrome browser
driver = webdriver.Chrome()

# Everything after the browser is started runs inside try/finally so that the
# browser is always shut down, even if a wait times out or a download fails.
try:
    # navigate to the website
    driver.get(captcha_site)

    # wait for the reCAPTCHA iframe to load and switch to it
    wait = WebDriverWait(driver, 10)
    wait.until(EC.frame_to_be_available_and_switch_to_it(
        (By.CSS_SELECTOR, 'iframe[title="reCAPTCHA"]')))

    # wait until div#rc-anchor-container is clickable (visible and enabled,
    # not merely present in the DOM) and click it to trigger the challenge
    recaptcha = wait.until(EC.element_to_be_clickable((By.ID, 'rc-anchor-container')))
    recaptcha.click()

    # switch back to the main frame and wait for the
    # "recaptcha challenge expires in two minutes" iframe to load
    driver.switch_to.default_content()
    wait.until(EC.frame_to_be_available_and_switch_to_it(
        (By.CSS_SELECTOR, 'iframe[title="recaptcha challenge expires in two minutes"]')))

    # download one image per iteration, reloading the challenge in between
    for i in range(num_images):
        # find the captcha image element and get its source and class name
        image = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'img[src*="api2/payload"]')))
        image_src = image.get_attribute('src')
        class_name = image.get_attribute('class')

        # images are grouped in a subfolder named after the image's class
        target_folder = os.path.join(output_folder, class_name)

        # for this class the challenge label (text of the first <strong>) is
        # used as an extra subfolder level
        # NOTE(review): presumably this class marks one challenge layout among
        # several - confirm why only it gets a label folder
        if class_name == 'rc-image-tile-44':
            label = driver.find_element(By.TAG_NAME, 'strong').text
            target_folder = os.path.join(target_folder, label)

        # exist_ok=True creates any missing parent folders and is a no-op on
        # later iterations that reuse the same folder
        os.makedirs(target_folder, exist_ok=True)

        # download the image; the timeout keeps a stalled connection from
        # hanging the cell, and raise_for_status() prevents an HTTP error
        # page from being saved as if it were an image
        response = requests.get(image_src, timeout=30)
        response.raise_for_status()

        f_out = os.path.join(target_folder, f'captcha_{i}.png')
        with open(f_out, "wb") as f:
            f.write(response.content)

        # find the reload button and click it
        reload_button = driver.find_element(By.ID, 'recaptcha-reload-button')
        reload_button.click()

        # wait for the next image to load: the old <img> element goes stale
        # once the challenge has been replaced
        wait.until(EC.staleness_of(image))

    # switch back to the main frame
    driver.switch_to.default_content()
finally:
    # quit the browser on success and on failure
    driver.quit()
