In [1]:
import mimetypes
import os
import uuid
from urllib.parse import urlencode

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [2]:
# Google image search term
query = "chilidogs"

# Full search url with search term.
# urlencode() percent-escapes the term, so queries containing spaces, "&", "#",
# or non-ASCII characters still produce a valid URL (plain f-string interpolation
# would let such characters corrupt the query string).
url = "https://www.google.com/search?" + urlencode({"tbm": "isch", "q": query})

# Start Selenium to run our search
driver = webdriver.Chrome()
driver.get(url)

In [3]:
# Upper bound on PAGE_DOWN presses so the cell cannot spin forever if the
# end-of-results marker never becomes visible (e.g. the page markup changed).
MAX_SCROLLS = 1000

try:
    # Get the body element containing our search results
    body = driver.find_element(By.CSS_SELECTOR, "body")

    # The following elements are used to scroll through results and find the end of the page.
    # find_element raises NoSuchElementException if either one is absent from the page.
    show_more_button = body.find_element(By.XPATH, '//input[@value="Show more results"]')
    end_of_page = body.find_element(By.XPATH, '//div[text() = "Looks like you\'ve reached the end"]')

    # Scroll to bottom of search results - this is done because not all images are loaded
    for _ in range(MAX_SCROLLS):
        if end_of_page.is_displayed():
            break
        body.send_keys(Keys.PAGE_DOWN)
        if show_more_button.is_displayed():
            show_more_button.click()
    else:
        print(f"Stopped scrolling after {MAX_SCROLLS} page-downs; results may be incomplete")

    # Extract potential image urls, reading each src attribute only once
    candidate_srcs = (
        img.get_attribute("src")
        for img in body.find_elements(By.CLASS_NAME, "rg_i")
    )

    # Keep only sources that are real https:// URLs; missing src attributes and
    # non-HTTPS sources (such as inline data: URIs) cannot be downloaded later.
    img_urls = [src for src in candidate_srcs if src and src.startswith("https://")]
finally:
    # quit() ends the whole WebDriver session (browser and driver process), whereas
    # close() only closes the current window. Running it in `finally` means a failure
    # above does not leave a browser behind; re-run the cell that creates `driver`
    # before retrying this one.
    driver.quit()

In [4]:
# Save images to this directory (created on first run so open() below cannot
# fail with FileNotFoundError on a fresh checkout)
data_directory = "./data/google_images/"
os.makedirs(data_directory, exist_ok=True)

# Seconds to wait for each download before giving up; without a timeout a single
# stalled connection would hang the cell indefinitely.
REQUEST_TIMEOUT = 30

# Iterate through our image urls and save images locally.
# The loop variable is deliberately not named `url`, so the search `url` defined
# earlier in the notebook is not overwritten.
for img_url in img_urls:
    print(f"Downloading {img_url}")
    try:
        resp = requests.get(img_url, timeout=REQUEST_TIMEOUT)
        # Treat HTTP error responses as failures instead of saving the error body as an image
        resp.raise_for_status()
    except requests.RequestException as exc:
        # One bad URL should not abort the remaining downloads
        print(f"Skipping {img_url}: {exc}")
        continue

    # Content-Type may be missing or carry parameters ("image/jpeg; charset=..."),
    # so keep only the bare media type before looking up an extension.
    mime_type = resp.headers.get("Content-Type", "").split(";")[0].strip()
    # guess_extension returns None for unknown types; fall back to no extension
    # rather than writing a file whose name ends in the text "None".
    extension = mimetypes.guess_extension(mime_type) or ""

    # Deterministic name derived from the URL: re-running overwrites rather than duplicates
    filename = uuid.uuid5(uuid.NAMESPACE_URL, img_url).hex
    with open(os.path.join(data_directory, f"{filename}{extension}"), "wb") as f:
        f.write(resp.content)

Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSUtmoOc7OVwbA-wW5f7dRC-MArFtAtk3FUyw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRpQfM0NNujl7w27m-tF8Mg-WDoIQSc72OUSA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRrCwVgCrq1mCBFZV8dFUod3unvDDnQ3vwCmw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTNXJJg91MM9gCsACfS-sJtE3SwOT3FvGQ1vw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTuvlhzuNunjSBANCwi2KNMtCY3M96x65ohsw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQhm9A74A5qjT9LQlS5cObqmWDVmkXAXaw8Ew&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTOH5usC2IxJzXgjvGLByn0akeBQFTcfKIJTg&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcThwHln5ZzPGHs8ZXe-7jk1zSsOTKjhvzoylQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRlg8DZVXAUNMhxXQxwcmAwbnZgoKhgBehh2Q&u

Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTRS5dPUg4oAMBna15BdG4F2OwfitbBKZB4Bg&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRWux4uAeiy1nrPHlFw1EBVO51onU_EZ2V5Qw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr3Pu0kUVGsRygx-IR96SRnwIprthMYxFQoQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSve5SdSgNh37IdXml9urjg7WTdp66ttTqFag&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT5Dx5gABk9IQpg9kmI6wxpm9HUR37mND7PqQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToI3fTCkY_LJmQUQWN8okCb7YQL8_oEzaBcQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSmJD8KpApGkxKUCSWbT9qWl7HQeWJ84V78lQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQq2W_dIMPGgWSW_T4c_GlQjXDfclEzkEF4Cw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRX8vLhpk7QiAExj84qoMqCR0K9dkD2vGdamg&u

Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR9fMGBbEvOFHPDHH4nAca7IJf7LSxHL79Miw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSyfvAe_vL54vAdlbMWOJgCDWGiVqGfesQFnw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR8RbUkJ1KtPAc_R_NWiAlJTWzETI7WEAtgpQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ__OksqIsP5uuM8UfWp0L3A_o0pWX3IPxd8w&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRq5wCpHmqM3zSgTiqVQC93AP8hFA-yCpMY_A&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZy2y2slHd0UJigCtkdqNBQBBthjCbCyyIhw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQKMeNZtP3R9d9JB-Mh5PvN7ovZb8OWn0erpA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSLY490eDT3k-Ow_TtAaqpjGa2BeHljHC08XA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRkvtmg2e85pE_GNDufvsOS1_L67m7N7gug4A&u

Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT-9dDNMcUK7YjZ3SaIluM4L3h1uulbcPtjgA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRLeTITI5WuRyDLsUjzsRIEpplG60e9Vkt0eQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTnq5_poL1pHAoC8-NRVgRv6qtGL2racKyGaQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQS36xq4LogJel2U9w-_DO9-NXRXQByx05qYw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSN3Ab7JKN2HLOhqnw_AX0pPbF6_80d7I4V6Q&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSI8kn_HTBlKXtLF6zIp4bgzVuJ_WQOble4lw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTbBGj8k1Swt2EA0H-SHUZfH58jPJQ9jWMsmA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSm1x8HfSexncZ7KIJ_uE2FCuNzzHcbn1Z7jA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTP1MJccXBXmTS4mQU9NpwuRVieXuGXwtN7mg&u

Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQTsRgmo9v0p4wkYn5us9YdxudICPXsHO4F3A&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSJVjDgyLJ8KdI8Up5mUrH-xtYGBfw3s8_1HA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTWDjPISKmAgHzM1uG1ZGte-OVReGkxBqQbCQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRv2zKsGhtFcPbVvS_HvACw86CSMjDP3dJEOg&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRnloYrrlIkvSinCXIqgaYeZ4JVj40WoKz7-w&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTUxMptZEyGnTD4zZRlm9kb9brnN26kY7IvyQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcStMK2AEed0ojfhzCaoZMtHvzDLgPc-kpY_JQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTSjI49-5B7ThBfyE7vFzhhXYGbcGOzVbRL7g&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRbjxyw6VCv74UcK9GumwfsAcEmjVFXP5ikNQ&u

Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIu_tMs7fefINrFV1wM_HMWxeYqfEAYRIUAQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRd0u_dVIVAR6lmH7iqnEJbtkTm15keAH5csQ&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQbHdmK-Z33A-H8KV-hNfkschkJocM59vKySA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRnM4-vHOB7TM68Y93JmrAtjDgwjaRAYznLAw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTYeA_a36wKg1mzT3UztbvjVsz-DxuM42n-aA&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSoxP0-DCYntury53ztQDmlctFUhJAb6-Nprw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSUOQ9z_AobzrMtRRpiNFn6etI3W_Spw-82Hw&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRnHFC_fr0ffR3T02f-OKgFEWo5jdtlsfqPSg&usqp=CAU
Downloading https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTzNxfL_ikRytFAlwC9DYP5FhjaRwG2DqH5lg&u