In [3]:
# Install into the running kernel's environment; requests is imported by the scraper cell as well.
%pip install selenium pillow requests

Note: you may need to restart the kernel to use updated packages.


In [12]:
import os
import time
from io import BytesIO
from urllib.parse import quote_plus

import requests
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# --- Configurable Variables ---
# Earlier single-plant settings, kept for reference only; the query and
# output folder now appear to come from the Plant objects handed to main().
# SEARCH_QUERY = "Kalanchoe Tomentosa"
# DOWNLOAD_DIR = "./kalanchoe-kalanchoe_tomentosa"
NUM_IMAGES = 300  # upper bound on thumbnails collected per plant (main passes it to collect_thumbnails)
HEADLESS = True  # main passes this to create_driver; True adds Chrome's --headless flag
# ------------------------------

class Plant:
    """Pairs an image-search query with the directory its downloads are written to."""

    def __init__(self, search_query, download_dir):
        # Both values are stored unchanged as public attributes.
        self.search_query, self.download_dir = search_query, download_dir

def create_driver(headless=True):
    """Build and return a Chrome WebDriver.

    Args:
        headless: When truthy, Chrome is launched with the ``--headless``
            and ``--disable-gpu`` arguments.

    Returns:
        A new ``webdriver.Chrome`` instance created with the assembled options.
    """
    opts = Options()
    # Headless-only flags come first, then the flag that is always passed.
    flags = ["--headless", "--disable-gpu"] if headless else []
    flags.append("--log-level=3")
    for flag in flags:
        opts.add_argument(flag)
    return webdriver.Chrome(options=opts)

def scroll_and_click_see_more(driver):
    """Scroll to the page bottom repeatedly, clicking "See more images" while it is clickable.

    Each pass scrolls to the bottom of the document, pauses two seconds, then
    waits up to two seconds for the ``a.btn_seemore`` link to become
    clickable and clicks it. The loop ends the first time that wait or the
    click raises.

    Args:
        driver: Selenium WebDriver with the results page already loaded.
    """
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # fixed pause after scrolling before looking for the button

        try:
            see_more_button = WebDriverWait(driver, 2).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn_seemore"))
            )
            print("Clicking 'See more images' button...")
            ActionChains(driver).move_to_element(see_more_button).click().perform()
            time.sleep(2)
        except Exception:
            # Best effort: a missing or unclickable button ends the scrolling.
            # `Exception` (not a bare `except`) lets KeyboardInterrupt and
            # SystemExit propagate, so interrupting the kernel stops the run.
            break

def collect_thumbnails(driver, max_images):
    """Collect up to ``max_images`` distinct ``img.mimg`` elements from the page.

    Repeatedly expands the page with ``scroll_and_click_see_more`` and gathers
    the matching elements. Collection stops once enough elements are held or
    a pass finds no element that was not already collected.

    Args:
        driver: Selenium WebDriver with the results page already loaded.
        max_images: Maximum number of elements to return.

    Returns:
        A list of at most ``max_images`` elements, in the order they were
        first returned by ``find_elements``.
    """
    # A dict serves as an insertion-ordered set, so the truncation below
    # keeps the earliest elements rather than an arbitrary subset.
    thumbnails = {}
    while len(thumbnails) < max_images:
        scroll_and_click_see_more(driver)
        found = driver.find_elements(By.CSS_SELECTOR, "img.mimg")
        count_before = len(thumbnails)
        thumbnails.update(dict.fromkeys(found))
        # find_elements returns every match on the page, not just new ones.
        # If nothing new was added the page is exhausted; stopping here
        # avoids looping forever when it holds fewer than max_images.
        if len(thumbnails) == count_before:
            break
    return list(thumbnails)[:max_images]

def download_images(thumbnails, download_path):
    """Download each thumbnail's ``src`` image and save it as a JPEG.

    Files are written to ``download_path`` (created if missing) as
    ``IMG_0001.jpg``, ``IMG_0002.jpg``, ... numbered by successful save.
    Elements whose ``src`` is empty or does not start with ``http`` are
    skipped. A failure on one element is printed and does not stop the rest.

    Args:
        thumbnails: Iterable of objects exposing ``get_attribute("src")``.
        download_path: Directory the JPEG files are written into.

    Returns:
        The number of images saved.
    """
    os.makedirs(download_path, exist_ok=True)
    count = 0
    for i, thumb in enumerate(thumbnails):
        try:
            src = thumb.get_attribute("src")
            if not (src and src.startswith("http")):
                continue
            response = requests.get(src, timeout=10)
            # Reject non-2xx responses so an error or placeholder body is
            # never decoded and saved as if it were the requested image.
            response.raise_for_status()
            img = Image.open(BytesIO(response.content)).convert("RGB")
            filename = f"IMG_{count+1:04}.jpg"
            img.save(os.path.join(download_path, filename), "JPEG")
            count += 1
            print(f"Downloaded {filename}")
        except Exception as e:
            print(f"Failed to download image {i}: {e}")
    print(f"Downloaded {count} images.")
    return count

def main(plants):
    """Scrape Bing image-search thumbnails for every plant in ``plants``.

    For each plant a fresh Chrome driver is created, the search results page
    is opened, up to ``NUM_IMAGES`` thumbnails are collected, and they are
    downloaded into the plant's ``download_dir``.

    Args:
        plants: Iterable of objects with ``search_query`` and
            ``download_dir`` attributes.
    """
    for plant in plants:
        driver = create_driver(headless=HEADLESS)
        try:
            # quote_plus turns spaces into '+' as before and also escapes
            # characters such as '&' or '#' that would corrupt the query.
            query_url = f"https://www.bing.com/images/search?q={quote_plus(plant.search_query)}"
            driver.get(query_url)
            time.sleep(2)

            print("Collecting thumbnails...")
            thumbnails = collect_thumbnails(driver, NUM_IMAGES)

            print(f"Collected {len(thumbnails)} image thumbnails.")
            download_images(thumbnails, plant.download_dir)
        finally:
            # Always close the browser, even when a step above raises, so a
            # failed plant does not leave a Chrome process behind.
            driver.quit()


In [14]:
# Each Plant pairs a search query with its output directory.
main([
    Plant('Echeveria Perle von nurnberg', 'echeveria-echeveria_perle_von_nurnberg'),
    Plant('echeveria minima', 'echeveria-echeveria_minima'),
    Plant('Aloe vera', 'aloe-aloe_vera'),
    Plant('Acanthocereus tetragonus', 'acanthocereus-acanthocereus_tetragonus'),
    Plant('senecio vitalis', 'senecio-senecio_vitalis'),
    Plant('Lithops', 'lithops-lithops'),
    # Query previously read 'edum ...'; the leading "S" was missing.
    Plant('Sedum morganianum burrito', 'sedum-sedum_morganianum_burrito'),
])

Collecting thumbnails...
Clicking 'See more images' button...
Collected 300 image thumbnails.
Downloaded IMG_0001.jpg
Downloaded IMG_0002.jpg
Downloaded IMG_0003.jpg
Downloaded IMG_0004.jpg
Downloaded IMG_0005.jpg
Downloaded IMG_0006.jpg
Downloaded IMG_0007.jpg
Downloaded IMG_0008.jpg
Downloaded IMG_0009.jpg
Downloaded IMG_0010.jpg
Downloaded IMG_0011.jpg
Downloaded IMG_0012.jpg
Downloaded IMG_0013.jpg
Downloaded IMG_0014.jpg
Downloaded IMG_0015.jpg
Downloaded IMG_0016.jpg
Downloaded IMG_0017.jpg
Downloaded IMG_0018.jpg
Downloaded IMG_0019.jpg
Downloaded IMG_0020.jpg
Downloaded IMG_0021.jpg
Downloaded IMG_0022.jpg
Downloaded IMG_0023.jpg
Downloaded IMG_0024.jpg
Downloaded IMG_0025.jpg
Downloaded IMG_0026.jpg
Downloaded IMG_0027.jpg
Downloaded IMG_0028.jpg
Downloaded IMG_0029.jpg
Downloaded IMG_0030.jpg
Downloaded IMG_0031.jpg
Downloaded IMG_0032.jpg
Downloaded IMG_0033.jpg
Downloaded IMG_0034.jpg
Downloaded IMG_0035.jpg
Downloaded IMG_0036.jpg
Downloaded IMG_0037.jpg
Downloaded IMG_003