In [91]:
import os
from pathlib import Path

# Directory Paths
# The project root can be overridden with the SOLAR_PANELS_ROOT environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original location is used unchanged.
root_dir = Path(
    os.environ.get(
        "SOLAR_PANELS_ROOT",
        "/Users/alextsagkas/Document/Office/solar_panels",
    )
)
data_dir = root_dir / "data"  # data folder under the project root
download_dir = data_dir / "download"  # folder the downloaded images are written to

In [92]:
import requests
import io
from PIL import Image
import os

def download_image(download_path, url, file_name, timeout=10):
    """Download an image from ``url`` and store it as a JPEG file.

    The response body is decoded with Pillow, converted to RGB and written to
    ``file_name`` inside ``download_path``. Failures are reported on stdout
    instead of being raised, so one bad URL does not abort a batch of
    downloads.

    Args:
        download_path: Directory (``str`` or ``Path``) the image is saved in.
            It is created if it does not exist.
        url: Address of the image to fetch.
        file_name: Name of the file created inside ``download_path``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        None. The outcome is printed as an ``[INFO]`` or ``[ERROR]`` line.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Surface HTTP errors (4xx/5xx) directly instead of handing an error
        # page to Pillow and getting an unrelated decoding failure.
        response.raise_for_status()

        image_file = io.BytesIO(response.content)
        image = Image.open(image_file).convert('RGB')

        os.makedirs(download_path, exist_ok=True)
        # os.path.join accepts both str and Path directories.
        file_path = os.path.join(download_path, file_name)

        with open(file_path, 'wb') as f:
            image.save(f, "JPEG")

        print(f"[INFO] Image {file_name} downloaded successfully.")
    except Exception as e:
        # Best-effort by design: report and move on to the next image.
        # Only the first character is upper-cased; str.capitalize() would
        # lower-case the rest and mangle URLs or paths in the message.
        message = str(e)
        print(f"[ERROR] {message[:1].upper() + message[1:]}")

In [94]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Scroll down the page to get more images
def scroll_down(wd, delay):
    """Jump to the bottom of the current page, then pause.

    Args:
        wd: Driver object whose ``execute_script`` runs JavaScript in the page.
        delay: Seconds to sleep after the scroll has been issued.
    """
    jump_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"
    wd.execute_script(jump_to_bottom)
    time.sleep(delay)

def get_images_from_google(url, delay, max_images):
    """Collect full-size image URLs from a Google Images results page.

    Opens ``url`` in a Chrome session, clicks the "Reject all" button when one
    is present, then repeatedly scrolls and clicks thumbnails, reading the
    ``src`` of the enlarged image each click reveals.

    Args:
        url: Search results URL to load.
        delay: Seconds to pause after each scroll and each thumbnail click.
        max_images: Number of distinct image URLs to collect.

    Returns:
        set[str]: At most ``max_images`` distinct image URLs. Fewer are
        returned when a full pass over the page yields neither new thumbnails
        nor new URLs.
    """
    wd = webdriver.Chrome()
    image_urls = set()

    # The browser is closed in ``finally`` so an unexpected Selenium error
    # does not leave a Chrome process running.
    try:
        wd.get(url)

        # Click Reject all button
        # NOTE(review): the class names used below ("Nc7WLe", "Q4LuWd",
        # "iPVvYb") are presumably generated by the site and may change --
        # verify against the live page if nothing is found.
        buttons = wd.find_elements(by=By.CLASS_NAME, value="Nc7WLe")
        for button in buttons:
            if button.text == "Reject all":
                button.click()
                break

        thumbnails_seen = 0

        while len(image_urls) < max_images:
            scroll_down(wd, delay)

            thumbnails = wd.find_elements(by=By.CLASS_NAME, value="Q4LuWd")

            print("thumbnails found: ", len(thumbnails))

            urls_before_pass = len(image_urls)

            for img in thumbnails:
                try:
                    img.click()
                    time.sleep(delay)
                except Exception:
                    # Thumbnail could not be clicked; try the next one.
                    continue

                # Find big image (the one that is displayed after interaction)
                images = wd.find_elements(by=By.CLASS_NAME, value="iPVvYb")

                print(f"Images with class iPVvYb found: {len(images)}")

                for image in images:
                    image_src = image.get_attribute("src")

                    # Prefix check rather than substring: presumably the
                    # intent is to keep only http(s) sources, and a substring
                    # test would also accept e.g. "blob:https://..." values.
                    if image_src is not None and image_src.startswith("http"):  # type: ignore
                        image_urls.add(image_src)
                        print(f"Total image urls collected: {len(image_urls)}")

                        # Stop at the limit so the result never exceeds it.
                        if len(image_urls) >= max_images:
                            break

                if len(image_urls) >= max_images:
                    break

            # Without this guard the loop never ends once the page stops
            # producing anything new (no thumbnails at all, or results
            # exhausted before ``max_images`` is reached).
            no_new_urls = len(image_urls) == urls_before_pass
            no_new_thumbnails = len(thumbnails) <= thumbnails_seen
            if no_new_urls and no_new_thumbnails:
                print(
                    f"[WARN] No new images found; stopping with "
                    f"{len(image_urls)} of {max_images} urls."
                )
                break
            thumbnails_seen = max(thumbnails_seen, len(thumbnails))
    finally:
        wd.quit()

    return image_urls



thumbnails found:  100
Images with class iPVvYb found: 1
Total image urls collected: 1
Images with class iPVvYb found: 1
Total image urls collected: 2
Images with class iPVvYb found: 1
Total image urls collected: 3
Images with class iPVvYb found: 0
Images with class iPVvYb found: 1
Total image urls collected: 4
Images with class iPVvYb found: 1
Total image urls collected: 5
Images with class iPVvYb found: 1
Total image urls collected: 6
Images with class iPVvYb found: 1
Total image urls collected: 7
Images with class iPVvYb found: 1
Total image urls collected: 8
Images with class iPVvYb found: 1
Total image urls collected: 9
Images with class iPVvYb found: 1
Total image urls collected: 10
[INFO] Image 0.jpg downloaded successfully.
[INFO] Image 1.jpg downloaded successfully.
[INFO] Image 2.jpg downloaded successfully.
[INFO] Image 3.jpg downloaded successfully.
[INFO] Image 4.jpg downloaded successfully.
[INFO] Image 5.jpg downloaded successfully.
[INFO] Image 6.jpg downloaded successf

In [95]:
# Google Images search results page to scrape.
url = "https://www.google.com/search?client=safari&sca_esv=563382129&rls=en&sxsrf=AB5stBjcBYJvBNXbZkQoxcLP8mUEu8OMlQ:1694090812474&q=solar+panel+images&tbm=isch&source=lnms&sa=X&ved=2ahUKEwjN0OCIxJiBAxXvV0EAHalWC-AQ0pQJegQICBAB&biw=1016&bih=1175&dpr=2"

image_urls = get_images_from_google(url, delay=0.8, max_images=2)

# The loop variable has its own name so ``url`` keeps pointing at the search
# page after this cell runs. The set is sorted so the index-based file names
# do not depend on arbitrary set iteration order.
for i, image_url in enumerate(sorted(image_urls)):
    download_image(download_dir, image_url, f"{i}.jpg")

thumbnails found:  100
Images with class iPVvYb found: 0
Images with class iPVvYb found: 0
Images with class iPVvYb found: 0
Images with class iPVvYb found: 0
Images with class iPVvYb found: 0
Images with class iPVvYb found: 1
Total image urls collected: 1
Images with class iPVvYb found: 0
Images with class iPVvYb found: 1
Total image urls collected: 2
[INFO] Image 0.jpg downloaded successfully.
[INFO] Image 1.jpg downloaded successfully.
