In [133]:
import os
import time
import base64
import pandas as pd
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [142]:


class ImageDownloader:
    """
    Download thumbnail images from a Google Images search using Selenium.

    The downloader opens google.com, submits ``keyword`` in the search box,
    follows the "Images" link and then walks over the result thumbnails.
    For each thumbnail it saves the inline (``data:image...``) thumbnail as
    a PNG file and records the thumbnail's ``alt`` text together with the
    ``src`` of the preview image in ``image_info.csv``.

    The instance owns a browser session. Either call ``quit_driver()`` when
    done or use the instance as a context manager::

        with ImageDownloader(path, srch, second_image, keyword, n) as dl:
            dl.download_images()

    Parameters:
        path (str): The path to store the downloaded images and CSV file.
        srch (str): The XPath for the search box on the Google start page.
        second_image (str): The XPath for the image shown in the preview
            pane after a thumbnail has been clicked.
        keyword (str): The keyword to search for.
        num_images (int): The maximum number of thumbnails to process.
    """

    # Seconds to pause after each navigation/click so the page can update
    # before the next element lookup.
    WAIT_SECONDS = 2

    def __init__(self, path, srch, second_image, keyword, num_images):
        self.path = path
        self.srch = srch
        self.second_image = second_image
        self.keyword = keyword
        self.num_images = num_images
        # Starts a Safari WebDriver session immediately; released by quit_driver().
        self.driver = webdriver.Safari()

    def __enter__(self):
        """Return the downloader itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the driver on leaving the ``with`` block; never suppress errors."""
        self.quit_driver()
        return False

    def create_directory(self):
        """
        Create the directory to store the downloaded images and CSV file.

        Safe to call repeatedly. An empty ``path`` means "current directory",
        which already exists, so nothing is created in that case.
        """
        if self.path:
            os.makedirs(self.path, exist_ok=True)

    @staticmethod
    def _decode_data_url(src):
        """
        Decode a base64 ``data:image`` URL into raw image bytes.

        Parameters:
            src (str | None): Value of an ``<img>`` element's ``src`` attribute.

        Returns:
            bytes | None: The decoded payload, or ``None`` when ``src`` is
            missing, is not a ``data:image`` URL, has no ``,`` separator, or
            is not base64-encoded.
        """
        if src is None or not src.startswith('data:image'):
            return None
        header, separator, payload = src.partition(',')
        if not separator or not header.endswith(';base64'):
            return None
        return base64.b64decode(payload)

    def download_images(self):
        """
        Run the search and download up to ``num_images`` thumbnails.

        Side effects:
            * Ensures the output directory exists.
            * Writes ``image_<i>.png`` (``i`` is the zero-based position of
              the thumbnail) for every thumbnail whose ``src`` is a base64
              ``data:image`` URL; other thumbnails are skipped.
            * Writes ``image_info.csv`` with columns ``Title`` and ``URL``,
              one row per processed thumbnail (including skipped ones).
            * Prints a progress line for every saved image.

        Raises:
            Whatever Selenium raises when an element cannot be located or
            clicked; in that case no CSV is written.
        """
        # Do not rely on the caller having run create_directory() first.
        self.create_directory()

        self.driver.get("http://www.google.com")  # Go to google.com

        # Type the keyword into the search box and submit it.
        search_box = self.driver.find_element(By.XPATH, self.srch)
        search_box.send_keys(self.keyword)
        search_box.send_keys(Keys.RETURN)
        time.sleep(self.WAIT_SECONDS)

        # Switch to the Images tab of the results page.
        images_link = self.driver.find_element(By.LINK_TEXT, "Images")
        images_link.click()
        time.sleep(self.WAIT_SECONDS)

        # Collect the result thumbnails.
        image_elements = self.driver.find_elements(By.CSS_SELECTOR, '.rg_i')

        image_info = []
        for i, element in enumerate(image_elements):
            if i == self.num_images:  # reached the requested number of images
                break

            # Open the preview pane for this thumbnail.
            element.click()
            time.sleep(self.WAIT_SECONDS)

            # Locate the preview image; its src is what gets recorded in the CSV.
            preview = self.driver.find_element(By.XPATH, self.second_image)
            url = preview.get_attribute('src')

            title = element.get_attribute('alt')  # title of the image

            # The file saved to disk is the thumbnail itself, which is
            # embedded in the page as a data URL.
            image_bytes = self._decode_data_url(element.get_attribute('src'))
            if image_bytes is not None:
                image = Image.open(BytesIO(image_bytes))
                file_path = os.path.join(self.path, f'image_{i}.png')
                image.save(file_path)
                print(f"Image {i + 1} downloaded successfully.")
            image_info.append((title, url))

        # Save the collected titles and URLs to CSV.
        df = pd.DataFrame(image_info, columns=['Title', 'URL'])
        df.to_csv(os.path.join(self.path, 'image_info.csv'), mode='w', index=False)

    def quit_driver(self):
        """
        Quit the Selenium driver.
        """
        self.driver.quit()


In [143]:
if __name__ == "__main__":
    # Output directory for the images and image_info.csv.
    path = "Python_finalproject"
    # XPath of the Google search box.
    srch = '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]' #This should not be changed
    # XPath of the image in the preview pane that opens after clicking a thumbnail.
    second_image = '//*[@id="Sva75c"]/div[2]/div/div[2]/div[2]/div[2]/c-wiz/div/div/div/div[3]/div[1]/a/img[1]' #This should not be changed
    keyword = 'Python funny'
    num_images = 5

    downloader = ImageDownloader(path, srch, second_image, keyword, num_images)
    try:
        downloader.create_directory()
        downloader.download_images()
    finally:
        # Always release the browser session, even if the scrape fails part-way.
        downloader.quit_driver()

Image 1 downloaded successfully.
Image 2 downloaded successfully.
Image 3 downloaded successfully.
Image 4 downloaded successfully.
Image 5 downloaded successfully.
