<a href="https://colab.research.google.com/github/chaupmbn/Image_Retrieval/blob/master/Flickr_Image_Crawling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Python packages used by the crawler below: tqdm draws the progress bars and
# selenium drives the headless Chrome session.
!pip install tqdm
!pip install selenium

# System packages: refresh the apt index, then install wget, the Chromium
# browser and the matching chromedriver binary that selenium launches.
!apt-get update
!apt-get install -y wget
!apt-get install -y chromium-browser
!apt-get install -y chromium-chromedriver

Collecting selenium
  Downloading selenium-4.23.1-py3-none-any.whl.metadata (7.1 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.26.2-py3-none-any.whl.metadata (8.6 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting h11<1,>=0.9.0 (from wsproto>=0.14->trio-websocket~=0.9->selenium)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading selenium-4.23.1-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m58.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.26.2-py3-none-any.whl (475 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m476.0/476.0 kB[0m [31m25.

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from bs4 import BeautifulSoup # For parsing HTML content
from urllib.parse import urljoin, urlparse # For handling URLs
import urllib.request # For making HTTP requests
import time # For handling time-related operations
import os # For interacting with the operating system (relate to dir, folder, file)
from tqdm import tqdm # For displaying progress bars (visualize progress)
import concurrent.futures # For multi-threading
import json # For writing to a text file
from PIL import Image # For handling images

In [6]:
class UrlScraper:
    """Collect image URLs from a search page rendered in headless Chrome.

    For every search term a fresh Chrome session opens
    ``url_template.format(search_term=term)``, harvests the ``src`` of the
    ``<img>`` tags it finds and keeps asking the page for more content until
    ``max_images`` unique URLs were collected or the page stops growing.
    """

    # Directories appended to PATH so the Chromium/chromedriver binaries
    # installed via apt can be found.
    CHROMIUM_PATHS = ('/usr/lib/chromium-browser/',
                      '/usr/lib/chromium-browser/chromedriver/')

    # NOTE(review): this id looks auto-generated by the page (it seems to embed
    # a timestamp), so it probably never matches on a fresh page load and the
    # crawler ends up on the scroll fallback after LOAD_MORE_TIMEOUT seconds.
    # Confirm against the live page and replace with a stable selector.
    LOAD_MORE_XPATH = '//button[@id="yui_3_16_0_1_1721642285931_28620"]'
    LOAD_MORE_TIMEOUT = 10   # seconds to wait for the "load more" button
    PAGE_SETTLE_SECONDS = 2  # pause after a click/scroll so new content can render

    # Occurrences of these suffixes are rewritten to LARGE_SUFFIX
    # (presumably Flickr size variants -- TODO confirm).
    THUMBNAIL_SUFFIXES = ("_m.jpg", "_n.jpg", "_w.jpg")
    LARGE_SUFFIX = "_b.jpg"

    # Page furniture that must never be reported as a search result.
    EXCLUDED_URLS = frozenset({
        "https://combo.staticflickr.com/ap/build/images/getty/IStock_corporate_logo.svg",
    })

    def __init__(self, url_template, max_images=50, max_workers=4):
        """
        Parameters:
        url_template (str): Search URL containing a ``{search_term}`` placeholder
        max_images (int): Upper bound of URLs collected per search term
        max_workers (int): Number of threads; every running task owns one
            Chrome session
        """
        self.url_template = url_template
        self.max_images = max_images
        self.max_workers = max_workers
        self.setup_environment()

    def setup_environment(self):
        """
        Make the Chromium binaries discoverable through PATH.

        The call is idempotent: a directory that is already listed in PATH is
        not appended again, so creating several scrapers does not grow PATH.
        """
        current = os.environ.get('PATH', '')
        entries = current.split(':') if current else []
        for directory in self.CHROMIUM_PATHS:
            if directory not in entries:
                entries.append(directory)
        os.environ['PATH'] = ':'.join(entries)

    @staticmethod
    def _normalize_image_url(page_url, src):
        """Resolve ``src`` against ``page_url`` and switch thumbnail suffixes to the large one."""
        img_path = urljoin(page_url, src)
        for suffix in UrlScraper.THUMBNAIL_SUFFIXES:
            img_path = img_path.replace(suffix, UrlScraper.LARGE_SUFFIX)
        return img_path

    def _load_more(self, driver):
        """Ask the page for more results: click the "load more" button, else scroll to the bottom."""
        try:
            load_more_button = WebDriverWait(driver, self.LOAD_MORE_TIMEOUT).until(
                EC.element_to_be_clickable((By.XPATH, self.LOAD_MORE_XPATH)))
            load_more_button.click()
        except Exception:
            # Button missing / not clickable / wait timed out: fall back to
            # scrolling, which triggers lazy loading on infinite-scroll pages.
            # `Exception` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(self.PAGE_SETTLE_SECONDS)

    def get_url_images(self, term):
        """
        Crawl the urls of images by term

        Parameters:
        term (str): The name of animal, plant, scenery, furniture

        Returns:
        urls (list): Unique image urls in page order, at most ``max_images``
            entries (fewer when the page runs out of content)

        Raises:
        Whatever Selenium raises while starting Chrome or loading the page;
        the browser session is closed before the exception propagates.
        """

        # Initialize Chrome driver
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=options)

        urls = []
        seen = set()  # the page is re-parsed on every pass, so filter repeats
        try:
            url = self.url_template.format(search_term=term)
            driver.get(url)

            with tqdm(total=self.max_images,
                      desc=f"Fetching images for {term}") as pbar:
                more_content_available = True
                while len(urls) < self.max_images and more_content_available:
                    soup = BeautifulSoup(driver.page_source, "html.parser")
                    img_tags = soup.find_all("img")

                    for img in img_tags:
                        if len(urls) >= self.max_images:
                            break
                        src = img.attrs.get('src')
                        if not src:
                            # An empty src would resolve to the page URL itself.
                            continue
                        img_path = self._normalize_image_url(url, src)
                        if img_path in self.EXCLUDED_URLS or img_path in seen:
                            continue
                        seen.add(img_path)
                        urls.append(img_path)
                        pbar.update(1)

                    if len(urls) >= self.max_images:
                        # Quota reached: skip the (slow) attempt to load more.
                        break

                    self._load_more(driver)

                    # Stop as soon as the page no longer grows; compare like
                    # with like (all <img> tags before vs. after).
                    refreshed = BeautifulSoup(driver.page_source, "html.parser")
                    if len(refreshed.find_all("img")) <= len(img_tags):
                        more_content_available = False
        finally:
            # Always release the browser, even when crawling failed.
            driver.quit()
        return urls

    def scrape_urls(self, categories):
        """
        Call get_url_images method to get all urls of any object in categories

        Parameter:
        categories (dictionary): the dict of all object we need to collect image
            with format categories{"name_object": [value1, value2, ...]}

        Returns:
        all_urls (dictionary): ``{category: {term: [url, ...]}}``; a term whose
            crawl raised is reported on stdout and left out of the result
        """
        all_urls = {category: {} for category in categories}

        # One task per (category, term); the thread pool bounds how many
        # browser sessions run at the same time.
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_term = {
                executor.submit(self.get_url_images, term): (category, term)
                for category, terms in categories.items()
                for term in terms
            }

            for future in tqdm(concurrent.futures.as_completed(future_to_term),
                               total=len(future_to_term), desc="Overall Progress"):
                category, term = future_to_term[future]
                try:
                    urls = future.result()
                    all_urls[category][term] = urls
                    print(f"\nNumber of images retrieved for {term}: {len(urls)}")
                except Exception as exc:
                    # Best effort: one failing term must not abort the batch.
                    print(f"\n{term} generated an exception: {exc}")
        return all_urls

    def save_to_file(self, data, filename):
        """
        Save the data to a JSON file.

        Parameters:
        data (dict): The data to be saved.
        filename (str): The name of the JSON file.

        Returns:
        None
        """
        with open(filename, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=4)
        print(f"Data saved to {filename}")

In [7]:
# Search terms to crawl, grouped by category. Every term becomes one search
# request, and the category name becomes the top-level key of the saved JSON.
categories = {
    "animal": [
        "Monkey", "Elephant", "cows", "Cat", "Dog",
        "bear", "fox", "Civet", "Pangolins", "Rabbit",
        "Bats", "Whale", "Cock", "Owl", "flamingo",
        "Lizard", "Turtle", "Snake", "Frog", "Fish",
        "shrimp", "Crab", "Snail", "Coral", "Jellyfish",
        "Butterfly", "Flies", "Mosquito", "Ants", "Cockroaches",
        "Spider", "scorpion", "tiger", "bird", "horse",
        "pig", "Alligator", "Alpaca", "Anteater", "donkey",
        "Bee", "Buffalo", "Camel", "Caterpillar", "Cheetah",
        "Chicken", "Dragonfly", "Duck", "panda", "Giraffe",
    ],
    "plant": [
        "Bamboo", "Apple", "Apricot", "Banana", "Bean",
        "Wildflower", "Flower", "Mushroom", "Weed", "Fern",
        "Reed", "Shrub", "Moss", "Grass", "Palmtree",
        "Corn", "Tulip", "Rose", "Clove", "Dogwood",
        "Durian", "Ferns", "Fig", "Flax", "Frangipani",
        "Lantana", "Hibiscus", "Bougainvillea", "Pea", "OrchidTree",
        "RangoonCreeper", "Jackfruit", "Cottonplant", "Corneliantree", "Coffeeplant",
        "Coconut", "wheat", "watermelon", "radish", "carrot",
    ],
    "furniture": [
        "bed", "cabinet", "chair", "chests", "clock",
        "desks", "table", "Piano", "Bookcase", "Umbrella",
        "Clothes", "cart", "sofa", "ball", "spoon",
        "Bowl", "fridge", "pan", "book",
    ],
    "scenery": [
        "Cliff", "Bay", "Coast", "Mountains", "Forests",
        "Waterbodies", "Lake", "desert", "farmland", "river",
        "hedges", "plain", "sky", "cave", "cloud",
        "flowergarden", "glacier", "grassland", "horizon", "lighthouse",
        "plateau", "savannah", "valley", "volcano", "waterfall",
    ],
}

# Search URL templates keyed by site; the scraper fills in {search_term}.
urltopic = {"flickr": "https://www.flickr.com/search/?text={search_term}"}

# Crawl up to 20 image URLs per term using 5 worker threads, then persist the
# {category: {term: [urls]}} mapping as JSON.
scraper = UrlScraper(
    url_template=urltopic["flickr"],
    max_images=20,
    max_workers=5,
)
image_urls = scraper.scrape_urls(categories)
scraper.save_to_file(image_urls, 'image_urls.json')

Overall Progress:   0%|          | 0/134 [00:00<?, ?it/s]
Fetching images for Dog:   0%|          | 0/20 [00:00<?, ?it/s][A

Fetching images for Cat:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for cows:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A



Fetching images for Elephant:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A

Fetching images for Cat:   5%|▌         | 1/20 [00:04<01:31,  4.82s/it][A[A

Fetching images for Cat:  10%|█         | 2/20 [00:04<00:37,  2.07s/it][A[A
Fetching images for Dog:   5%|▌         | 1/20 [00:05<01:43,  5.43s/it][A


Fetching images for cows:   5%|▌         | 1/20 [00:05<01:38,  5.20s/it][A[A[A



Fetching images for Cat: 100%|██████████| 20/20 [00:22<00:00,  1.14s/it]
Overall Progress:   1%|          | 1/134 [01:31<3:22:29, 91.35s/it]


Number of images retrieved for Cat: 20


Fetching images for Dog: 100%|██████████| 20/20 [00:24<00:00,  1.24s/it]
Fetching images for cows: 100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
Fetching images for Elephant: 100%|██████████| 20/20 [00:23<00:00,  1.18s/it]



Number of images retrieved for Dog: 20


Overall Progress:   2%|▏         | 3/134 [01:33<46:13, 21.17s/it]  


Number of images retrieved for cows: 20

Number of images retrieved for Elephant: 20



Fetching images for bear:   0%|          | 0/20 [00:00<?, ?it/s][A

Fetching images for fox:   0%|          | 0/20 [00:00<?, ?it/s][A[A
Fetching images for bear:   5%|▌         | 1/20 [00:03<01:10,  3.70s/it][A


Fetching images for Pangolins:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A



Fetching images for Civet:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Civet:   5%|▌         | 1/20 [00:01<00:24,  1.27s/it][A[A[A[A


Fetching images for Pangolins:   5%|▌         | 1/20 [00:02<00:47,  2.48s/it][A[A[A

Fetching images for bear: 100%|██████████| 20/20 [00:18<00:00,  1.08it/s]
Overall Progress:   4%|▎         | 5/134 [02:12<43:22, 20.17s/it]


Number of images retrieved for bear: 20


Fetching images for Civet: 100%|██████████| 20/20 [00:19<00:00,  1.04it/s]
Overall Progress:   4%|▍         | 6/134 [02:17<33:51, 15.87s/it]


Number of images retrieved for Civet: 20


Fetching images for Pangolins: 100%|██████████| 20/20 [00:20<00:00,  1.03s/it]
Fetching images for fox: 100%|██████████| 20/20 [00:20<00:00,  1.05s/it]


Number of images retrieved for Pangolins: 20



Overall Progress:   6%|▌         | 8/134 [02:18<17:12,  8.20s/it]


Number of images retrieved for fox: 20



Fetching images for Rabbit:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Rabbit:   5%|▌         | 1/20 [00:00<00:17,  1.07it/s][A

Fetching images for Bats:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Bats:   5%|▌         | 1/20 [00:01<00:27,  1.45s/it][A[A


Fetching images for Whale:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for Whale:   5%|▌         | 1/20 [00:02<00:47,  2.49s/it][A[A[A



Fetching images for Cock:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Rabbit: 100%|██████████| 20/20 [00:17<00:00,  1.17it/s]
Overall Progress:   7%|▋         | 9/134 [02:46<29:25, 14.13s/it]


Number of images retrieved for Rabbit: 20


Fetching images for Bats: 100%|██████████| 20/20 [00:15<00:00,  1.25it/s]
Overall Progress:   7%|▋         | 10/134 [02:49<22:08, 10.72s/it]


Number of images retrieved for Bats: 20


Fetching images for Whale: 100%|██████████| 20/20 [00:18<00:00,  1.09it/s]
Overall Progress:   8%|▊         | 11/134 [02:53<17:55,  8.75s/it]


Number of images retrieved for Whale: 20


Fetching images for Cock: 100%|██████████| 20/20 [00:20<00:00,  1.02s/it]
Overall Progress:   9%|▉         | 12/134 [02:59<16:09,  7.94s/it]


Number of images retrieved for Cock: 20



Fetching images for Owl:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Owl:   5%|▌         | 1/20 [00:02<00:42,  2.25s/it][A

Fetching images for flamingo:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for flamingo:   5%|▌         | 1/20 [00:01<00:35,  1.86s/it][A[A


Fetching images for Monkey:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A



Fetching images for Lizard:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A


Fetching images for Monkey:   5%|▌         | 1/20 [00:03<01:03,  3.37s/it][A[A[A



Fetching images for Lizard:   5%|▌         | 1/20 [00:03<01:08,  3.62s/it][A[A[A[A




Fetching images for Turtle:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Owl: 100%|██████████| 20/20 [00:23<00:00,  1.16s/it]
Overall Progress:  10%|▉         | 13/134 [03:29<29:00, 14.39s/it]


Number of images retrieved for Owl: 20


Fetching images for flamingo: 100%|██████████| 20/20 [00:22<00:00,  1.12s/it]
Overall Progress:  10%|█         | 14/134 [03:30<21:10, 10.59s/it]


Number of images retrieved for flamingo: 20



Fetching images for Monkey: 100%|██████████| 20/20 [00:23<00:00,  1.15s/it]
Overall Progress:  11%|█         | 15/134 [03:34<16:52,  8.51s/it]


Number of images retrieved for Monkey: 20


Fetching images for Lizard: 100%|██████████| 20/20 [00:22<00:00,  1.11s/it]
Overall Progress:  12%|█▏        | 16/134 [03:35<12:11,  6.20s/it]


Number of images retrieved for Lizard: 20


Fetching images for Turtle: 100%|██████████| 20/20 [00:22<00:00,  1.12s/it]
Overall Progress:  13%|█▎        | 17/134 [03:41<11:49,  6.06s/it]


Number of images retrieved for Turtle: 20




Fetching images for Fish:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for Frog:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A
Fetching images for Snake:   5%|▌         | 1/20 [00:29<09:15, 29.25s/it][A



Fetching images for shrimp:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A


Fetching images for Frog:   5%|▌         | 1/20 [00:03<01:12,  3.82s/it][A[A[A

Fetching images for Fish:   5%|▌         | 1/20 [00:06<02:05,  6.59s/it][A[A



Fetching images for shrimp:   5%|▌         | 1/20 [00:03<01:10,  3.69s/it][A[A[A[A




Fetching images for Crab:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A[A




Fetching images for Snake: 100%|██████████| 20/20 [00:43<00:00,  2.16s/it]
Overall Progress:  13%|█▎        | 18/134 [04:15<28:18, 14.64s/it]


Number of images retrieved for Snake: 20


Fetching images for Frog: 100%|██████████| 20/20 [00:18<00:00,  1.07it/s]
Overall Progress:  14%|█▍        | 19/134 [04:17<20:47, 10.85s/it]


Number of images retrieved for Frog: 20


Fetching images for Fish: 100%|██████████| 20/20 [00:20<00:00,  1.04s/it]
Overall Progress:  15%|█▍        | 20/134 [04:18<15:02,  7.92s/it]


Number of images retrieved for Fish: 20


Fetching images for shrimp: 100%|██████████| 20/20 [00:18<00:00,  1.11it/s]
Overall Progress:  16%|█▌        | 21/134 [04:20<11:36,  6.16s/it]


Number of images retrieved for shrimp: 20


Fetching images for Crab: 100%|██████████| 20/20 [00:19<00:00,  1.05it/s]
Overall Progress:  16%|█▋        | 22/134 [04:30<13:30,  7.23s/it]


Number of images retrieved for Crab: 20



Fetching images for Snail:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Snail:   5%|▌         | 1/20 [00:06<01:56,  6.14s/it][A

Fetching images for Coral:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for Jellyfish:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A

Fetching images for Coral:   5%|▌         | 1/20 [00:04<01:18,  4.15s/it][A[A


Fetching images for Jellyfish:   5%|▌         | 1/20 [00:04<01:29,  4.69s/it][A[A[A



Fetching images for Butterfly:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Butterfly:   5%|▌         | 1/20 [00:02<00:44,  2.36s/it][A[A[A[A




Fetching images for Snail: 100%|██████████| 20/20 [00:21<00:00,  1.07s/it]





Overall Progress:  17%|█▋        | 23/134 [04:55<22:56, 12.40s/it]


Number of images retrieved for Snail: 20


Fetching images for Coral: 100%|██████████| 20/20 [00:17<00:00,  1.16it/s]
Overall Progress:  18%|█▊        | 24/134 [04:58<17:53,  9.76s/it]


Number of images retrieved for Coral: 20


Fetching images for Jellyfish: 100%|██████████| 20/20 [00:19<00:00,  1.03it/s]
Overall Progress:  19%|█▊        | 25/134 [05:02<14:29,  7.98s/it]


Number of images retrieved for Jellyfish: 20



Fetching images for Mosquito:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Butterfly: 100%|██████████| 20/20 [00:20<00:00,  1.03s/it]
Overall Progress:  19%|█▉        | 26/134 [05:10<14:10,  7.88s/it]


Number of images retrieved for Butterfly: 20


Fetching images for Flies: 100%|██████████| 20/20 [00:21<00:00,  1.09s/it]
Overall Progress:  20%|██        | 27/134 [05:16<13:04,  7.33s/it]


Number of images retrieved for Flies: 20




Fetching images for Ants:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Mosquito: 100%|██████████| 20/20 [00:22<00:00,  1.13s/it]
Overall Progress:  21%|██        | 28/134 [05:29<15:56,  9.02s/it]


Number of images retrieved for Mosquito: 20



Fetching images for Cockroaches:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Cockroaches:   5%|▌         | 1/20 [00:03<01:10,  3.71s/it][A


Fetching images for Spider:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for Spider:   5%|▌         | 1/20 [00:01<00:24,  1.32s/it][A[A[A



Fetching images for scorpion:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Ants: 100%|██████████| 20/20 [00:21<00:00,  1.08s/it]
Overall Progress:  22%|██▏       | 29/134 [05:47<20:32, 11.73s/it]


Number of images retrieved for Ants: 20




Fetching images for Cockroaches: 100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
Overall Progress:  22%|██▏       | 30/134 [05:56<18:58, 10.95s/it]


Number of images retrieved for Cockroaches: 20


Fetching images for Spider: 100%|██████████| 20/20 [00:21<00:00,  1.06s/it]
Overall Progress:  23%|██▎       | 31/134 [05:57<14:00,  8.16s/it]


Number of images retrieved for Spider: 20




Fetching images for tiger:   5%|▌         | 1/20 [00:06<02:01,  6.41s/it][A[A

Fetching images for scorpion: 100%|██████████| 20/20 [00:23<00:00,  1.17s/it]
Overall Progress:  24%|██▍       | 32/134 [06:02<12:16,  7.22s/it]


Number of images retrieved for scorpion: 20



Fetching images for bird:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for bird:   5%|▌         | 1/20 [00:03<00:57,  3.05s/it][A


Fetching images for tiger: 100%|██████████| 20/20 [00:28<00:00,  1.41s/it]
Overall Progress:  25%|██▍       | 33/134 [06:21<18:05, 10.74s/it]


Number of images retrieved for tiger: 20




Fetching images for pig:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for horse:   5%|▌         | 1/20 [00:05<01:49,  5.74s/it][A[A[A


Fetching images for horse:  10%|█         | 2/20 [00:05<00:43,  2.43s/it][A[A[A

Fetching images for pig:   5%|▌         | 1/20 [00:03<01:04,  3.40s/it][A[A



Fetching images for bird: 100%|██████████| 20/20 [00:23<00:00,  1.20s/it]
Overall Progress:  25%|██▌       | 34/134 [06:34<18:59, 11.40s/it]


Number of images retrieved for bird: 20






Fetching images for horse: 100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
Fetching images for pig: 100%|██████████| 20/20 [00:22<00:00,  1.11s/it]


Number of images retrieved for horse: 20



Overall Progress:  27%|██▋       | 36/134 [06:44<12:37,  7.73s/it]


Number of images retrieved for pig: 20



Fetching images for Alpaca:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Alligator: 100%|██████████| 20/20 [00:21<00:00,  1.09s/it]
Overall Progress:  28%|██▊       | 37/134 [06:53<12:48,  7.93s/it]


Number of images retrieved for Alligator: 20




Fetching images for Anteater:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Anteater:   5%|▌         | 1/20 [00:01<00:36,  1.91s/it][A[A


Fetching images for Alpaca: 100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
Overall Progress:  28%|██▊       | 38/134 [07:11<17:32, 10.97s/it]


Number of images retrieved for Alpaca: 20



Fetching images for Bee:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Bee:   5%|▌         | 1/20 [00:02<00:52,  2.78s/it][A


Fetching images for Anteater: 100%|██████████| 20/20 [00:21<00:00,  1.06s/it]
Overall Progress:  29%|██▉       | 39/134 [07:19<16:03, 10.14s/it]


Number of images retrieved for Anteater: 20




Fetching images for Buffalo:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Buffalo:   5%|▌         | 1/20 [00:01<00:18,  1.00it/s][A[A



Fetching images for Camel:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Bee: 100%|██████████| 20/20 [00:20<00:00,  1.05s/it]
Overall Progress:  30%|██▉       | 40/134 [07:35<18:35, 11.87s/it]


Number of images retrieved for Bee: 20



Fetching images for donkey: 100%|██████████| 20/20 [00:26<00:00,  1.31s/it]
Overall Progress:  31%|███       | 41/134 [07:36<13:27,  8.68s/it]


Number of images retrieved for donkey: 20



Fetching images for Buffalo: 100%|██████████| 20/20 [00:17<00:00,  1.11it/s]
Overall Progress:  31%|███▏      | 42/134 [07:38<10:11,  6.65s/it]


Number of images retrieved for Buffalo: 20


Fetching images for Camel: 100%|██████████| 20/20 [00:22<00:00,  1.14s/it]
Overall Progress:  32%|███▏      | 43/134 [07:49<11:59,  7.91s/it]


Number of images retrieved for Camel: 20


Fetching images for Caterpillar: 100%|██████████| 20/20 [00:19<00:00,  1.03it/s]
Overall Progress:  33%|███▎      | 44/134 [07:55<11:03,  7.38s/it]


Number of images retrieved for Caterpillar: 20



Fetching images for Cheetah:   0%|          | 0/20 [00:00<?, ?it/s][A

Fetching images for Chicken:   0%|          | 0/20 [00:00<?, ?it/s][A[A
Fetching images for Cheetah:   5%|▌         | 1/20 [00:01<00:36,  1.94s/it][A


Fetching images for Dragonfly:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for Dragonfly:   5%|▌         | 1/20 [00:02<00:47,  2.51s/it][A[A[A

Fetching images for Chicken:   5%|▌         | 1/20 [00:04<01:21,  4.28s/it][A[A



Fetching images for Duck:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Cheetah: 100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
Overall Progress:  34%|███▎      | 45/134 [08:23<19:54, 13.42s/it]


Number of images retrieved for Cheetah: 20


Fetching images for Dragonfly: 100%|██████████| 20/20 [00:23<00:00,  1.17s/it]

Fetching images for Chicken: 100%|██████████| 20/20 [00:25<00:00,  1.26s/it]
Overall Progress:  35%|███▌      | 47/134 [08:25<10:11,  7.03s/it]


Number of images retrieved for Dragonfly: 20

Number of images retrieved for Chicken: 20



Fetching images for Duck: 100%|██████████| 20/20 [00:20<00:00,  1.03s/it]
Overall Progress:  36%|███▌      | 48/134 [08:35<11:27,  8.00s/it]


Number of images retrieved for Duck: 20




Fetching images for Giraffe:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for Bamboo:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A



Fetching images for Apple:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A

Fetching images for Giraffe:   5%|▌         | 1/20 [00:05<01:37,  5.14s/it][A[A


Fetching images for panda: 100%|██████████| 20/20 [00:26<00:00,  1.31s/it]
Overall Progress:  37%|███▋      | 49/134 [08:51<14:43, 10.39s/it]


Number of images retrieved for panda: 20






Fetching images for Giraffe: 100%|██████████| 20/20 [00:23<00:00,  1.19s/it]
Overall Progress:  37%|███▋      | 50/134 [09:04<15:37, 11.17s/it]


Number of images retrieved for Giraffe: 20


Fetching images for Bamboo: 100%|██████████| 20/20 [00:21<00:00,  1.10s/it]
Overall Progress:  38%|███▊      | 51/134 [09:05<11:14,  8.12s/it]


Number of images retrieved for Bamboo: 20


Fetching images for Apple: 100%|██████████| 20/20 [00:24<00:00,  1.25s/it]
Overall Progress:  39%|███▉      | 52/134 [09:10<09:49,  7.19s/it]


Number of images retrieved for Apple: 20



Fetching images for Banana:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Banana:   5%|▌         | 1/20 [00:01<00:25,  1.33s/it][A

Fetching images for Bean:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Bean:   5%|▌         | 1/20 [00:03<01:01,  3.22s/it][A[A


Fetching images for Wildflower:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for Banana: 100%|██████████| 20/20 [00:23<00:00,  1.15s/it]

Overall Progress:  40%|███▉      | 53/134 [09:35<17:00, 12.60s/it]


Number of images retrieved for Banana: 20



Fetching images for Bean: 100%|██████████| 20/20 [00:18<00:00,  1.10it/s]
Overall Progress:  40%|████      | 54/134 [09:43<14:55, 11.19s/it]


Number of images retrieved for Bean: 20




Fetching images for Mushroom:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Wildflower: 100%|██████████| 20/20 [00:18<00:00,  1.06it/s]
Overall Progress:  41%|████      | 55/134 [09:49<12:31,  9.51s/it]


Number of images retrieved for Wildflower: 20


Fetching images for Flower: 100%|██████████| 20/20 [00:15<00:00,  1.28it/s]
Overall Progress:  42%|████▏     | 56/134 [09:51<09:31,  7.32s/it]


Number of images retrieved for Flower: 20



Fetching images for Weed:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Mushroom: 100%|██████████| 20/20 [00:19<00:00,  1.05it/s]
Overall Progress:  43%|████▎     | 57/134 [10:06<12:25,  9.68s/it]


Number of images retrieved for Mushroom: 20




Fetching images for Fern:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Fern:   5%|▌         | 1/20 [00:04<01:19,  4.16s/it][A[A


Fetching images for Weed: 100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
Overall Progress:  43%|████▎     | 58/134 [10:20<13:51, 10.94s/it]


Number of images retrieved for Weed: 20





Fetching images for Reed:   5%|▌         | 1/20 [00:03<01:04,  3.38s/it][A[A[A
Fetching images for Fern: 100%|██████████| 20/20 [00:28<00:00,  1.42s/it]
Overall Progress:  44%|████▍     | 59/134 [10:39<16:42, 13.37s/it]


Number of images retrieved for Fern: 20



Fetching images for Reed: 100%|██████████| 20/20 [00:23<00:00,  1.17s/it]
Overall Progress:  45%|████▍     | 60/134 [10:43<12:50, 10.41s/it]


Number of images retrieved for Reed: 20




Fetching images for Moss:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Shrub: 100%|██████████| 20/20 [00:20<00:00,  1.04s/it]
Overall Progress:  46%|████▌     | 61/134 [10:59<14:41, 12.08s/it]


Number of images retrieved for Shrub: 20



Fetching images for Grass:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Grass:   5%|▌         | 1/20 [00:01<00:25,  1.34s/it][A


Fetching images for Palmtree:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for Palmtree:   5%|▌         | 1/20 [00:01<00:20,  1.10s/it][A[A[A



Fetching images for Corn:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Corn:   5%|▌         | 1/20 [00:01<00:29,  1.53s/it][A[A[A[A




Fetching images for Moss: 100%|██████████| 20/20 [00:20<00:00,  1.03s/it]
Overall Progress:  46%|████▋     | 62/134 [11:12<15:05, 12.58s/it]


Number of images retrieved for Moss: 20







Fetching images for Grass: 100%|██████████| 20/20 [00:19<00:00,  1.03it/s]
Overall Progress:  47%|████▋     | 63/134 [11:20<13:17, 11.24s/it]


Number of images retrieved for Grass: 20


Fetching images for Palmtree: 100%|██████████| 20/20 [00:17<00:00,  1.12it/s]
Overall Progress:  48%|████▊     | 64/134 [11:21<09:30,  8.14s/it]


Number of images retrieved for Palmtree: 20



Fetching images for Corn: 100%|██████████| 20/20 [00:16<00:00,  1.22it/s]
Overall Progress:  49%|████▊     | 65/134 [11:26<08:06,  7.05s/it]


Number of images retrieved for Corn: 20



Fetching images for Apricot: 100%|██████████| 20/20 [00:23<00:00,  1.18s/it]
Overall Progress:  49%|████▉     | 66/134 [11:34<08:32,  7.54s/it]


Number of images retrieved for Apricot: 20




Fetching images for Rose:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for Clove:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A

Fetching images for Tulip: 100%|██████████| 20/20 [00:19<00:00,  1.01it/s]
Overall Progress:  50%|█████     | 67/134 [11:45<09:32,  8.55s/it]


Number of images retrieved for Tulip: 20





Fetching images for Clove:   5%|▌         | 1/20 [00:04<01:28,  4.67s/it][A[A[A
Fetching images for Dogwood:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Dogwood:   5%|▌         | 1/20 [00:02<00:50,  2.65s/it][A



Fetching images for Rose: 100%|██████████| 20/20 [00:18<00:00,  1.07it/s]
Overall Progress:  51%|█████     | 68/134 [11:59<11:13, 10.20s/it]


Number of images retrieved for Rose: 20






Fetching images for Clove: 100%|██████████| 20/20 [00:23<00:00,  1.18s/it]
Overall Progress:  51%|█████▏    | 69/134 [12:07<10:03,  9.29s/it]


Number of images retrieved for Clove: 20




Fetching images for Dogwood: 100%|██████████| 20/20 [00:22<00:00,  1.14s/it]


Overall Progress:  52%|█████▏    | 70/134 [12:10<07:54,  7.42s/it]


Number of images retrieved for Dogwood: 20



Fetching images for Fig:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Durian: 100%|██████████| 20/20 [00:22<00:00,  1.11s/it]
Overall Progress:  53%|█████▎    | 71/134 [12:21<09:07,  8.69s/it]


Number of images retrieved for Durian: 20





Fetching images for Ferns: 100%|██████████| 20/20 [00:23<00:00,  1.18s/it]
Overall Progress:  54%|█████▎    | 72/134 [12:32<09:43,  9.41s/it]


Number of images retrieved for Ferns: 20




Fetching images for Frangipani:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for Flax:   5%|▌         | 1/20 [00:06<01:53,  5.99s/it][A[A[A

Fetching images for Fig: 100%|██████████| 20/20 [00:25<00:00,  1.29s/it]
Overall Progress:  54%|█████▍    | 73/134 [12:43<09:52,  9.72s/it]


Number of images retrieved for Fig: 20



Fetching images for Lantana:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Flax: 100%|██████████| 20/20 [00:21<00:00,  1.07s/it]
Overall Progress:  55%|█████▌    | 74/134 [12:51<09:18,  9.30s/it]


Number of images retrieved for Flax: 20





Fetching images for Hibiscus:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for Frangipani: 100%|██████████| 20/20 [00:26<00:00,  1.33s/it]


Overall Progress:  56%|█████▌    | 75/134 [13:02<09:36,  9.77s/it]


Number of images retrieved for Frangipani: 20




Fetching images for Lantana: 100%|██████████| 20/20 [00:20<00:00,  1.03s/it]
Overall Progress:  57%|█████▋    | 76/134 [13:08<08:27,  8.75s/it]


Number of images retrieved for Lantana: 20


Fetching images for Hibiscus: 100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
Overall Progress:  57%|█████▋    | 77/134 [13:18<08:24,  8.86s/it]


Number of images retrieved for Hibiscus: 20



Fetching images for Pea:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Bougainvillea: 100%|██████████| 20/20 [00:23<00:00,  1.19s/it]
Overall Progress:  58%|█████▊    | 78/134 [13:26<08:09,  8.74s/it]


Number of images retrieved for Bougainvillea: 20




Fetching images for OrchidTree:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for OrchidTree:   5%|▌         | 1/20 [00:04<01:22,  4.33s/it][A[A


Fetching images for Pea: 100%|██████████| 20/20 [00:20<00:00,  1.01s/it]
Overall Progress:  59%|█████▉    | 79/134 [13:43<10:09, 11.09s/it]


Number of images retrieved for Pea: 20





Fetching images for RangoonCreeper:   5%|▌         | 1/20 [00:06<02:00,  6.37s/it][A[A[A
Fetching images for Jackfruit:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Jackfruit:   5%|▌         | 1/20 [00:04<01:27,  4.59s/it][A



Fetching images for Cottonplant:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for OrchidTree: 100%|██████████| 20/20 [00:23<00:00,  1.19s/it]
Overall Progress:  60%|█████▉    | 80/134 [13:57<10:49, 12.02s/it]


Number of images retrieved for OrchidTree: 20


Fetching images for RangoonCreeper: 100%|██████████| 20/20 [00:22<00:00,  1.14s/it]
Overall Progress:  60%|██████    | 81/134 [14:01<08:32,  9.66s/it]


Number of images retrieved for RangoonCreeper: 20




Fetching images for Corneliantree:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Jackfruit: 100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
Overall Progress:  61%|██████    | 82/134 [14:09<08:03,  9.29s/it]


Number of images retrieved for Jackfruit: 20


Fetching images for Cottonplant: 100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
Overall Progress:  62%|██████▏   | 83/134 [14:16<07:08,  8.40s/it]


Number of images retrieved for Cottonplant: 20




Fetching images for Corneliantree:  95%|█████████▌| 19/20 [00:17<00:00,  1.12it/s][A[A
Fetching images for Coffeeplant:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Coffeeplant:   5%|▌         | 1/20 [00:01<00:24,  1.29s/it][A


Fetching images for Coconut:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for Corneliantree: 100%|██████████| 20/20 [00:31<00:00,  1.58s/it]
Overall Progress:  63%|██████▎   | 84/134 [14:34<09:31, 11.44s/it]


Number of images retrieved for Corneliantree: 20




Fetching images for wheat:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Coffeeplant: 100%|██████████| 20/20 [00:20<00:00,  1.02s/it]
Overall Progress:  63%|██████▎   | 85/134 [14:44<09:03, 11.10s/it]


Number of images retrieved for Coffeeplant: 20


Fetching images for Coconut: 100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
Overall Progress:  64%|██████▍   | 86/134 [14:50<07:39,  9.57s/it]


Number of images retrieved for Coconut: 20


Fetching images for wheat: 100%|██████████| 20/20 [00:20<00:00,  1.04s/it]
Overall Progress:  65%|██████▍   | 87/134 [14:58<06:56,  8.85s/it]


Number of images retrieved for wheat: 20



Fetching images for radish:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for radish:   5%|▌         | 1/20 [00:01<00:24,  1.28s/it][A

Fetching images for carrot:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for carrot:   5%|▌         | 1/20 [00:03<01:12,  3.80s/it][A[A


Fetching images for bed:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for radish: 100%|██████████| 20/20 [00:19<00:00,  1.01it/s]
Overall Progress:  66%|██████▌   | 88/134 [15:18<09:24, 12.26s/it]


Number of images retrieved for radish: 20


Fetching images for carrot: 100%|██████████| 20/20 [00:21<00:00,  1.08s/it]
Overall Progress:  66%|██████▋   | 89/134 [15:24<07:52, 10.51s/it]


Number of images retrieved for carrot: 20



Fetching images for cabinet:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for bed: 100%|██████████| 20/20 [00:17<00:00,  1.13it/s]
Overall Progress:  67%|██████▋   | 90/134 [15:32<07:01,  9.58s/it]


Number of images retrieved for bed: 20




Fetching images for chair:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for chair:   5%|▌         | 1/20 [00:00<00:18,  1.02it/s][A[A


Fetching images for chests:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for cabinet: 100%|██████████| 20/20 [00:22<00:00,  1.14s/it]
Overall Progress:  68%|██████▊   | 91/134 [15:48<08:12, 11.45s/it]


Number of images retrieved for cabinet: 20



Fetching images for chair: 100%|██████████| 20/20 [00:17<00:00,  1.13it/s]

Overall Progress:  69%|██████▊   | 92/134 [15:54<06:57,  9.94s/it]


Number of images retrieved for chair: 20


Fetching images for chests: 100%|██████████| 20/20 [00:20<00:00,  1.01s/it]
Overall Progress:  69%|██████▉   | 93/134 [16:01<06:11,  9.06s/it]


Number of images retrieved for chests: 20




Fetching images for desks:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for clock: 100%|██████████| 20/20 [00:18<00:00,  1.08it/s]
Overall Progress:  70%|███████   | 94/134 [16:11<06:08,  9.21s/it]


Number of images retrieved for clock: 20



Fetching images for table:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for table:   5%|▌         | 1/20 [00:01<00:26,  1.42s/it][A


Fetching images for Piano:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for desks: 100%|██████████| 20/20 [00:19<00:00,  1.03it/s]
Overall Progress:  71%|███████   | 95/134 [16:25<07:02, 10.82s/it]


Number of images retrieved for desks: 20


Fetching images for table: 100%|██████████| 20/20 [00:17<00:00,  1.12it/s]
Overall Progress:  72%|███████▏  | 96/134 [16:29<05:33,  8.78s/it]


Number of images retrieved for table: 20



Fetching images for Bookcase:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Piano: 100%|██████████| 20/20 [00:19<00:00,  1.01it/s]
Overall Progress:  72%|███████▏  | 97/134 [16:37<05:14,  8.50s/it]


Number of images retrieved for Piano: 20




Fetching images for Clothes:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for watermelon:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A

Fetching images for Clothes:   5%|▌         | 1/20 [00:01<00:24,  1.27s/it][A[A


Fetching images for Bookcase: 100%|██████████| 20/20 [00:17<00:00,  1.14it/s]
Overall Progress:  73%|███████▎  | 98/134 [16:48<05:33,  9.27s/it]


Number of images retrieved for Bookcase: 20



Fetching images for Umbrella:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for Umbrella:   5%|▌         | 1/20 [00:02<00:55,  2.93s/it][A



Fetching images for cart:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for Clothes: 100%|██████████| 20/20 [00:17<00:00,  1.12it/s]
Overall Progress:  74%|███████▍  | 99/134 [17:04<06:30, 11.15s/it]


Number of images retrieved for Clothes: 20


Fetching images for watermelon: 100%|██████████| 20/20 [00:18<00:00,  1.07it/s]
Overall Progress:  75%|███████▍  | 100/134 [17:05<04:37,  8.17s/it]


Number of images retrieved for watermelon: 20




Fetching images for Umbrella: 100%|██████████| 20/20 [00:20<00:00,  1.04s/it]
Overall Progress:  75%|███████▌  | 101/134 [17:09<03:52,  7.05s/it]


Number of images retrieved for Umbrella: 20




Fetching images for cart: 100%|██████████| 20/20 [00:23<00:00,  1.17s/it]
Overall Progress:  76%|███████▌  | 102/134 [17:18<04:02,  7.58s/it]


Number of images retrieved for cart: 20



Fetching images for ball:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for ball:   5%|▌         | 1/20 [00:01<00:28,  1.50s/it][A


Fetching images for sofa: 100%|██████████| 20/20 [00:17<00:00,  1.13it/s]
Overall Progress:  77%|███████▋  | 103/134 [17:27<04:03,  7.85s/it]


Number of images retrieved for sofa: 20





Fetching images for spoon:   5%|▌         | 1/20 [00:04<01:30,  4.76s/it][A[A[A


Fetching images for spoon:  10%|█         | 2/20 [00:04<00:36,  2.03s/it][A[A[A

Fetching images for Bowl:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Bowl:   5%|▌         | 1/20 [00:02<00:46,  2.46s/it][A[A



Fetching images for pan:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A




Fetching images for ball: 100%|██████████| 20/20 [00:19<00:00,  1.02it/s]





Overall Progress:  78%|███████▊  | 104/134 [17:42<05:07, 10.26s/it]


Number of images retrieved for ball: 20






Fetching images for spoon: 100%|██████████| 20/20 [00:21<00:00,  1.06s/it]
Overall Progress:  78%|███████▊  | 105/134 [17:46<04:03,  8.40s/it]


Number of images retrieved for spoon: 20


Fetching images for Bowl: 100%|██████████| 20/20 [00:20<00:00,  1.04s/it]
Overall Progress:  79%|███████▉  | 106/134 [17:53<03:40,  7.87s/it]


Number of images retrieved for Bowl: 20


Fetching images for fridge: 100%|██████████| 20/20 [00:18<00:00,  1.07it/s]
Fetching images for pan: 100%|██████████| 20/20 [00:19<00:00,  1.04it/s]
Overall Progress:  80%|███████▉  | 107/134 [18:00<03:23,  7.55s/it]


Number of images retrieved for pan: 20

Number of images retrieved for fridge: 20



Fetching images for book:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for book:   5%|▌         | 1/20 [00:05<01:40,  5.30s/it][A

Fetching images for Cliff:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for Cliff:   5%|▌         | 1/20 [00:02<00:47,  2.48s/it][A[A


Fetching images for Bay:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for book: 100%|██████████| 20/20 [00:24<00:00,  1.23s/it]

Overall Progress:  81%|████████▏ | 109/134 [18:29<04:32, 10.89s/it]


Number of images retrieved for book: 20


Fetching images for Cliff: 100%|██████████| 20/20 [00:21<00:00,  1.06s/it]


Overall Progress:  82%|████████▏ | 110/134 [18:31<03:26,  8.62s/it]
Fetching images for Mountains:   5%|▌         | 1/20 [00:02<00:40,  2.11s/it][A


Number of images retrieved for Cliff: 20




Fetching images for Bay: 100%|██████████| 20/20 [00:18<00:00,  1.11it/s]
Overall Progress:  83%|████████▎ | 111/134 [18:38<03:04,  8.02s/it]


Number of images retrieved for Bay: 20





Fetching images for Forests:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A



Fetching images for Waterbodies:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A


Fetching images for Forests:   5%|▌         | 1/20 [00:03<00:57,  3.03s/it][A[A[A



Fetching images for Mountains: 100%|██████████| 20/20 [00:21<00:00,  1.08s/it]
Overall Progress:  84%|████████▎ | 112/134 [18:51<03:29,  9.51s/it]


Number of images retrieved for Mountains: 20


Fetching images for Coast: 100%|██████████| 20/20 [00:21<00:00,  1.07s/it]
Overall Progress:  84%|████████▍ | 113/134 [18:53<02:33,  7.32s/it]


Number of images retrieved for Coast: 20



Fetching images for Forests: 100%|██████████| 20/20 [00:23<00:00,  1.18s/it]
Overall Progress:  85%|████████▌ | 114/134 [19:05<02:53,  8.66s/it]


Number of images retrieved for Forests: 20



Fetching images for Waterbodies: 100%|██████████| 20/20 [00:24<00:00,  1.25s/it]
Overall Progress:  86%|████████▌ | 115/134 [19:09<02:19,  7.32s/it]


Number of images retrieved for Waterbodies: 20




Fetching images for desert:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for desert:   5%|▌         | 1/20 [00:01<00:27,  1.45s/it][A[A


Fetching images for farmland:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for farmland:   5%|▌         | 1/20 [00:05<01:53,  5.95s/it][A[A[A


Fetching images for Lake: 100%|██████████| 20/20 [00:26<00:00,  1.33s/it]
Overall Progress:  87%|████████▋ | 116/134 [19:28<03:12, 10.67s/it]


Number of images retrieved for Lake: 20



Fetching images for desert: 100%|██████████| 20/20 [00:26<00:00,  1.34s/it]


Overall Progress:  87%|████████▋ | 117/134 [19:42<03:19, 11.76s/it]


Number of images retrieved for desert: 20



Fetching images for farmland: 100%|██████████| 20/20 [00:27<00:00,  1.37s/it]
Overall Progress:  88%|████████▊ | 118/134 [19:44<02:23,  8.98s/it]


Number of images retrieved for farmland: 20




Fetching images for hedges:   5%|▌         | 1/20 [00:02<00:51,  2.72s/it][A[A


Fetching images for plain:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for plain:   5%|▌         | 1/20 [00:00<00:12,  1.50it/s][A[A[A



Fetching images for sky:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for sky:   5%|▌         | 1/20 [00:02<00:49,  2.61s/it][A[A[A[A




Fetching images for river: 100%|██████████| 20/20 [00:30<00:00,  1.51s/it]
Fetching images for hedges: 100%|██████████| 20/20 [00:23<00:00,  1.17s/it]


Number of images retrieved for river: 20



Overall Progress:  90%|████████▉ | 120/134 [20:06<02:06,  9.01s/it]


Number of images retrieved for hedges: 20







Fetching images for plain: 100%|██████████| 20/20 [00:21<00:00,  1.05s/it]
Overall Progress:  90%|█████████ | 121/134 [20:11<01:40,  7.74s/it]


Number of images retrieved for plain: 20


Fetching images for sky: 100%|██████████| 20/20 [00:20<00:00,  1.02s/it]
Overall Progress:  91%|█████████ | 122/134 [20:17<01:26,  7.22s/it]


Number of images retrieved for sky: 20


Fetching images for cave: 100%|██████████| 20/20 [00:21<00:00,  1.09s/it]
Overall Progress:  92%|█████████▏| 123/134 [20:23<01:17,  7.04s/it]


Number of images retrieved for cave: 20



Fetching images for flowergarden:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching images for flowergarden:   5%|▌         | 1/20 [00:01<00:23,  1.21s/it][A

Fetching images for cloud:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for cloud:   5%|▌         | 1/20 [00:01<00:34,  1.81s/it][A[A


Fetching images for glacier:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for glacier:   5%|▌         | 1/20 [00:02<00:45,  2.41s/it][A[A[A



Fetching images for grassland:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A



Fetching images for grassland:   5%|▌         | 1/20 [00:05<01:38,  5.17s/it][A[A[A[A



Fetching images for flowergarden: 100%|██████████| 20/20 [00:29<00:00,  1.45s/it]
Fetching images for cloud: 100%|██████████| 20/20 [00:27<00:00,  1.39s/it]
Overall Progress:  93%|█████████▎| 124/134 [20:54<02:19, 13.97s/it]


Number of images retrieved for cloud: 20



Fetching images for horizon:   0%|          | 0/20 [00:00<?, ?it/s][A


Number of images retrieved for flowergarden: 20


Fetching images for glacier: 100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
Overall Progress:  94%|█████████▍| 126/134 [20:54<01:01,  7.69s/it]


Number of images retrieved for glacier: 20



Fetching images for grassland: 100%|██████████| 20/20 [00:22<00:00,  1.12s/it]
Overall Progress:  95%|█████████▍| 127/134 [21:10<01:07,  9.63s/it]


Number of images retrieved for grassland: 20




Fetching images for lighthouse:   0%|          | 0/20 [00:00<?, ?it/s][A[A


Fetching images for savannah:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A



Fetching images for plateau:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A

Fetching images for lighthouse:   5%|▌         | 1/20 [00:01<00:36,  1.90s/it][A[A


Fetching images for savannah:   5%|▌         | 1/20 [00:01<00:35,  1.89s/it][A[A[A


Fetching images for savannah:  10%|█         | 2/20 [00:01<00:15,  1.20it/s][A[A[A



Fetching images for horizon: 100%|██████████| 20/20 [00:23<00:00,  1.16s/it]
Overall Progress:  96%|█████████▌| 128/134 [21:17<00:54,  9.12s/it]


Number of images retrieved for horizon: 20


Fetching images for savannah: 100%|██████████| 20/20 [00:17<00:00,  1.15it/s]
Overall Progress:  96%|█████████▋| 129/134 [21:28<00:47,  9.52s/it]


Number of images retrieved for savannah: 20


Fetching images for lighthouse: 100%|██████████| 20/20 [00:23<00:00,  1.19s/it]
Overall Progress:  97%|█████████▋| 130/134 [21:34<00:34,  8.64s/it]


Number of images retrieved for lighthouse: 20


Fetching images for plateau: 100%|██████████| 20/20 [00:24<00:00,  1.22s/it]

Overall Progress:  98%|█████████▊| 131/134 [21:36<00:19,  6.58s/it]


Number of images retrieved for plateau: 20



Fetching images for valley:   5%|▌         | 1/20 [00:03<00:59,  3.15s/it][A

Fetching images for volcano:   0%|          | 0/20 [00:00<?, ?it/s][A[A

Fetching images for volcano:   5%|▌         | 1/20 [00:00<00:17,  1.08it/s][A[A


Fetching images for waterfall:   0%|          | 0/20 [00:00<?, ?it/s][A[A[A


Fetching images for valley: 100%|██████████| 20/20 [00:17<00:00,  1.15it/s]
Overall Progress:  99%|█████████▊| 132/134 [21:53<00:19,  9.68s/it]


Number of images retrieved for valley: 20


Fetching images for volcano: 100%|██████████| 20/20 [00:14<00:00,  1.38it/s]
Overall Progress:  99%|█████████▉| 133/134 [21:54<00:07,  7.03s/it]


Number of images retrieved for volcano: 20


Fetching images for waterfall: 100%|██████████| 20/20 [00:12<00:00,  1.54it/s]
Overall Progress: 100%|██████████| 134/134 [21:56<00:00,  9.82s/it]


Number of images retrieved for waterfall: 20
Data saved to image_urls.json





In [9]:
class ImageDownloader:
    """
    Download the images listed in a JSON file into a folder tree.

    The JSON file is expected to map ``category -> term -> [url, ...]``.
    Each image is stored at ``download_dir/category/term/<url basename>``,
    and the paths of the successfully downloaded files are collected in
    ``self.filename`` so they can be exported with ``export_filename``.
    """

    def __init__(self, json_file, download_dir='Dataset',
                 max_workers=4, delay=1):
        """
        Parameters:
        json_file (str): File containing the image URLs in JSON format.
        download_dir (str): Root folder in which the images are stored.
        max_workers (int): Number of download threads.
        delay (float): Seconds to wait between submitting two downloads.
        """
        self.json_file = json_file
        self.download_dir = download_dir
        self.max_workers = max_workers
        # Polite delay: sending requests to the server too quickly may
        # overload it or get our IP address blocked.
        self.delay = delay

        self.filename = set() # Paths of the files downloaded successfully
        self.setup_directory() # Set up the folder structure

    def setup_directory(self):
        """Create the root download folder if it does not exist yet."""
        os.makedirs(self.download_dir, exist_ok=True)

    def read_json(self):
        """
        Read the JSON file and return the data.

        Returns:
        data (dict): The data read from the JSON file.
        """
        with open(self.json_file, 'r') as file:
            data = json.load(file)
        return data

    def is_valid_url(self, url):
        """
        Check if the URL is valid.

        A URL counts as valid when it can be opened, answers with HTTP
        status 200 and reports a content type containing 'image'.

        Parameters:
        url (str): The URL to be checked.

        Returns:
        bool: True if the URL is valid, False otherwise.
        """
        try:
            with urllib.request.urlopen(url) as response:
                # Always return a real bool instead of falling through to None.
                return (response.status == 200
                        and 'image' in response.info().get_content_type())
        except Exception:
            # Best effort: any failure to open the URL means "not usable".
            return False

    def download_image(self, url, category, term, pbar):
        """
        Download the image from the given URL.

        Parameters:
        url (str): The URL of the image to be downloaded.
        category (str): The category of the image.
        term (str): The term or keyword associated with the image.
        pbar (tqdm): The progress bar object.

        Returns:
        str: A message indicating the status of the download.
        """
        if not self.is_valid_url(url):
            pbar.update(1)
            return f"Invalid URL: {url}"

        # exist_ok=True avoids the check-then-create race between worker
        # threads that download into the same category/term folder.
        term_dir = os.path.join(self.download_dir, category, term)
        os.makedirs(term_dir, exist_ok=True)

        filename = os.path.join(term_dir, os.path.basename(urlparse(url).path))

        try:
            urllib.request.urlretrieve(url, filename)
        except Exception as e:
            # The message must be built inside the except clause: Python
            # unbinds `e` as soon as the clause ends.
            pbar.update(1)
            return f"Failed to download {url}: {str(e)}"

        # Record the path only once the file was actually written, so the
        # exported list never references a download that failed.
        self.filename.add(filename)
        pbar.update(1)
        return f"Downloaded: {url}"

    def download_images(self):
        """
        Download images from the URLs in the JSON file.

        Downloads run on a thread pool; submissions are spaced by
        ``self.delay`` seconds. Each status message is printed as its
        download completes, and the list of downloaded files is exported
        at the end.

        Returns:
        None
        """
        data = self.read_json()
        download_tasks = []

        total_images = sum(len(urls) for terms in data.values()
                            for urls in terms.values())
        with tqdm(total=total_images, desc="Downloading images") as pbar:
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=self.max_workers) as executor:
                for category, terms in data.items():
                    for term, urls in terms.items():
                        for url in urls:
                            download_tasks.append(executor.submit(
                                self.download_image, url, category, term, pbar))

                            time.sleep(self.delay) # Polite delay

                for future in concurrent.futures.as_completed(download_tasks):
                    print(future.result())

        self.export_filename()

    def export_filename(self):
        """
        Export the recorded file paths, sorted, to 'filename.txt' (one per line).

        Returns:
        None
        """
        with open('filename.txt', 'w') as file:
            for filename in sorted(self.filename):
                file.write(f"{filename}\n")

In [10]:
# Download every image listed in image_urls.json into Dataset/<category>/<term>/.
downloader = ImageDownloader(json_file='image_urls.json',
                             download_dir='Dataset', max_workers=4, delay=1)
# download_images() already calls export_filename() when it finishes, which
# writes filename.txt, so no separate export call is needed here.
downloader.download_images()

Downloading images: 100%|██████████| 2680/2680 [44:42<00:00,  1.00it/s]

Downloaded: https://live.staticflickr.com/169/384447781_c811dbf520_b.jpg
Downloaded: https://live.staticflickr.com/10/13350698_3eee621f07_b.jpg
Downloaded: https://live.staticflickr.com/7102/7380287404_ccf55f560d_b.jpg
Downloaded: https://live.staticflickr.com/65535/51130913602_fa56f5b380_b.jpg
Downloaded: https://live.staticflickr.com/2027/2534782254_cbc0e12bd4_b.jpg
Downloaded: https://live.staticflickr.com/3225/3018520259_7b9fdf784a_b.jpg
Downloaded: https://live.staticflickr.com/5054/5451164696_92de322f65_b.jpg
Downloaded: https://live.staticflickr.com/6169/6164665781_594df941fb_b.jpg
Downloaded: https://live.staticflickr.com/3495/3871029191_94c00c8220_b.jpg
Downloaded: https://live.staticflickr.com/3101/2374292304_809e4f6803_b.jpg
Downloaded: https://live.staticflickr.com/8464/8363470909_a3f7515f72_b.jpg
Downloaded: https://live.staticflickr.com/4133/5044088454_47e919a6f9_b.jpg
Downloaded: https://live.staticflickr.com/8315/27882103054_9d980562e7_b.jpg
Downloaded: https://live.sta

Downloading images: 100%|██████████| 2680/2680 [44:43<00:00,  1.00s/it]

Downloaded: https://live.staticflickr.com/65535/50053838501_2ab958a8b0_b.jpg
Downloaded: https://live.staticflickr.com/7062/6836614904_5dc2767621_b.jpg
Downloaded: https://live.staticflickr.com/8502/8258264811_1cb016efe8_b.jpg
Downloaded: https://live.staticflickr.com/3576/3324696288_8f0b635475_b.jpg
Downloaded: https://live.staticflickr.com/7837/46461302315_11cfd23574_b.jpg
Downloaded: https://live.staticflickr.com/8251/8470846267_0a583b8c44_b.jpg
Downloaded: https://live.staticflickr.com/8252/8470843157_0e0ec78433_b.jpg
Downloaded: https://live.staticflickr.com/3856/33375915312_b32633a75d_b.jpg
Downloaded: https://live.staticflickr.com/6027/6097411524_e7365af0ae_b.jpg
Downloaded: https://live.staticflickr.com/2292/2377602381_2944351e93_b.jpg
Downloaded: https://live.staticflickr.com/3261/2378439644_8b6585c21a_b.jpg
Downloaded: https://live.staticflickr.com/7381/27618826684_fe0f484253_b.jpg
Downloaded: https://live.staticflickr.com/7466/15639965814_d913d2d209_b.jpg
Downloaded: https:/




In [13]:
# Mount Google Drive at /content/drive/ so the zip archives created in the
# cells below can be written to /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [12]:
def check_and_preprocess_images(image_dir, min_size=50):
    """
    Check and preprocess images in the specified directory, in place.

    Every file found under ``image_dir`` (recursively) is handled as follows:
    - files that cannot be opened/decoded as an image are deleted;
    - images narrower or shorter than ``min_size`` pixels are deleted;
    - images whose mode is not 'RGB' are converted to RGB and re-saved
      under the same path.

    `Image` is expected to be PIL's ``Image`` module, imported elsewhere in
    the notebook.

    Parameters:
    image_dir (str): The directory containing the images to be checked and preprocessed.
    min_size (int): Minimum width and height, in pixels, an image must have to be kept.

    Returns:
    None
    """
    for root, _, files in os.walk(image_dir):
        for file in files:
            file_path = os.path.join(root, file)

            # Inspect (and, if needed, convert) while the file is open, but
            # postpone every change on disk until the handle is closed.
            try:
                with Image.open(file_path) as img:
                    width, height = img.size
                    too_small = width < min_size or height < min_size
                    rgb_img = None
                    if not too_small and img.mode != 'RGB':
                        rgb_img = img.convert('RGB')
            except Exception as e:
                # If file is not an image, delete it
                os.remove(file_path)
                print(f"Deleted {file_path}: Not an image or corrupted file ({str(e)})")
                continue

            if too_small:
                os.remove(file_path)
                print(f"Deleted {file_path}: Image too small ({width}x{height})")
                continue

            if rgb_img is not None:
                # Write to a sibling temp file that keeps the original
                # extension (the output format is inferred from it), then
                # swap it in. A failed save therefore leaves the original
                # image untouched instead of deleting or truncating it.
                base, ext = os.path.splitext(file_path)
                tmp_path = f"{base}.tmp{ext}"
                try:
                    rgb_img.save(tmp_path)
                    os.replace(tmp_path, file_path)
                except Exception as e:
                    if os.path.exists(tmp_path):
                        os.remove(tmp_path)
                    print(f"Failed to convert {file_path} to RGB: {e}")
                else:
                    print(f"Converted {file_path} to RGB")

# Clean the downloaded 'Dataset' folder in place; files may be deleted or rewritten.
check_and_preprocess_images('Dataset')

Converted Dataset/scenery/desert/48115750033_ed68297f86_b.jpg to RGB
Converted Dataset/scenery/desert/49007511591_74582e367b_b.jpg to RGB
Converted Dataset/scenery/desert/52712003999_cba6904a86_b.jpg to RGB
Converted Dataset/scenery/desert/49011833281_72063e1544_b.jpg to RGB
Converted Dataset/scenery/volcano/49629211101_d46f735055_b.jpg to RGB
Converted Dataset/animal/Elephant/4839032364_8c521066b2_b.jpg to RGB
Converted Dataset/animal/Cheetah/4838391253_bed2279fdf_b.jpg to RGB
Converted Dataset/animal/Cheetah/4838389045_d5fc08ca2b_b.jpg to RGB
Converted Dataset/plant/Bean/8529951413_c315df19e5_b.jpg to RGB
Converted Dataset/plant/Wildflower/51418402341_b87250d90c_b.jpg to RGB


In [14]:
# Archive the cleaned Dataset folder recursively into Google Drive.
!zip -r /content/drive/MyDrive/Clean_Dataset.zip Dataset

  adding: Dataset/ (stored 0%)
  adding: Dataset/furniture/ (stored 0%)
  adding: Dataset/furniture/chair/ (stored 0%)
  adding: Dataset/furniture/chair/6966550587_4760de012a_b.jpg (deflated 3%)
  adding: Dataset/furniture/chair/31972477316_88e1304de3_b.jpg (deflated 3%)
  adding: Dataset/furniture/chair/7169739223_4c57d346c3_b.jpg (deflated 0%)
  adding: Dataset/furniture/chair/8288177868_2cd17a8fb5_b.jpg (deflated 0%)
  adding: Dataset/furniture/chair/2433092503_fe6cf1e688_b.jpg (deflated 0%)
  adding: Dataset/furniture/chair/2475070939_7b7338b4d8_b.jpg (deflated 0%)
  adding: Dataset/furniture/chair/177715520_deee6ea440_b.jpg (deflated 1%)
  adding: Dataset/furniture/chair/147482645_d125c7f1f8_b.jpg (deflated 0%)
  adding: Dataset/furniture/chair/2865338391_a419230b3e_b.jpg (deflated 0%)
  adding: Dataset/furniture/chair/3619633442_0c55846789_b.jpg (deflated 1%)
  adding: Dataset/furniture/chair/2556879813_fa775d223a_b.jpg (deflated 0%)
  adding: Dataset/furniture/chair/5894919073_f

In [22]:
import os
import shutil
from collections import defaultdict

# Define the source and target directories
source_dir = "Dataset"
train_dir = "data/train"
test_dir = "data/test"

# At most this many images are used per class: the first one goes to the
# test split, the remaining ones (up to 19) go to the train split.
images_per_class = 20

# Create the target directories if they don’t exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Initialize a dictionary to hold file paths for each class
class_files = defaultdict(list)
# Paths listed in filename.txt that are no longer on disk
missing_files = []

# Read the file paths from the text file
with open('filename.txt', 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            continue
        # filename.txt is written by the download step; the cleaning step may
        # have deleted some of those files since, so skip anything missing
        # instead of failing later in shutil.copy.
        if not os.path.isfile(line):
            missing_files.append(line)
            continue
        # The class is the folder directly containing the image
        # (structure: Dataset/category/class/image.jpg).
        class_name = os.path.basename(os.path.dirname(line))
        class_files[class_name].append(line)

# Copy images to the train and test directories
for class_name, files in class_files.items():
    # Create the train and test directories for the class
    train_class_dir = os.path.join(train_dir, class_name)
    test_class_dir = os.path.join(test_dir, class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    # Copy the first image to test and the following ones to train
    for i, file_path in enumerate(files[:images_per_class]):
        target_dir = test_class_dir if i == 0 else train_class_dir
        shutil.copy(file_path, target_dir)

if missing_files:
    print(f"Skipped {len(missing_files)} listed file(s) that no longer exist")
print("Dataset organization complete!")

Dataset organization complete!


In [23]:
# Archive the train/test split recursively into Google Drive.
# NOTE(review): if data.zip already exists, zip updates it in place rather than
# recreating it, so entries from an earlier run may remain — confirm this is intended.
!zip -r /content/drive/MyDrive/data.zip data

updating: data/ (stored 0%)
updating: data/train/ (stored 0%)
updating: data/train/waterfall/ (stored 0%)
updating: data/test/ (stored 0%)
updating: data/test/waterfall/ (stored 0%)
updating: data/test/waterfall/9536683732_3c101945da_b.jpg (deflated 0%)
  adding: data/train/Mushroom/ (stored 0%)
  adding: data/train/Mushroom/4961199453_557599aa0b_b.jpg (deflated 1%)
  adding: data/train/Mushroom/481012156_f8751644b8_b.jpg (deflated 0%)
  adding: data/train/Mushroom/30773281961_a2983bf1be_b.jpg (deflated 1%)
  adding: data/train/Mushroom/29359074785_d9567082ce_b.jpg (deflated 0%)
  adding: data/train/Mushroom/3397647648_48b7c31722_b.jpg (deflated 0%)
  adding: data/train/Mushroom/21545048500_e15da413a7_b.jpg (deflated 0%)
  adding: data/train/Mushroom/189098954_f360f4bdfe_b.jpg (deflated 0%)
  adding: data/train/Mushroom/3607099094_eed0cd0fed_b.jpg (deflated 0%)
  adding: data/train/Mushroom/846207623_72126d2f02_b.jpg (deflated 0%)
  adding: data/train/Mushroom/52954574_058d5c59b3_b.jpg