In [45]:
from fastdownload import download_url
from fastai.vision.all import *
from PIL import Image

In [46]:
from duckduckgo_search import DDGS
import requests
import os
from pathlib import Path

In [47]:
def download_image(url, folder_path, image_name, timeout=10):
    """Download one image from ``url`` and save it as ``folder_path / image_name``.

    The response body is streamed into a temporary ``<image_name>.part`` file
    and only renamed to its final name once every chunk has been written, so
    a failed or interrupted download never leaves a truncated image behind.

    Args:
        url: Address of the image to fetch.
        folder_path: Directory to save into (``str`` or ``Path``).
        image_name: File name to give the saved image.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block the notebook indefinitely.

    Returns:
        1 if the image was saved, 0 if anything went wrong. Errors are
        reported with ``print`` instead of being raised, which lets callers
        add the return value straight onto a running success count.
    """
    partial_path = None
    try:
        image_path = Path(folder_path) / image_name
        partial_path = image_path.with_name(image_path.name + ".part")
        # The context manager releases the streamed connection even on error.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Treat HTTP error statuses as failures
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
        # Publish the file under its real name only after a complete write.
        os.replace(partial_path, image_path)
        print(f"Downloaded {image_name}")
        return 1
    except Exception as e:
        # Deliberately best-effort: one bad URL must not abort a whole batch.
        if partial_path is not None:
            try:
                os.remove(partial_path)
            except OSError:
                pass  # Nothing was written, or it is already gone.
        print(f"Failed to download {image_name}. Error: {e}")
        return 0

In [48]:
def download_image_by_keyword(parents_dir, keyword, num_downloads=3):
    """Search DuckDuckGo for ``keyword`` and save up to ``num_downloads`` images.

    Images are written to ``<parents_dir>/<keyword>`` (spaces in the final
    folder name replaced by underscores) and named
    ``<keyword>_<n>.jpg``, also with spaces replaced by underscores.

    Args:
        parents_dir: Directory under which the per-keyword folder is created.
        keyword: Search phrase; also used for the folder and file names.
        num_downloads: Number of images wanted. Values of 0 or less download
            nothing.

    Returns:
        The number of images actually saved, which is lower than
        ``num_downloads`` when the search ran out of usable results.
    """
    # Set up the folder to save images
    folder_path = Path(f"{parents_dir}/{keyword}")

    # Replace spaces with underscores in the keyword folder name only
    folder_path = folder_path.with_name(folder_path.name.replace(" ", "_"))
    folder_path.mkdir(parents=True, exist_ok=True)

    # Nothing requested: skip the search instead of looping over every result
    if num_downloads <= 0:
        return 0

    # Fetch the image URLs using DuckDuckGo Search. Five times as many results
    # as needed are requested because failed downloads do not count towards
    # the total, so later results act as replacements.
    results = DDGS().images(
        keywords=keyword,
        region="wt-wt",
        safesearch="off",
        size=None,
        color=None,
        type_image=None,
        layout=None,
        license_image=None,
        max_results=num_downloads * 5
    )

    # Download each image until enough have succeeded
    count = 0
    for result in results:
        image_url = result.get('image')
        if not image_url:
            continue  # Result carries no image URL; try the next one
        # The index only advances on success, so a retry reuses the same name
        image_name = f"{keyword}_{count+1}.jpg".replace(" ", "_")
        count += download_image(image_url, folder_path, image_name)
        if count >= num_downloads:
            break
    return count

In [49]:
%%capture --no-display
# list comprehension with the above function for each keyword, butterfly, butterfly with sun light, butterfly with flowers, butterfly with sunset
result = [download_image_by_keyword(parents_dir = "butterfly", keyword = keyword, num_downloads = 3) 
          for keyword in ["picture of butterfly", "picture of butterfly with sun light", "picture of butterfly with flowers", "picture of butterfly with sunset"]];

In [50]:
import os

def list_all_files(directory):
    """Recursively collect the path of every file found beneath ``directory``.

    Args:
        directory: Root folder whose tree is traversed with ``os.walk``.

    Returns:
        A list of paths, each formed by joining the folder reported by
        ``os.walk`` with the file name, in the order the walk yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
    ]


# Collect every downloaded butterfly image, walking all keyword sub-folders.
directory_path = "./butterfly"  # Folder the butterfly downloads were saved under
all_files_butterfly = list_all_files(directory_path)


In [52]:
import os
import random
import shutil
from pathlib import Path

def move_files(source_folder, all_files, split_ratio, train_dir, test_dir, seed=None):
    """
    Randomly splits files between train and test directories, then deletes the source folder.

    Args:
        source_folder: Directory the files were collected from. It is removed
            recursively once the files have been moved, together with anything
            still left inside it.
        all_files: Full file paths to move. The sequence is copied before
            shuffling, so the caller's list keeps its order.
        split_ratio: Fraction of the files moved to the train directory; the
            remainder goes to the test directory.
        train_dir: The path to the train directory (created if missing).
        test_dir: The path to the test directory (created if missing).
        seed: Optional seed for a reproducible shuffle. When None, the shared
            ``random`` module state is used, as before.
    """
    # Ensure train and test directories exist
    Path(train_dir).mkdir(parents=True, exist_ok=True)
    Path(test_dir).mkdir(parents=True, exist_ok=True)

    # Shuffle a copy so the caller's list is not reordered as a side effect
    shuffled = list(all_files)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)

    # The first `split_index` shuffled files go to train, the rest to test
    split_index = int(len(shuffled) * split_ratio)

    # Move the files, keeping only the base name at the destination
    for i, file in enumerate(shuffled):
        target_dir = train_dir if i < split_index else test_dir
        shutil.move(file, os.path.join(target_dir, os.path.basename(file)))

    # Remove the source directory and whatever is left in it
    shutil.rmtree(source_folder)

# Send 80% of the butterfly images to the train folder and the rest to the
# test folder; the "butterfly" download folder is removed afterwards.
move_files(
    source_folder="butterfly",
    all_files=all_files_butterfly,
    split_ratio=0.8,
    train_dir="data/train/butterfly",
    test_dir="data/test/butterfly",
)


In [56]:
%%capture --no-display
# list comprehension with the above function for each keyword, butterfly, butterfly with sun light, butterfly with flowers, butterfly with sunset
result = [download_image_by_keyword(parents_dir = "forest", keyword = keyword, num_downloads = 3) 
          for keyword in ["picture of forest", "picture of forest with sun light", "picture of forest with flowers", "picture of forest with sunset"]];

directory_path = "./forest"  # Replace with the actual path
all_files_forest = list_all_files(directory_path)

move_files(source_folder="forest", all_files=all_files_forest, split_ratio=0.8, train_dir="data/train/forest", test_dir="data/test/forest")