#### What the following code does
- This code uses strict keyword queries when downloading images with duckduckgo_search.
- It also checks whether each image's colors match the expected category; if an image is too dark, too light, or otherwise off-color, it is skipped.

In [None]:
from duckduckgo_search import DDGS
import requests
import os
import concurrent.futures
import time
from PIL import Image
from io import BytesIO
import numpy as np

# Feather color classes. Each name is used both as the search-term prefix and
# as the name of the sub-folder the images are saved in.
categories = [
    "black feathers",
    "blue feathers",
    "brown feathers",
    "gray feathers",
    "green feathers",
    "orange feathers",
    "purple feathers",
    "red feathers",
    "white feathers",
    "yellow feathers",
    "iridescent feathers",
    "multicolor feathers",
]

# Root of the training set; created up front so the per-category folders can
# be added beneath it.
output_dir = r"D:\BirdFeatherClassification\dataset\train"
os.makedirs(output_dir, exist_ok=True)

# Headers sent with every HTTP request made by the download helpers.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Inclusive per-channel RGB bounds for every feather category, one row per
# category: (name, lowest accepted (R, G, B), highest accepted (R, G, B)).
_COLOR_RANGE_ROWS = (
    ("black feathers", (0, 0, 0), (50, 50, 50)),
    ("blue feathers", (0, 0, 100), (100, 100, 255)),
    ("brown feathers", (60, 30, 10), (165, 100, 50)),
    ("gray feathers", (100, 100, 100), (200, 200, 200)),
    ("green feathers", (0, 100, 0), (100, 255, 100)),
    ("orange feathers", (200, 100, 0), (255, 165, 50)),
    ("purple feathers", (75, 0, 75), (200, 100, 200)),
    ("red feathers", (150, 0, 0), (255, 100, 100)),
    ("white feathers", (200, 200, 200), (255, 255, 255)),
    ("yellow feathers", (200, 200, 0), (255, 255, 100)),
    # Deliberately broad: shimmering feathers span many hues.
    ("iridescent feathers", (0, 50, 50), (255, 255, 255)),
    # Full RGB cube: every pixel counts as a match.
    ("multicolor feathers", (0, 0, 0), (255, 255, 255)),
)

# category name -> [lower_bound, upper_bound]
color_ranges = {name: [low, high] for name, low, high in _COLOR_RANGE_ROWS}

def is_valid_image(img_url):
    """Return True if a HEAD request reports that ``img_url`` serves an image.

    Args:
        img_url: URL to probe.

    Returns:
        True when the final response's ``Content-Type`` is an ``image/*``
        type; False otherwise, including when the request itself fails.
    """
    try:
        # requests.head() does not follow redirects unless asked to, so a
        # redirected image URL would otherwise be judged on the 3xx response
        # (usually text/html or no content type) and wrongly rejected.
        response = requests.head(
            img_url, headers=HEADERS, timeout=5, allow_redirects=True
        )
    except requests.exceptions.RequestException:
        return False

    # Media types are case-insensitive; normalise before comparing.
    content_type = response.headers.get("Content-Type", "")
    return content_type.strip().lower().startswith("image/")

def is_feather_color_correct(image, category):
    """Tell whether enough of an image falls inside the category's RGB range.

    Args:
        image: Encoded image bytes (as downloaded).
        category: Key into ``color_ranges`` selecting the inclusive
            (lower, upper) RGB bounds to test against.

    Returns:
        A truthy value when more than 30% of the pixels lie within the bounds
        on all three channels; False when they do not or when anything goes
        wrong while decoding or looking up the range.
    """
    try:
        pixels = np.asarray(Image.open(BytesIO(image)).convert("RGB"))
        low, high = (np.array(bound) for bound in color_ranges[category])

        # Broadcast the (3,) bounds over the (H, W, 3) pixels; a pixel matches
        # only if every one of its channels is inside the range.
        in_range = np.all((pixels >= low) & (pixels <= high), axis=-1)

        # Mean of a boolean mask is the fraction of matching pixels.
        return in_range.mean() > 0.3
    except Exception:
        return False

def download_image(img_url, filename, category):
    """Download ``img_url`` and save it to ``filename`` if its colors match.

    The function is best-effort: every failure is reported with a message and
    the image is skipped, so one bad URL never aborts the batch.

    Args:
        img_url: URL of the candidate image.
        filename: Destination path for the image bytes.
        category: Feather category whose color range the image must satisfy.

    Returns:
        None.
    """
    if not is_valid_image(img_url):
        return

    try:
        response = requests.get(img_url, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # Report instead of silently dropping, so failures stay visible.
        print(f"Skipped (download failed): {img_url} ({exc})")
        return

    if not is_feather_color_correct(response.content, category):
        print(f"Skipped (color mismatch): {filename}")
        return

    try:
        with open(filename, "wb") as f:
            f.write(response.content)
    except OSError as exc:
        # This runs in a worker thread whose Future result is never read, so
        # an uncaught write error would vanish without any trace.
        print(f"Skipped (could not save): {filename} ({exc})")
        return

    print(f"Downloaded: {filename}")

def fetch_images(category, max_results=200):
    """Search for feather images of ``category`` and download the matches.

    Runs several keyword queries through DuckDuckGo image search and hands
    every distinct result URL to ``download_image`` on a thread pool. Files
    are written to ``<output_dir>/<category>/<category>_<n>.jpg``.

    Args:
        category: Feather category; used as query prefix, folder name and
            file-name prefix.
        max_results: Maximum number of results requested per query.

    Returns:
        None. Blocks until every submitted download has finished.
    """
    category_dir = os.path.join(output_dir, category)
    os.makedirs(category_dir, exist_ok=True)

    print(f"Searching for: {category}")

    ddgs = DDGS()

    queries = [
        f"{category} bird feather solid color high resolution",
        f"{category} bird feather macro close-up HD",
        f"{category} bird feather close-up high quality",
        f"{category} single {category} feather on white background",
        f"{category} plain bird feather"
    ]

    # URLs already queued; different queries often return the same image.
    seen_urls = set()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for query in queries:
            time.sleep(1)  # Prevent rate limiting
            results = ddgs.images(query, max_results=max_results)

            for img in results:
                img_url = img.get("image")
                if not img_url or img_url in seen_urls:
                    continue
                seen_urls.add(img_url)

                # Number files by how many downloads have been queued so far.
                # A per-query index would restart at 0 for every query, making
                # later queries overwrite earlier files and letting two
                # threads write the same path at once.
                filename = os.path.join(
                    category_dir, f"{category}_{len(futures)}.jpg"
                )
                futures.append(
                    executor.submit(download_image, img_url, filename, category)
                )

        concurrent.futures.wait(futures)

# Build the dataset one category at a time, requesting up to 200 search
# results per query for each category.
for category in categories:
    fetch_images(category, max_results=200)

# Reached only if none of the fetch_images calls above raised.
print("Feather dataset ready!")


##### Downloading only specific categories of bird feathers

In [None]:
from duckduckgo_search import DDGS
import requests
import os
import concurrent.futures
import time
from PIL import Image
from io import BytesIO
import numpy as np

# Only the categories listed here are downloaded by this cell.
categories = ["green feathers"]

# Root of the training set; created up front so the per-category folders can
# be added beneath it.
output_dir = r"D:\BirdFeatherClassification\dataset\train"
os.makedirs(output_dir, exist_ok=True)

# Headers sent with every HTTP request made by the download helpers.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# category name -> [lower_bound, upper_bound], inclusive per-channel RGB
# bounds. Only the category selected above needs an entry.
color_ranges = {"green feathers": [(0, 100, 0), (100, 255, 100)]}

def is_valid_image(img_url):
    """Return True if a HEAD request reports that ``img_url`` serves an image.

    Args:
        img_url: URL to probe.

    Returns:
        True when the final response's ``Content-Type`` is an ``image/*``
        type; False otherwise, including when the request itself fails.
    """
    try:
        # requests.head() does not follow redirects unless asked to, so a
        # redirected image URL would otherwise be judged on the 3xx response
        # (usually text/html or no content type) and wrongly rejected.
        response = requests.head(
            img_url, headers=HEADERS, timeout=5, allow_redirects=True
        )
    except requests.exceptions.RequestException:
        return False

    # Media types are case-insensitive; normalise before comparing.
    content_type = response.headers.get("Content-Type", "")
    return content_type.strip().lower().startswith("image/")

def is_feather_color_correct(image, category):
    """Tell whether enough of an image falls inside the category's RGB range.

    Args:
        image: Encoded image bytes (as downloaded).
        category: Key into ``color_ranges`` selecting the inclusive
            (lower, upper) RGB bounds to test against.

    Returns:
        A truthy value when more than 30% of the pixels lie within the bounds
        on all three channels; False when they do not or when anything goes
        wrong while decoding or looking up the range.
    """
    try:
        pixels = np.asarray(Image.open(BytesIO(image)).convert("RGB"))
        low, high = (np.array(bound) for bound in color_ranges[category])

        # Broadcast the (3,) bounds over the (H, W, 3) pixels; a pixel matches
        # only if every one of its channels is inside the range.
        in_range = np.all((pixels >= low) & (pixels <= high), axis=-1)

        # Mean of a boolean mask is the fraction of matching pixels.
        return in_range.mean() > 0.3
    except Exception:
        return False

def download_image(img_url, filename, category):
    """Download ``img_url`` and save it to ``filename`` if its colors match.

    The function is best-effort: every failure is reported with a message and
    the image is skipped, so one bad URL never aborts the batch.

    Args:
        img_url: URL of the candidate image.
        filename: Destination path for the image bytes.
        category: Feather category whose color range the image must satisfy.

    Returns:
        None.
    """
    if not is_valid_image(img_url):
        return

    try:
        response = requests.get(img_url, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # Report instead of silently dropping, so failures stay visible.
        print(f"Skipped (download failed): {img_url} ({exc})")
        return

    if not is_feather_color_correct(response.content, category):
        print(f"Skipped (color mismatch): {filename}")
        return

    try:
        with open(filename, "wb") as f:
            f.write(response.content)
    except OSError as exc:
        # This runs in a worker thread whose Future result is never read, so
        # an uncaught write error would vanish without any trace.
        print(f"Skipped (could not save): {filename} ({exc})")
        return

    print(f"Downloaded: {filename}")

def fetch_images(category, max_results=200):
    """Search for feather images of ``category`` and download the matches.

    Runs several keyword queries through DuckDuckGo image search and hands
    every distinct result URL to ``download_image`` on a thread pool. Files
    are written to ``<output_dir>/<category>/<category>_<n>.jpg``.

    Args:
        category: Feather category; used as query prefix, folder name and
            file-name prefix.
        max_results: Maximum number of results requested per query.

    Returns:
        None. Blocks until every submitted download has finished.
    """
    category_dir = os.path.join(output_dir, category)
    os.makedirs(category_dir, exist_ok=True)

    print(f"Searching for: {category}")

    ddgs = DDGS()

    queries = [
        f"{category} bird feather solid color high resolution",
        f"{category} bird feather macro close-up HD",
        f"{category} bird feather close-up high quality",
        f"{category} single {category} feather on white background",
        f"{category} plain bird feather"
    ]

    # URLs already queued; different queries often return the same image.
    seen_urls = set()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for query in queries:
            time.sleep(1)  # Prevent rate limiting
            results = ddgs.images(query, max_results=max_results)

            for img in results:
                img_url = img.get("image")
                if not img_url or img_url in seen_urls:
                    continue
                seen_urls.add(img_url)

                # Number files by how many downloads have been queued so far.
                # A per-query index would restart at 0 for every query, making
                # later queries overwrite earlier files and letting two
                # threads write the same path at once.
                filename = os.path.join(
                    category_dir, f"{category}_{len(futures)}.jpg"
                )
                futures.append(
                    executor.submit(download_image, img_url, filename, category)
                )

        concurrent.futures.wait(futures)

# Fetch each selected category in turn, requesting up to 200 search results
# per query.
for category in categories:
    fetch_images(category, max_results=200)

# Reached only if none of the fetch_images calls above raised.
print("Feather dataset ready!")
