WOLF SUBSPECIES DATASET

In [5]:
import os
import pandas as pd
import random

# Root directory that holds one folder of extracted images per subspecies.
dataset_root = "Dataset"

# Folder names under the dataset root. When images sit directly inside one of
# these folders, the folder's base name is used as the subspecies label, so
# each name must match a key of the subspecies details table.
subspecies_folder_names = [
    "Alaskan Interior Wolf",
    "Arctic Wolf",
    "Baffin Island Wolf",
    "Eastern Wolf",
    "Ethiopian Wolf",
    "Eurasian Wolf",
    "Gray Wolf",
    "Hudson Bay Wolf",
    "Iberian Wolf",
    "Indian Wolf",
    "Mexican Wolf",
    "Mongolian Wolf",
    "Northern Rocky Mountain Wolf",
    "Red Wolf",
    "Steppe Wolf",
    "Timber Wolf",
    "Tundra Wolf",
    "Vancouver Island Wolf",
]

# Define the paths to the folders containing the extracted images.
# os.path.join picks the platform's separator: on Windows this yields the same
# "Dataset\<name>" strings as before, while on POSIX systems a hard-coded
# backslash would be treated as part of the file name and no folder (or
# base name) would ever be resolved.
image_folder_paths = [
    os.path.join(dataset_root, folder_name)
    for folder_name in subspecies_folder_names
]

# Gather one {"Subspecies Name", "Image Path"} record per JPEG image.
# Two layouts are supported for each configured folder:
#   * nested: <folder>/<subfolder>/<image>  -> labelled with the subfolder name
#   * flat:   <folder>/<image>              -> labelled with the folder's base name
jpeg_suffixes = ('.jpg', '.jpeg')

image_paths = []
for folder in image_folder_paths:
    # Folders that do not exist are skipped without complaint.
    if not os.path.exists(folder):
        continue

    for entry in os.listdir(folder):
        entry_path = os.path.join(folder, entry)

        if os.path.isdir(entry_path):
            # Nested layout: every JPEG inside the subfolder belongs to it.
            image_paths.extend(
                {"Subspecies Name": entry, "Image Path": os.path.join(entry_path, file_name)}
                for file_name in os.listdir(entry_path)
                if file_name.lower().endswith(jpeg_suffixes)
            )
        elif entry.lower().endswith(jpeg_suffixes):
            # Flat layout: the image sits directly in the configured folder.
            image_paths.append(
                {"Subspecies Name": os.path.basename(folder), "Image Path": entry_path}
            )

# Tabulate the records so they can be filtered by subspecies later.
image_df = pd.DataFrame(image_paths)

# Stop early when nothing was collected: every later step needs images.
if image_df.empty:
    raise ValueError("No images found in the specified directories.")

# Define wolf subspecies details.
# Maps each subspecies' common name to the descriptive fields that are copied
# verbatim into every dataset row generated for that subspecies. Every entry
# carries the same four keys: "Scientific Name", "Region",
# "Physical Characteristics" and "Coat Variations".
subspecies_details = {
    "Gray Wolf": {
        "Scientific Name": "Canis lupus",
        "Region": "North America, Europe, Asia",
        "Physical Characteristics": "Large build, thick fur",
        "Coat Variations": "Gray, black, white, brown",
    },
    "Arctic Wolf": {
        "Scientific Name": "Canis lupus arctos",
        "Region": "Arctic region",
        "Physical Characteristics": "Medium-sized build, thick white fur",
        "Coat Variations": "Pure white, light gray, yellowish tinges",
    },
    "Timber Wolf": {
        "Scientific Name": "Canis lupus lycaon",
        "Region": "North America",
        "Physical Characteristics": "Large build, thick fur",
        "Coat Variations": "Gray, black, white, brown",
    },
    "Eurasian Wolf": {
        "Scientific Name": "Canis lupus lupus",
        "Region": "Europe, Asia",
        "Physical Characteristics": "Large build, thick fur",
        "Coat Variations": "Grayish-brown with reddish tinges",
    },
    "Mexican Wolf": {
        "Scientific Name": "Canis lupus baileyi",
        "Region": "Mexico, United States",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "Gray, black, reddish-brown",
    },
    "Indian Wolf": {
        "Scientific Name": "Canis lupus pallipes",
        "Region": "India",
        "Physical Characteristics": "Medium-sized build, short fur",
        "Coat Variations": "Light gray to reddish-brown",
    },
    "Tundra Wolf": {
        "Scientific Name": "Canis lupus albus",
        "Region": "Tundra region",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "White, light gray",
    },
    "Red Wolf": {
        "Scientific Name": "Canis lupus rufus",
        "Region": "Southeastern U.S.",
        "Physical Characteristics": "Medium-sized build, reddish fur",
        "Coat Variations": "Reddish-brown with gray and black markings",
    },
    "Ethiopian Wolf": {
        "Scientific Name": "Canis simensis",
        "Region": "Ethiopia",
        "Physical Characteristics": "Medium-sized build, red fur",
        "Coat Variations": "Reddish with white markings",
    },
    "Eastern Wolf": {
        "Scientific Name": "Canis lycaon",
        "Region": "Eastern Canada",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "Reddish-gray with black markings",
    },
    "Iberian Wolf": {
        "Scientific Name": "Canis lupus signatus",
        "Region": "Spain, Portugal",
        "Physical Characteristics": "Medium-sized, slender build",
        "Coat Variations": "Grayish-brown with white and black markings",
    },
    "Steppe Wolf": {
        "Scientific Name": "Canis lupus campestris",
        "Region": "Central Asia, Kazakhstan, Russia",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "Grayish-yellow",
    },
    "Mongolian Wolf": {
        "Scientific Name": "Canis lupus chanco",
        "Region": "Mongolia, Northern China",
        "Physical Characteristics": "Medium-sized build, thick fur",
        # Corrected the "tp" typo so the exported text reads properly.
        "Coat Variations": "Light gray to reddish-brown",
    },
    "Alaskan Interior Wolf": {
        "Scientific Name": "Canis lupus pambasileus",
        "Region": "Alaska",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "Gray, black, white, brown",
    },
    "Northern Rocky Mountain Wolf": {
        "Scientific Name": "Canis lupus irremotus",
        "Region": "Northern Rocky Mountains",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "Light gray to black with white markings",
    },
    "Hudson Bay Wolf": {
        "Scientific Name": "Canis lupus hudsonicus",
        "Region": "Hudson Bay area, Canada",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "Pale gray to white",
    },
    "Vancouver Island Wolf": {
        "Scientific Name": "Canis lupus crassodon",
        "Region": "Vancouver Island, Canada",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "Dark gray to black",
    },
    "Baffin Island Wolf": {
        "Scientific Name": "Canis lupus manningi",
        "Region": "Baffin Island, Canada",
        "Physical Characteristics": "Medium-sized build, thick fur",
        "Coat Variations": "White, light gray",
    },
}

# Build the dataset by sampling: each draw picks a random subspecies and then
# a random image of that subspecies, and emits one row combining the
# subspecies' descriptive details with the chosen image path.
num_samples = 10000

# Hoisted out of the sampling loop: the candidate names and the per-subspecies
# image lists never change between draws, so there is no need to rebuild the
# key list or re-filter the whole DataFrame on every iteration.
subspecies_names = list(subspecies_details)
images_by_subspecies = {}
for name, path in zip(image_df["Subspecies Name"], image_df["Image Path"]):
    images_by_subspecies.setdefault(name, []).append(path)

wolf_data = []
missing_subspecies = set()  # subspecies already reported as having no images
for _ in range(num_samples):
    subspecies = random.choice(subspecies_names)
    subspecies_images = images_by_subspecies.get(subspecies)

    if not subspecies_images:
        # Draws for a subspecies without images produce no row, so the final
        # dataset may hold fewer than num_samples rows. Report each such
        # subspecies once instead of once per draw.
        if subspecies not in missing_subspecies:
            missing_subspecies.add(subspecies)
            print(f"No images found for subspecies: {subspecies}")
        continue

    details = subspecies_details[subspecies]
    wolf_data.append({
        "Subspecies Name": subspecies,
        "Scientific Name": details["Scientific Name"],
        "Region": details["Region"],
        "Physical Characteristics": details["Physical Characteristics"],
        "Coat Variations": details["Coat Variations"],
        "Image Path": random.choice(subspecies_images),
    })

# An empty row list would produce a DataFrame without columns, and sorting it
# by "Subspecies Name" would fail with an unhelpful KeyError; fail clearly.
if not wolf_data:
    raise ValueError("No collected image matches any known subspecies; no dataset rows were generated.")

# Convert to DataFrame and save to CSV
wolf_dataset_df = pd.DataFrame(wolf_data)

# Sort the DataFrame by the "Subspecies Name" column
wolf_dataset_df_sorted = wolf_dataset_df.sort_values(by="Subspecies Name")

csv_path = "Wolf_Dataset_with_Images.csv"
wolf_dataset_df_sorted.to_csv(csv_path, index=False)

print(f"Dataset saved as {csv_path}")


Dataset saved as Wolf_Dataset_with_Images.csv
