In [2]:
import os
import shutil
import random
from collections import Counter

# Paths to the downloaded datasets
dataset1_path = "E:/Coding/garbage_classification/datasets/class_x6"  # Data 6 class
dataset2_path = "E:/Coding/garbage_classification/datasets/class_x12"  # Data 12 class

# Destination directory for the combined dataset
combined_dataset_path = "E:/Coding/garbage_classification/datasets/combined_dataset"

# Create the destination directory if it doesn't exist
os.makedirs(combined_dataset_path, exist_ok=True)

# Function to copy images and labels from a dataset to the combined dataset
def copy_dataset(source_path, destination_path):
    for class_name in os.listdir(source_path):
        # Map classes to desired categories
        if class_name in ["trash", "clothes", "shoes"]:
            continue  # Skip classes that don't fit into the desired categories
        
        # Categorize organic waste
        if class_name == "organic" or class_name == "biological":
            category = "organic"
        # Categorize inorganic waste
        elif class_name in ["glass", "plastic", "paper", "cardboard", "white-glass", "brown-glass", "metal"]:
            category = "inorganic"
        # Categorize recyclable waste
        elif class_name in ["glass", "plastic", "paper", "white-glass", "brown-glass", "metal", "battery"]:
            category = "recyclable"
        else:
            continue
        
        class_path = os.path.join(source_path, class_name)
        destination_class_path = os.path.join(destination_path, category)
        os.makedirs(destination_class_path, exist_ok=True)
        
        # Copy images
        for file_name in os.listdir(class_path):
            file_path = os.path.join(class_path, file_name)
            shutil.copy(file_path, destination_class_path)

# Combine datasets into three categories: organic, inorganic, and recyclable
copy_dataset(dataset1_path, combined_dataset_path)  # Copy from 6-class dataset
copy_dataset(dataset2_path, combined_dataset_path)  # Copy from 12-class dataset

print("Combined dataset created successfully.")


Combined dataset created successfully.


In [3]:
# Check directory structure
print("Directory Structure:")
for category in ["organic", "inorganic", "recyclable"]:
    category_path = os.path.join(combined_dataset_path, category)
    if os.path.isdir(category_path):
        print(f"- {category}: Found")
    else:
        print(f"- {category}: Not Found")

# Count number of images in each category
print("\nNumber of Images:")
for category in ["organic", "inorganic", "recyclable"]:
    category_path = os.path.join(combined_dataset_path, category)
    if os.path.isdir(category_path):
        num_images = len(os.listdir(category_path))
        print(f"- {category}: {num_images} images")
    else:
        print(f"- {category}: N/A")

Directory Structure:
- organic: Found
- inorganic: Found
- recyclable: Found

Number of Images:
- organic: 985 images
- inorganic: 4957 images
- recyclable: 945 images


In [None]:

# Define the combined dataset directory
combined_dataset_path = "/path/to/combined_dataset"

# Define the waste categories
waste_categories = ["organic", "inorganic", "recyclable"]

# Dictionary to store counts of images in each category
image_counts = {}

# Function to perform quality check on images
def quality_check():
    print("Performing quality check on images...")
    for category in waste_categories:
        category_path = os.path.join(combined_dataset_path, category)
        image_files = os.listdir(category_path)
        # Check for any empty directories
        if not image_files:
            print(f"Empty directory found in category: {category}")
        # Check for non-image files
        non_image_files = [file for file in image_files if not file.lower().endswith(('.jpg', '.jpeg', '.png'))]
        if non_image_files:
            print(f"Non-image files found in category {category}: {non_image_files}")
    print("Quality check completed.")

# Function to check balancedness of classes
def check_balancedness():
    print("Checking balancedness of classes...")
    for category in waste_categories:
        category_path = os.path.join(combined_dataset_path, category)
        image_counts[category] = len(os.listdir(category_path))
    print("Number of Images:")
    for category, count in image_counts.items():
        print(f"- {category}: {count} images")
    print("Balancedness check completed.")

# Main function to perform quality check and balancedness check
def main():
    quality_check()
    print("\n")
    check_balancedness()

if __name__ == "__main__":
    main()
