In [12]:
import os
from dotenv import load_dotenv, find_dotenv
from loguru import logger

# find .env automagically by walking up directories until it's found
logger.info("Finding .env file")
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)
logger.success("Loaded .env file")

logger.info("Reading environment variables")
project_dir = os.environ.get("PROJECT_DIR")
logger.success(f"PROJECT_DIR: {project_dir}")

# Set up logging configuration
#logger.add("logs.log", rotation="500 MB", level="INFO")

# Log the folder path and file names
logger.info("Listing files in the raw data folder")
dataset_path = os.path.join(project_dir, 'data/raw')
file_list = os.listdir(dataset_path)

logger.success(f"Files in folder {dataset_path}:")
logger.info([file_name for file_name in file_list])

[32m2024-06-18 19:45:46.381[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mFinding .env file[0m
[32m2024-06-18 19:45:46.390[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [32m[1mLoaded .env file[0m
[32m2024-06-18 19:45:46.393[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mReading environment variables[0m
[32m2024-06-18 19:45:46.398[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m15[0m - [32m[1mPROJECT_DIR: /home/batek254/waste_management[0m
[32m2024-06-18 19:45:46.401[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mListing files in the raw data folder[0m
[32m2024-06-18 19:45:46.407[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m25[0m - [32m[1mFiles in folder /home/batek254/waste_management/data/raw:[0m
[32m2024-06-18 19:45:46.410[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36

In [16]:
from PIL import Image

# Retrieve the names of all folders (representing trash types) within the dataset directory
garbage_types = os.listdir(dataset_path)

# Set to store unique image dimensions for the entire dataset
all_dimensions_set = set()

# Iterate over each trash type (folder) to process images
for garbage_type in garbage_types:
    folder_path = os.path.join(dataset_path, garbage_type)
    
    # Verify that the current item is a directory
    if os.path.isdir(folder_path):
        image_files = [f for f in os.listdir(folder_path) if f.endswith(('jpg', 'jpeg'))]
        
        # Display the count of images in the current folder
        num_images = len(image_files)
        logger.info(f"{garbage_type} folder contains {num_images} images.")
        
        # Loop over each image to check its dimensions
        for image_file in image_files:
            image_path = os.path.join(folder_path, image_file)
            with Image.open(image_path) as img:
                # Extract the width, height, and channels (color depth) of the image and add to the dimensions set
                width, height = img.size
                channels = len(img.getbands())
                all_dimensions_set.add((width, height, channels))
                
# Determine if all images in the entore dataset have the same dimensions 
if len(all_dimensions_set) == 1: 
    width, height, channel = all_dimensions_set.pop()
    logger.success(f"\nAll images in the dataset have the same dimensions: {width}x{height} with {channels} color channels.")
else:
    logger.error("\nThe images in the dataset have different dimensions or color channels.")


[32m2024-06-18 19:49:20.272[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mtrash folder contains 137 images.[0m
[32m2024-06-18 19:49:20.569[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mpaper folder contains 594 images.[0m
[32m2024-06-18 19:49:21.654[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mcardboard folder contains 403 images.[0m
[32m2024-06-18 19:49:22.406[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mplastic folder contains 482 images.[0m
[32m2024-06-18 19:49:23.221[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mglass folder contains 501 images.[0m
[32m2024-06-18 19:49:24.060[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mmetal folder contains 410 images.[0m
[32m2024-06-18 19:49:24.767[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [32m[1m