In [1]:
import os
!pip install tabulate
from tabulate import tabulate

Collecting tabulate
  Using cached tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0


In [2]:
# Print the number of samples for each class in the unsplit dataset

def count_dataset(dataset_dir, extensions=('.jpg', '.jpeg', '.png')):
    """Count the image files inside each class folder of a dataset directory.

    Every immediate sub-directory of ``dataset_dir`` is treated as one class.
    Non-directory entries at the top level are ignored.

    Args:
        dataset_dir: Path of the directory holding one sub-folder per class.
        extensions: File suffixes that identify an image. Matching is
            case-insensitive, so ``photo.JPG`` counts the same as
            ``photo.jpg``. A single string is accepted as well.

    Returns:
        dict mapping each class folder name to its number of image files,
        in the order ``os.listdir`` returns the folders.

    Raises:
        OSError: if ``dataset_dir`` cannot be listed (e.g. it does not exist).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple of suffixes; normalise case once up front.
    suffixes = tuple(ext.lower() for ext in extensions)

    counts = {}
    for class_folder in os.listdir(dataset_dir):
        class_path = os.path.join(dataset_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        counts[class_folder] = sum(
            1
            for entry in os.listdir(class_path)
            # Lower-case the name so upper/mixed-case extensions are counted,
            # and require a regular file so a folder named "x.jpg" is not.
            if entry.lower().endswith(suffixes)
            and os.path.isfile(os.path.join(class_path, entry))
        )

    return counts

def print_dataset(counts):
    """Print a per-class image report followed by the overall total.

    Args:
        counts: dict mapping a class name to its image count.
    """
    print('Dataset report:')

    # One line per class, in the dictionary's own order.
    for label in counts:
        n_images = counts[label]
        print(f'{label}: {n_images} images')

    # Grand total across all classes.
    grand_total = sum(counts[label] for label in counts)
    print(f'Overall total images: {grand_total} images')




In [3]:
# Count and report the images per class folder under data/MSID_BINARY.
us_dataset = 'data/MSID_BINARY'
us_dataset_count = count_dataset(us_dataset)
print_dataset(us_dataset_count)

# Same report for data/MSID_BINARY_AUG (presumably the augmented copy of the
# dataset above, going by the directory name -- confirm).
us_aug_dataset = 'data/MSID_BINARY_AUG'
us_aug_dataset_count = count_dataset(us_aug_dataset)
# Visual separator between the two reports.
print('------------------')
print_dataset(us_aug_dataset_count)

Dataset report:
Others: 298 images
Monkeypox: 279 images
Overall total images: 577 images
------------------
Dataset report:
Others: 3576 images
Monkeypox: 3348 images
Overall total images: 6924 images


In [5]:
# Print the number of samples for each class in the split (train/val/test) dataset

def print_splitted_dataset(dataset_dir, extensions=('.jpg', '.jpeg', '.png')):
    """Print a table of per-class image counts for the train/val/test splits.

    ``dataset_dir`` is expected to contain ``train``, ``val`` and ``test``
    sub-directories, each holding one folder per class. The class list is
    taken from the sub-directories of ``train`` and sorted alphabetically.
    A final ``Total`` row sums each split.

    Args:
        dataset_dir: Root directory of the split dataset.
        extensions: File suffixes that identify an image. Matching is
            case-insensitive. A single string is accepted as well.

    Raises:
        OSError: if ``<dataset_dir>/train`` cannot be listed.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)
    splits = ('train', 'val', 'test')

    def count_images(split, class_folder):
        """Return the number of image files in one split/class folder."""
        folder = os.path.join(dataset_dir, split, class_folder)
        # A class absent from a split simply contributes zero images.
        if not os.path.isdir(folder):
            return 0
        return sum(
            1
            for entry in os.listdir(folder)
            # Only real image files: skips .DS_Store, Thumbs.db, nested
            # folders such as .ipynb_checkpoints, etc.
            if entry.lower().endswith(suffixes)
            and os.path.isfile(os.path.join(folder, entry))
        )

    # Only directories under train/ are classes; stray files are ignored so
    # they cannot trigger NotADirectoryError further down.
    train_dir = os.path.join(dataset_dir, 'train')
    classes = sorted(
        entry
        for entry in os.listdir(train_dir)
        if os.path.isdir(os.path.join(train_dir, entry))
    )

    data = []
    totals = [0] * len(splits)
    for class_folder in classes:
        per_split = [count_images(split, class_folder) for split in splits]
        # Accumulate the running total of each split column.
        totals = [total + count for total, count in zip(totals, per_split)]
        data.append([class_folder, *per_split])

    # Add the totals to the data list
    data.append(['Total', *totals])

    # Print the table
    headers = ['Class', 'Train Images', 'Validation Images', 'Test Images']
    print(tabulate(data, headers=headers))

In [6]:
print_splitted_dataset('data/MSID_BINARY_AUG_SP')

Class        Train Images    Validation Images    Test Images
---------  --------------  -------------------  -------------
Monkeypox            2343                  669            336
Others               2503                  715            358
Total                4846                 1384            694


In [11]:
print_splitted_dataset('data/MSID_US_SP')

Class         Train Images    Validation Images    Test Images
----------  --------------  -------------------  -------------
Chickenpox              74                   21             12
Measles                 63                   18             10
Monkeypox               84                   24             12
Normal                  79                   22             12
Total                  300                   85             46
