In [2]:
import os
from PIL import Image
import pandas as pd

In [None]:
def analyze_images(file_path, num_classes=20, extensions=('.jpeg', '.jpg', '.png')):
    """Collect per-image metadata from a directory tree of labelled images.

    The tree is expected to look like ``<file_path>/<label>/<image file>``,
    where ``<label>`` is the decimal string of an integer in
    ``range(num_classes)``. Label directories that do not exist are skipped.

    Parameters
    ----------
    file_path : str
        Root directory that holds one sub-directory per class label.
    num_classes : int, optional
        How many label directories (``0`` .. ``num_classes - 1``) to look
        for. Defaults to 20.
    extensions : str or iterable of str, optional
        File-name suffixes treated as images. Matching is case-insensitive.
        Defaults to ``('.jpeg', '.jpg', '.png')``.

    Returns
    -------
    pandas.DataFrame
        One row per image that could be read, with the columns
        ``filename``, ``class``, ``width``, ``height``, ``aspect_ratio``
        (width / height, rounded to 2 decimals), ``mode``,
        ``file_size_kb`` (rounded to 2 decimals) and ``path``.
        The columns are present even when no image was found, so
        column lookups on an empty result do not raise ``KeyError``.

    Notes
    -----
    Any exception raised while opening or measuring a file is caught,
    reported on stdout, and the file is left out of the result.
    """
    columns = [
        'filename', 'class', 'width', 'height',
        'aspect_ratio', 'mode', 'file_size_kb', 'path',
    ]

    # Accept a single suffix as well as a collection; str.endswith needs a
    # tuple, and the file names are lower-cased before matching.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    analysis_data = []

    for class_label in range(num_classes):
        path = os.path.join(file_path, str(class_label))

        # isdir, not exists: a regular file named like a label would make
        # os.listdir raise NotADirectoryError.
        if not os.path.isdir(path):
            continue

        # os.listdir returns entries in arbitrary order; sort so the row
        # order of the result is reproducible between runs and machines.
        for image_file in sorted(os.listdir(path)):
            if not image_file.lower().endswith(suffixes):
                continue

            image_path = os.path.join(path, image_file)

            try:
                with Image.open(image_path) as img:
                    width, height = img.size
                    mode = img.mode

                file_size = os.path.getsize(image_path) / 1024  # KB

                analysis_data.append({
                    'filename': image_file,
                    'class': class_label,
                    'width': width,
                    'height': height,
                    'aspect_ratio': round(width / height, 2),
                    'mode': mode,
                    'file_size_kb': round(file_size, 2),
                    'path': image_path
                })
            except Exception as e:
                # Best effort: report the problem file and keep scanning.
                print(f"Error reading {image_path}: {e}")

    # Passing the schema explicitly keeps the columns on an empty result.
    return pd.DataFrame(analysis_data, columns=columns)

In [12]:
# Scan the 'data' directory tree and gather one metadata row per image.
df = analyze_images('data')

# Headline figures. The width and height extremes are computed per column,
# so a printed "WxH" pair is not necessarily the size of one single image.
print(f"Total images: {len(df)}")
print(f"Number of classes: {df['class'].nunique()}")
print(f"Min dimensions: {df['width'].min()}x{df['height'].min()}")
print(f"Max dimensions: {df['width'].max()}x{df['height'].max()}")

# Images per class label, listed in label order rather than by frequency.
print("\n=== Class Distribution ===")
label_counts = df['class'].value_counts()
class_dist = label_counts.sort_index()
print(class_dist)

Total images: 24000
Number of classes: 20
Min dimensions: 50x50
Max dimensions: 50x50

=== Class Distribution ===
class
0     1200
1     1200
2     1200
3     1200
4     1200
5     1200
6     1200
7     1200
8     1200
9     1200
10    1200
11    1200
12    1200
13    1200
14    1200
15    1200
16    1200
17    1200
18    1200
19    1200
Name: count, dtype: int64
