In [2]:
#Imports
import os
import random
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Root folder of the dataset; every immediate subfolder is treated as one class.
data_path = Path("../dataset")  # Replace with the actual path

# Gather the subfolder names (classes/age groups) and sort them so the listing
# does not depend on the order in which the filesystem yields entries.
classes = sorted(entry.name for entry in data_path.iterdir() if entry.is_dir())

print(f"Found {len(classes)} classes:", classes)

In [None]:
# Analyze class distribution
# Only files with a recognised image extension are counted, so stray files in
# a class folder (hidden OS metadata, notes, ...) do not inflate the
# "Number of Images" axis. The extension set mirrors the filter used by the
# sampling cells of this notebook.
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".bmp"}

# Maps class name -> number of image files directly inside its folder.
class_distribution = {
    cls: sum(
        1
        for f in (data_path / cls).iterdir()
        if f.is_file() and f.suffix.lower() in IMAGE_SUFFIXES
    )
    for cls in classes
}

# Plot class distribution
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(list(class_distribution.keys()), list(class_distribution.values()))
ax.tick_params(axis="x", rotation=90)  # class names can be long; keep them readable
ax.set_title("Class Distribution")
ax.set_xlabel("Class (Age)")
ax.set_ylabel("Number of Images")
plt.show()

In [None]:
# Plot random samples from each class
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".bmp"}
num_classes = len(classes)
samples_per_class = 2

if num_classes == 0:
    # plt.subplots cannot build a grid with zero rows.
    print("No classes found; nothing to plot.")
else:
    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # class (or a single sample per class), so axes[i, j] is always valid.
    fig, axes = plt.subplots(
        num_classes,
        samples_per_class,
        figsize=(samples_per_class * 3, num_classes * 3),
        squeeze=False,
    )

    # Hide the frame/ticks of every panel up front: panels that end up without
    # an image (class has too few images, or an image failed to load) then
    # stay blank instead of showing an empty set of axes.
    for ax in axes.flat:
        ax.axis("off")

    for i, cls in enumerate(classes):
        class_path = data_path / cls
        images = [
            f
            for f in class_path.iterdir()
            if f.is_file() and f.suffix.lower() in IMAGE_SUFFIXES
        ]

        # Select random images from the class (fewer if the class is small)
        random_images = random.sample(images, min(samples_per_class, len(images)))

        for j, img_path in enumerate(random_images):
            try:
                # The context manager releases the file handle; convert()
                # returns an independent in-memory copy that outlives it.
                with Image.open(img_path) as src:
                    img = src.convert("RGB")
                axes[i, j].imshow(img)
                axes[i, j].set_title(f"Class: {cls}")
            except Exception as e:
                # Best effort: report the unreadable file and keep plotting.
                print(f"Error loading image {img_path}: {e}")

    plt.tight_layout()
    plt.show()


In [None]:
# Pick one image and print its dimensions
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".bmp"}
SAMPLE_CLASS_INDEX = 1    # position of the class to inspect within `classes`
SAMPLE_IMAGE_INDEX = 240  # position of the image to inspect within that class

if not classes:
    print("No classes found; cannot pick a sample image.")
else:
    # Clamp the requested index so a dataset with fewer classes still works.
    sample_class = classes[min(SAMPLE_CLASS_INDEX, len(classes) - 1)]
    sample_class_path = data_path / sample_class
    # Sorted so the same index selects the same file on every run;
    # iterdir() yields entries in arbitrary order.
    sample_images = sorted(
        f
        for f in sample_class_path.iterdir()
        if f.is_file() and f.suffix.lower() in IMAGE_SUFFIXES
    )

    if sample_images:
        # Clamp again: the class may hold fewer images than the requested index.
        sample_image_path = sample_images[min(SAMPLE_IMAGE_INDEX, len(sample_images) - 1)]
        try:
            sample_image = Image.open(sample_image_path)
            print(f"Sample image path: {sample_image_path}")
            print(f"Sample image dimensions: {sample_image.size}")
            # Convert the image to a numpy array for sanity check
            img = np.array(sample_image)
            print(f"Type of the values in the image array: {img.dtype} and the shape of the array: {img.shape}")
        except Exception as e:
            print(f"Error loading sample image {sample_image_path}: {e}")
    else:
        print(f"No images found in class {sample_class}")

In [None]:
#Dataset Size
# Walk every class folder once, counting the regular files directly inside it
# and accumulating their on-disk size.
total_size = 0   # cumulative size of all counted files, in bytes
total_files = 0  # number of regular files across all class folders
for cls in classes:
    class_path = data_path / cls
    for f in class_path.iterdir():
        if f.is_file():
            total_files += 1
            total_size += f.stat().st_size
print(f"Total files: {total_files}")
print(f"Total size: {total_size / (1024 ** 2):.2f} MiB")

In [44]:
import fiftyone as fo
import fiftyone.zoo as foz

In [None]:
# Build a FiftyOne dataset from the dataset folder, importing it as an
# image-classification directory tree.
dataset_root = str(data_path)  # from_dir is given the path as a plain string
dataset = fo.Dataset.from_dir(
    dataset_dir=dataset_root,
    dataset_type=fo.types.ImageClassificationDirectoryTree,
)

In [None]:
# Text summary of the loaded dataset
print(dataset)
# Open the FiftyOne App on the dataset; the returned session handle is kept
# in `session` so it stays referenced after the cell finishes.
session = fo.launch_app(dataset=dataset)