In [None]:
import os
import matplotlib.pyplot as plt
import random
from PIL import Image
import numpy as np

In [None]:
# Dataset location (a relative path, resolved against the current working directory)
DATASET_PATH = "../dataset/mango_dataset_dl_split"

train_dir = os.path.join(DATASET_PATH, "train")
val_dir = os.path.join(DATASET_PATH, "val")

# Discover the class folders. Only sub-directories count as classes, so stray
# files such as .DS_Store are ignored; sorting makes the order reproducible
# (os.listdir returns entries in arbitrary order).
classes = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
print("Kelas yang ditemukan:", classes)


# Hitung jumlah gambar per kelas
def count_images(folder):
    """Count the files stored in each class sub-directory of ``folder``.

    Args:
        folder: Path of a dataset split directory whose immediate
            sub-directories are the classes.

    Returns:
        A dict mapping each class directory name to the number of
        non-hidden regular files it contains, with keys in sorted order.
    """
    count = {}
    # Sorted so that the returned dict (and anything plotted from it) has a
    # reproducible order; os.listdir gives no ordering guarantee.
    for cls in sorted(os.listdir(folder)):
        path = os.path.join(folder, cls)
        if not os.path.isdir(path):
            # Stray files next to the class folders (e.g. .DS_Store) are not
            # classes, and calling os.listdir on them would raise.
            continue
        count[cls] = sum(
            1
            for name in os.listdir(path)
            if not name.startswith(".") and os.path.isfile(os.path.join(path, name))
        )
    return count


# Tally the images of every class in both splits
train_count, val_count = count_images(train_dir), count_images(val_dir)

# Print one per-class report for each split
for header, per_class in (
    ("\nJumlah data TRAIN:", train_count),
    ("\nJumlah data VALIDATION:", val_count),
):
    print(header)
    for cls, n in per_class.items():
        print(f" - {cls}: {n} gambar")

# Plot the class distribution; both series are drawn on the same x positions,
# semi-transparent so the overlapping bars stay visible
plt.figure(figsize=(6, 4))
for per_class, bar_color, bar_label in (
    (train_count, "green", "Train"),
    (val_count, "orange", "Validation"),
):
    plt.bar(
        per_class.keys(),
        per_class.values(),
        color=bar_color,
        alpha=0.6,
        label=bar_label,
    )
plt.title("Distribusi Data Mango Healthy vs Rotten")
plt.xlabel("Kelas")
plt.ylabel("Jumlah Gambar")
plt.legend()
plt.show()


# Tampilkan contoh gambar
def show_random_images(folder, n=4):
    """Display a grid of randomly chosen images, one row per class.

    Args:
        folder: Path of a dataset split directory whose immediate
            sub-directories are the classes.
        n: Number of images to draw per class (the number of grid columns).
            A class holding fewer than ``n`` files shows all of them and
            leaves the remaining cells of its row empty.

    Returns:
        None. The figure is shown with ``plt.show()``. Nothing is drawn when
        ``folder`` has no class sub-directories or ``n`` is less than 1.
    """
    # Derive the rows from ``folder`` itself, so the grid always matches the
    # directory being displayed; skip stray non-directory entries.
    class_names = sorted(
        entry
        for entry in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, entry))
    )
    if not class_names or n < 1:
        return

    # squeeze=False keeps ``axes`` two-dimensional even for one row or column.
    fig, axes = plt.subplots(len(class_names), n, figsize=(8, 4), squeeze=False)
    for row_axes, cls in zip(axes, class_names):
        class_path = os.path.join(folder, cls)
        candidates = [
            name
            for name in os.listdir(class_path)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(class_path, name))
        ]
        # Cap the sample size: random.sample raises ValueError when asked
        # for more items than the population holds.
        chosen = random.sample(candidates, min(n, len(candidates)))
        for ax, img_name in zip(row_axes, chosen):
            # The context manager closes the file; imshow copies the pixel
            # data while the image is still open.
            with Image.open(os.path.join(class_path, img_name)) as img:
                ax.imshow(img)
        for ax in row_axes:
            # Hide ticks and frame instead of calling axis("off"): turning
            # the axis off would also hide the class label set below.
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)
        row_axes[0].set_ylabel(cls, rotation=0, labelpad=40, fontsize=10)
    fig.suptitle("Contoh Gambar Dataset Mango (Healthy vs Rotten)")
    fig.tight_layout()
    plt.show()


show_random_images(train_dir, n=4)

# Estimate the average image size from a small sample of each class
sizes = []
for cls in os.listdir(train_dir):
    class_path = os.path.join(train_dir, cls)
    if not os.path.isdir(class_path):
        # Stray files next to the class folders (e.g. .DS_Store) cannot be listed
        continue
    for img_name in os.listdir(class_path)[:20]:  # only a subset, for efficiency
        img_path = os.path.join(class_path, img_name)
        with Image.open(img_path) as img:
            sizes.append(img.size)  # PIL reports size as (width, height)

sizes = np.array(sizes)
if sizes.size == 0:
    # Averaging an empty array gives a scalar NaN, which cannot be indexed
    # in the report below, so handle the empty case explicitly.
    mean_size = None
    print("\nTidak ada gambar yang ditemukan; rata-rata ukuran tidak dapat dihitung.")
else:
    mean_size = np.mean(sizes, axis=0)
    print(f"\n📏 Rata-rata ukuran gambar: {mean_size[0]:.1f} x {mean_size[1]:.1f} pixel")