In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from collections import Counter
from google.colab import drive

In [None]:
# Mount Google Drive (if it is not mounted yet)
from google.colab import drive
drive.mount('/content/drive')

# Root folder of the dataset on Drive
dataset_path = '/content/drive/MyDrive/FP AI LANJUT/BATIK _GENERATOR/dataset'

# Collect the class folders: only directories whose name starts with "batik_"
# are kept, so unrelated folders living next to the dataset are ignored.
all_classes = []
for entry in os.listdir(dataset_path):
    if not entry.startswith('batik_'):
        continue
    if os.path.isdir(os.path.join(dataset_path, entry)):
        all_classes.append(entry)
all_classes.sort()

print(f"Ditemukan {len(all_classes)} kelas batik: \n{all_classes}")

Mounted at /content/drive
Ditemukan 20 kelas batik: 
['batik_betawi', 'batik_bokor_kencono', 'batik_buketan', 'batik_dayak', 'batik_jlamprang', 'batik_kawung', 'batik_liong', 'batik_mega_mendung', 'batik_parang', 'batik_sekarjagad', 'batik_sidoluhur', 'batik_sidomukti', 'batik_sidomulyo', 'batik_singa_barong', 'batik_srikaton', 'batik_tribusono', 'batik_tujuh_rupa', 'batik_tuntrum', 'batik_wahyu_tumurun', 'batik_wirasat']


In [None]:
# Scan every class folder and count how many images use each PIL colour mode
# (RGB, RGBA, P, ...). Non-RGB images are also collected individually in
# `problematic_files` as (class_name, filename, mode) tuples.
mode_data = []
problematic_files = []

for class_name in all_classes:
    folder_path = os.path.join(dataset_path, class_name)
    files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    # Counter keeps first-seen order, so table columns appear in discovery order.
    mode_counts = Counter()

    for filename in files:
        img_path = os.path.join(folder_path, filename)
        try:
            # Image.open only reads the header, which is enough to know the mode.
            with Image.open(img_path) as img:
                mode = img.mode
        except Exception as exc:
            # Report unreadable/corrupt files instead of hiding them; `Exception`
            # (not a bare except) lets KeyboardInterrupt stop a long Drive scan.
            print(f"[SKIP] Gagal membuka {img_path}: {exc}")
            continue

        mode_counts[mode] += 1
        if mode != 'RGB':
            problematic_files.append((class_name, filename, mode))

    # 'Total Gambar' counts every matching file, including unreadable ones.
    row = {'Jenis Batik': class_name, 'Total Gambar': len(files)}
    row.update(mode_counts)

    mode_data.append(row)

# --- SHOW THE RESULT ---
# A mode missing from a folder becomes NaN when rows are combined; fill with 0
# and cast back to int so counts are not stored as floats.
df_modes = pd.DataFrame(mode_data).fillna(0)
count_cols = [col for col in df_modes.columns if col != 'Jenis Batik']
if count_cols:
    df_modes[count_cols] = df_modes[count_cols].astype(int)

print("\n" + "="*50)
print("   TABEL DISTRIBUSI MODE GAMBAR (CHANNEL)")
print("="*50)

# Plain table (no highlighting is applied); every column other than RGB
# marks images that need special handling before training.
display(df_modes.style.format(precision=0))

# --- RECOMMENDED ACTION ---
# Total number of images, across all folders, that are not plain RGB.
non_rgb_cols = [col for col in df_modes.columns if col not in ['Jenis Batik', 'Total Gambar', 'RGB']]
total_non_rgb = 0
if non_rgb_cols:
    total_non_rgb = int(df_modes[non_rgb_cols].sum().sum())


   TABEL DISTRIBUSI MODE GAMBAR (CHANNEL)


Unnamed: 0,Jenis Batik,Total Gambar,RGB,RGBA,P
0,batik_betawi,150,150,0,0
1,batik_bokor_kencono,150,150,0,0
2,batik_buketan,150,147,3,0
3,batik_dayak,150,140,6,4
4,batik_jlamprang,150,150,0,0
5,batik_kawung,150,149,0,1
6,batik_liong,150,128,22,0
7,batik_mega_mendung,150,149,0,1
8,batik_parang,150,149,0,1
9,batik_sekarjagad,150,150,0,0


In [None]:
# Table of every non-RGB image found by the mode scan. The tuples in
# `problematic_files` are (class_name, filename, mode); naming the columns
# avoids the anonymous 0/1/2 headers and keeps the header row even when the
# list is empty.
df_stats = pd.DataFrame(problematic_files, columns=['Jenis Batik', 'Nama File', 'Mode'])

print("\n=== FILE BERMASALAH ===")
display(df_stats)


=== FILE BERMASALAH ===


Unnamed: 0,0,1,2
0,batik_buketan,buketan_6 - Copy.png,RGBA
1,batik_buketan,buketan_154.png,RGBA
2,batik_buketan,buketan_6.png,RGBA
3,batik_dayak,dayak (3).png,P
4,batik_dayak,dayak (10).png,RGBA
5,batik_dayak,dayak (16).png,RGBA
6,batik_dayak,dayak (12).png,P
7,batik_dayak,dayak (8).png,P
8,batik_dayak,dayak (7).png,RGBA
9,batik_dayak,dayak (2).png,RGBA


In [None]:
# Per-folder image dimension statistics (count, extremes, averages, aspect ratio).
stats_data = []

for class_name in all_classes:
    folder_path = os.path.join(dataset_path, class_name)
    image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    widths = []
    heights = []

    for img_file in image_files:
        img_path = os.path.join(folder_path, img_file)
        try:
            with Image.open(img_path) as img:
                w, h = img.size
        except Exception as exc:
            # Skip corrupt files, but say so; `Exception` (not a bare except)
            # still lets KeyboardInterrupt abort the scan.
            print(f"[SKIP] Gagal membuka {img_path}: {exc}")
            continue
        widths.append(w)
        heights.append(h)

    # Folders without any readable image produce no row.
    if widths:
        stats_data.append({
            'Jenis Batik': class_name,
            'Jml Gambar': len(widths),
            # NOTE: min/max are taken per axis independently, so these values
            # are bounds and need not be the size of one actual image.
            'Min Size': f"{min(widths)}x{min(heights)}",
            'Max Size': f"{max(widths)}x{max(heights)}",
            'Avg Width': int(sum(widths)/len(widths)),
            'Avg Height': int(sum(heights)/len(heights)),
            # Ratio > 1 = landscape, < 1 = portrait (ratio of the summed sizes)
            'Rasio (W/H)': round(sum(widths)/sum(heights), 2)
        })

df_stats = pd.DataFrame(stats_data)

print("\n=== STATISTIK DIMENSI PER FOLDER ===")
display(df_stats)


=== STATISTIK DIMENSI PER FOLDER ===


Unnamed: 0,Jenis Batik,Jml Gambar,Min Size,Max Size,Avg Width,Avg Height,Rasio (W/H)
0,batik_betawi,150,164x150,1600x1200,425,396,1.07
1,batik_bokor_kencono,150,400x286,640x960,551,721,0.76
2,batik_buketan,150,286x289,1588x2117,950,977,0.97
3,batik_dayak,150,88x101,3648x3072,379,377,1.0
4,batik_jlamprang,150,146x150,1600x1200,622,564,1.1
5,batik_kawung,150,100x100,640x960,436,561,0.78
6,batik_liong,150,366x324,960x1000,567,573,0.99
7,batik_mega_mendung,150,194x143,3264x3904,1445,1231,1.17
8,batik_parang,150,159x127,640x960,388,479,0.81
9,batik_sekarjagad,150,84x160,640x960,401,546,0.73


In [None]:
import numpy as np

# For every class folder: show up to three random sample images next to a
# scatter plot of all image sizes (width vs. height) in that folder.
for class_name in all_classes:
    folder_path = os.path.join(dataset_path, class_name)
    image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    if not image_files: continue

    widths = []
    heights = []
    valid_paths = []

    for img_file in image_files:
        path = os.path.join(folder_path, img_file)
        try:
            with Image.open(path) as img:
                w, h = img.size
        except Exception as exc:
            # Report unreadable files instead of silently dropping them.
            print(f"[SKIP] Gagal membuka {path}: {exc}")
            continue
        widths.append(w)
        heights.append(h)
        valid_paths.append(path)

    # Nothing readable in this folder: do not draw an empty figure.
    if not valid_paths:
        continue

    # --- PLOTTING ---
    fig = plt.figure(figsize=(14, 4))
    gs = fig.add_gridspec(1, 4)

    # 1. Show the sample images (columns 0-2)
    samples = np.random.choice(valid_paths, min(3, len(valid_paths)), replace=False)
    for i, path in enumerate(samples):
        ax = fig.add_subplot(gs[0, i])
        # Keep the file open only while matplotlib copies the pixel data, so
        # handles are not leaked across folders. The PIL image itself is passed
        # so palette ('P') images are still rendered in colour.
        with Image.open(path) as img:
            ax.imshow(img)
            sample_size = img.size
        ax.axis('off')
        ax.set_title(f"Sampel {i+1}\n{sample_size}", fontsize=9)

    # 2. Show the scatter plot (column 3)
    ax_scatter = fig.add_subplot(gs[0, 3])
    ax_scatter.scatter(widths, heights, alpha=0.6, c='green', s=20)
    ax_scatter.set_title(f"Sebaran Ukuran: {class_name}", fontsize=10, fontweight='bold')
    ax_scatter.set_xlabel("Width")
    ax_scatter.set_ylabel("Height")
    ax_scatter.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
    # Release the figure explicitly; one figure is created per class folder.
    plt.close(fig)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
import pandas as pd
from tqdm.notebook import tqdm

# --- TARGET CONFIGURATION ---
TARGET_SIZE = 256  # Change to 512 if your GAN targets 512
# --------------------------

# Per-folder audit: how many images are at least TARGET_SIZE on both sides.
folder_audit = []

print(f"üìã Melakukan Audit Lokal per Folder (Target Minimal: {TARGET_SIZE}x{TARGET_SIZE})...\n")

for class_name in tqdm(all_classes, desc="Memproses Folder"):
    folder_path = os.path.join(dataset_path, class_name)
    files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

    total = len(files)
    safe_count = 0
    warning_count = 0
    # Smallest width/height seen so far; None until one image has been read,
    # so no sentinel value can leak into the report.
    min_w = min_h = None

    # Check every file in this folder
    for filename in files:
        img_path = os.path.join(folder_path, filename)
        try:
            # Lazy load: Image.open reads only the header, which is fast
            with Image.open(img_path) as img:
                w, h = img.size
        except Exception as exc:
            # Report unreadable files; `Exception` (not a bare except) still
            # lets KeyboardInterrupt stop the audit.
            print(f"[SKIP] Gagal membuka {img_path}: {exc}")
            continue

        # Track the smallest width and height in this folder (per axis, so the
        # pair is a lower bound and need not be one actual image).
        min_w = w if min_w is None else min(min_w, w)
        min_h = h if min_h is None else min(min_h, h)

        # Suitability for the GAN: both sides must reach TARGET_SIZE
        if w < TARGET_SIZE or h < TARGET_SIZE:
            warning_count += 1
        else:
            safe_count += 1

    # Percentage of files that pass (unreadable files count against the score)
    safety_score = (safe_count / total) * 100 if total > 0 else 0
    # "N/A" when the folder is empty or nothing in it could be read
    min_res_found = f"{min_w}x{min_h}" if min_w is not None else "N/A"

    # Decide the folder status
    status = "‚úÖ Aman"
    if warning_count > 0:
        if safety_score < 50:
            status = "‚õî KRITIS (Mayoritas Kekecilan)"
        else:
            status = "‚ö†Ô∏è Perlu Cek (Ada File Kecil)"

    folder_audit.append({
        'Nama Batik': class_name,
        'Total Gambar': total,
        '‚úÖ Lolos (>Target)': safe_count,
        '‚ùå Warning (<Target)': warning_count,
        'Kelayakan (%)': round(safety_score, 1),
        'Resolusi Terkecil': min_res_found,
        'Status Folder': status
    })

# --- SHOW THE TABLE ---
df_audit_folder = pd.DataFrame(folder_audit)

# Sort by most problematic first (highest warning count)
df_audit_folder = df_audit_folder.sort_values(by='‚ùå Warning (<Target)', ascending=False)

print("\n" + "="*60)
print(f"   LAPORAN KELAYAKAN DATASET (Min: {TARGET_SIZE}px)")
print("="*60)

# Table styling:
# - Red gradient on the Warning column (darker = more undersized files)
# - Green gradient on the suitability column (darker = closer to 100%)
display(df_audit_folder.style.background_gradient(cmap='Reds', subset=['‚ùå Warning (<Target)'])
                         .background_gradient(cmap='Greens', subset=['Kelayakan (%)']))

print(f"\nüí° TIPS:")
print(f"1. Fokus pada folder dengan status '‚õî KRITIS' atau yang kolom Warning-nya merah pekat.")
print(f"2. File di kolom 'Warning' sebaiknya TIDAK dipakai training, atau harus di-upscale dulu.")

üìã Melakukan Audit Lokal per Folder (Target Minimal: 256x256)...



Memproses Folder:   0%|          | 0/20 [00:00<?, ?it/s]


   LAPORAN KELAYAKAN DATASET (Min: 256px)


Unnamed: 0,Nama Batik,Total Gambar,‚úÖ Lolos (>Target),‚ùå Warning (  Kelayakan (%)  Resolusi Terkecil  Status Folder,Unnamed: 5,Unnamed: 6,Unnamed: 7
3,batik_dayak,150,44,106,29.3,88x101,‚õî KRITIS (Mayoritas Kekecilan)
8,batik_parang,150,67,83,44.7,159x127,‚õî KRITIS (Mayoritas Kekecilan)
9,batik_sekarjagad,150,81,69,54.0,84x160,‚ö†Ô∏è Perlu Cek (Ada File Kecil)
5,batik_kawung,150,88,62,58.7,100x100,‚ö†Ô∏è Perlu Cek (Ada File Kecil)
0,batik_betawi,150,94,56,62.7,164x150,‚ö†Ô∏è Perlu Cek (Ada File Kecil)
4,batik_jlamprang,150,121,29,80.7,146x150,‚ö†Ô∏è Perlu Cek (Ada File Kecil)
16,batik_tujuh_rupa,150,139,11,92.7,300x225,‚ö†Ô∏è Perlu Cek (Ada File Kecil)
7,batik_mega_mendung,150,143,7,95.3,194x143,‚ö†Ô∏è Perlu Cek (Ada File Kecil)
12,batik_sidomulyo,150,149,1,99.3,214x345,‚ö†Ô∏è Perlu Cek (Ada File Kecil)
1,batik_bokor_kencono,150,150,0,100.0,400x286,‚úÖ Aman



üí° TIPS:


In [None]:
import matplotlib.pyplot as plt
import os
import random
from PIL import Image
import numpy as np

# --- CONFIGURATION ---
TARGET_SIZE = 256
NUM_SAMPLES = 5  # Number of images to preview as a sample

# Build one flat list with the path of every image in every class folder
all_files = []
for folder in all_classes:
    path = os.path.join(dataset_path, folder)
    if not os.path.isdir(path):
        continue
    for entry in os.listdir(path):
        if entry.lower().endswith(('.jpg', '.png', '.jpeg')):
            all_files.append(os.path.join(path, entry))

# Draw a random sample (a mix of large and small images, if both exist),
# capped at the number of files actually available
sample_count = min(NUM_SAMPLES, len(all_files))
sample_files = random.sample(all_files, sample_count)

print(f"üîÑ Memproses preview untuk {len(sample_files)} gambar...")
print(f"üéØ Target Size: {TARGET_SIZE}x{TARGET_SIZE}")

def smart_process(img_path, target_size):
    """
    Proportionally resize an image, then center-crop it to a square.

    The shorter side is scaled to exactly ``target_size`` (upscaling small
    images, downscaling large ones), the longer side is scaled by the same
    factor, and the central ``target_size`` x ``target_size`` region is kept.

    Args:
        img_path: Path of the image file to open.
        target_size: Side length, in pixels, of the square output.

    Returns:
        ``(original, processed, info)`` where ``original`` is the RGB-converted
        source image, ``processed`` is the resized and cropped image, and
        ``info`` is the string ``"Asli: {w}x{h}"`` with the source size.
        On any error the message is printed and ``(None, None, None)`` is
        returned.
    """
    try:
        # Close the file handle as soon as the pixels are converted;
        # convert() returns a new in-memory image independent of the file.
        with Image.open(img_path) as src:
            original_img = src.convert('RGB')

        w, h = original_img.size

        # 1. New size: pin the shorter side to target_size directly instead of
        #    int(side * (target / side)), which can truncate to target - 1 from
        #    float error (e.g. side 49, target 256 -> 255) and leave a padded
        #    edge after cropping. The longer side keeps the aspect ratio and is
        #    never allowed below target_size.
        if w <= h:
            new_w = target_size
            new_h = max(target_size, round(h * target_size / w))
        else:
            new_h = target_size
            new_w = max(target_size, round(w * target_size / h))

        # 2. Resize with a high-quality filter (LANCZOS); resize() returns a
        #    new image, so original_img stays untouched.
        resized = original_img.resize((new_w, new_h), Image.Resampling.LANCZOS)

        # 3. Center crop with integer coordinates so the result is exactly
        #    target_size x target_size, including for odd target sizes.
        left = (new_w - target_size) // 2
        top = (new_h - target_size) // 2
        processed = resized.crop((left, top, left + target_size, top + target_size))

        return original_img, processed, f"Asli: {w}x{h}"

    except Exception as e:
        print(f"Error: {e}")
        return None, None, None

# --- VISUALIZATION ---
# One row per sampled file: the original on the left, the processed result on
# the right. Files that failed to process leave their row empty.
n_rows = len(sample_files)
fig = plt.figure(figsize=(10, 4 * n_rows))

for i, filepath in enumerate(sample_files):
    original, processed, info_text = smart_process(filepath, TARGET_SIZE)

    if not (original and processed):
        continue

    panels = (
        (original, f"BEFORE ({info_text})"),                  # source image
        (processed, f"AFTER ({TARGET_SIZE}x{TARGET_SIZE})"),  # resized + cropped
    )
    for col, (picture, caption) in enumerate(panels, start=1):
        ax = fig.add_subplot(n_rows, 2, 2*i + col)
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()
plt.show()

Output hidden; open in https://colab.research.google.com to view.