In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from tqdm import tqdm
import warnings

# Suppress library warnings so the notebook output stays clean
warnings.filterwarnings('ignore')

# ==================== KAGGLE PATH CONFIGURATION ====================
# Root folder of the Kaggle dataset 'eye-diseases-classification'; it is
# expected to contain one sub-folder per entry in CATEGORIES.
DATA_DIR = '/kaggle/input/eye-diseases-classification/dataset'
IMG_SIZE = 224  # side length (pixels) every image is resized to before the network
BATCH_SIZE = 32  # number of images per DataLoader batch

# Disease categories (folder names inside the dataset must match these exactly).
# The position of each name in this list is used as its integer label.
CATEGORIES = ['normal', 'cataract', 'glaucoma', 'diabetic_retinopathy']

# ==================== 1. DATASET CLASS ====================
class EyeDataset(Dataset):
    """Map-style dataset that loads eye images from disk on demand.

    Every item is an ``(image, label, path)`` triple so that downstream code
    can keep each extracted feature vector associated with its source file.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, index-aligned with ``image_paths``.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, label, img_path)``."""
        img_path = self.image_paths[idx]
        # Close the file handle deterministically; convert() returns a fully
        # loaded copy that stays valid after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        # Compare against None explicitly: a transform object that defines
        # __len__/__bool__ (e.g. an empty container) would otherwise be skipped.
        if self.transform is not None:
            image = self.transform(image)

        return image, label, img_path

def load_dataset(data_dir, categories=None):
    """Collect image paths and category labels from a dataset directory.

    The directory is expected to contain one sub-folder per category. Only
    files ending in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are
    collected. File names are sorted inside each category so the resulting
    order is reproducible across runs (``os.listdir`` order is arbitrary).

    Args:
        data_dir: Root directory holding the category sub-folders.
        categories: Optional iterable of category folder names. Defaults to
            the module-level ``CATEGORIES``.

    Returns:
        A tuple ``(image_paths, labels)`` of two index-aligned lists; the
        label of each image is its category folder name. Both lists are empty
        when ``data_dir`` is not a directory.
    """
    image_paths = []
    labels = []

    print(f"Mencari data di: {data_dir}")

    # The dataset root must be an actual directory
    if not os.path.isdir(data_dir):
        print(f"Error: Folder {data_dir} tidak ditemukan!")
        print("Coba cek 'Copy File Path' di sidebar kanan Kaggle.")
        return [], []

    if categories is None:
        categories = CATEGORIES

    for category in categories:
        category_path = os.path.join(data_dir, category)

        # A missing category is reported and skipped, not treated as fatal
        if not os.path.isdir(category_path):
            print(f"   Warning: Folder kategori '{category}' tidak ditemukan di {category_path}")
            continue

        image_names = sorted(
            name for name in os.listdir(category_path)
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        # Report only the files that will actually be processed
        print(f"   Ditemukan folder '{category}': {len(image_names)} gambar")

        image_paths.extend(os.path.join(category_path, name) for name in image_names)
        labels.extend([category] * len(image_names))

    return image_paths, labels

# ==================== 2. PROSES EKSTRAKSI (RESNET50) ====================
def extract_features_resnet(image_paths, labels, batch_size=32):
    """Extract pooled ResNet50 features for a list of images.

    The ImageNet-pretrained ResNet50 is used with its final fully connected
    layer removed, so each image is represented by the flattened output of
    the global average pooling layer.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of category names (entries of ``CATEGORIES``),
            index-aligned with ``image_paths``.
        batch_size: Number of images per forward pass.

    Returns:
        A tuple ``(features, encoded_labels, paths)``: a 2-D NumPy array with
        one row per image, a 1-D array of integer labels (index of the
        category in ``CATEGORIES``), and the list of image paths in the same
        order.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
        KeyError: If a label is not present in ``CATEGORIES``.
    """
    # ResNet50's globally pooled output has 2048 channels
    feature_dim = 2048

    if len(image_paths) != len(labels):
        raise ValueError(
            f"image_paths ({len(image_paths)}) and labels ({len(labels)}) "
            "must have the same length"
        )

    # np.vstack cannot concatenate an empty list, so short-circuit here
    if len(image_paths) == 0:
        return (
            np.empty((0, feature_dim), dtype=np.float32),
            np.empty((0,), dtype=np.int64),
            [],
        )

    # Pick the device (a Kaggle GPU such as P100/T4 makes this much faster)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"\nMenggunakan device: {device}")

    if device.type == 'cpu':
        print("Warning: Kamu sedang menggunakan CPU. Aktifkan GPU Accelerator di Settings Kaggle agar lebih cepat!")

    # Load the model. IMAGENET1K_V1 is the checkpoint the deprecated
    # ``pretrained=True`` flag resolved to, so the features stay the same.
    print("Loading ResNet50 pretrained model...")
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    resnet = nn.Sequential(*list(resnet.children())[:-1])  # drop the classification head
    resnet = resnet.to(device)
    resnet.eval()

    # Standard ImageNet preprocessing
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Encode the text labels as integers
    label_to_idx = {cat: idx for idx, cat in enumerate(CATEGORIES)}
    encoded_labels = [label_to_idx[label] for label in labels]

    dataset = EyeDataset(image_paths, encoded_labels, transform)
    # shuffle=False keeps the output rows in the same order as image_paths
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    features_list = []
    labels_list = []
    paths_list = []

    print(f"Mulai ekstraksi fitur dari {len(image_paths)} gambar...")

    with torch.no_grad():
        for images, labels_batch, paths_batch in tqdm(dataloader, desc="Proses Ekstraksi"):
            images = images.to(device)
            # Flatten everything after the batch dimension into one vector per image
            output = torch.flatten(resnet(images), 1)

            features_list.append(output.cpu().numpy())
            labels_list.append(labels_batch.numpy())
            paths_list.extend(paths_batch)

    return np.vstack(features_list), np.hstack(labels_list), paths_list

# ==================== 3. SIMPAN CSV ====================
def save_to_csv(features, labels, paths,
                output_filename='/kaggle/working/resnet50_features(normalized).csv',
                categories=None):
    """Write extracted features and their metadata to a CSV file.

    The CSV columns are ``label``, ``label_encoded``, ``image_path`` followed
    by ``feature_0 .. feature_{D-1}``.

    Args:
        features: 2-D array with one row of features per image.
        labels: Integer labels, one per row of ``features``; each is used as
            an index into ``categories``.
        paths: Image paths, one per row of ``features``.
        output_filename: Destination CSV path. Defaults to the Kaggle working
            directory.
        categories: Optional sequence mapping an integer label to its name.
            Defaults to the module-level ``CATEGORIES``.

    Returns:
        The ``pandas.DataFrame`` that was written to disk.
    """
    if categories is None:
        categories = CATEGORIES

    output_dir = os.path.dirname(output_filename) or '.'
    print(f"\nMenyimpan ke CSV di {output_dir}/...")

    feature_cols = [f'feature_{i}' for i in range(features.shape[1])]
    df = pd.DataFrame(features, columns=feature_cols)

    # Each insert goes to position 0, so the final column order is
    # label, label_encoded, image_path, feature_*
    df.insert(0, 'image_path', paths)
    df.insert(0, 'label_encoded', labels)
    df.insert(0, 'label', [categories[label] for label in labels])

    # Ensure the target directory exists, then save
    os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_filename, index=False)

    print(f"Sukses! File tersimpan di: {output_filename}")
    return df

# ==================== MAIN ====================
if __name__ == "__main__":
    # Banner
    print("="*50)
    print("PROGRAM EKSTRAKSI FITUR MATA (RESNET50)")
    print("="*50)

    # Step 1: discover the images and their category labels
    image_paths, labels = load_dataset(DATA_DIR)

    if not image_paths:
        # Nothing was found, so there is nothing to extract or save
        print("\nTidak ada gambar yang diproses. Cek path dataset kamu lagi.")
    else:
        # Step 2: run the images through the ResNet50 feature extractor
        features, encoded_labels, paths = extract_features_resnet(
            image_paths, labels, BATCH_SIZE
        )

        # Step 3: write the features and metadata to CSV
        save_to_csv(features, encoded_labels, paths)

        print("\nSelesai! Silakan cek tab 'Output' di Kaggle untuk mendownload CSV-nya.")

PROGRAM EKSTRAKSI FITUR MATA (RESNET50)
Mencari data di: /kaggle/input/eye-diseases-classification/dataset
   Ditemukan folder 'normal': 1074 gambar
   Ditemukan folder 'cataract': 1038 gambar
   Ditemukan folder 'glaucoma': 1007 gambar
   Ditemukan folder 'diabetic_retinopathy': 1098 gambar

Menggunakan device: cuda
Loading ResNet50 pretrained model...


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 172MB/s] 


Mulai ekstraksi fitur dari 4217 gambar...


Proses Ekstraksi: 100%|██████████| 132/132 [00:33<00:00,  3.93it/s]



Menyimpan ke CSV di /kaggle/working/...
Sukses! File tersimpan di: /kaggle/working/resnet50_features(normalized).csv

Selesai! Silakan cek tab 'Output' di Kaggle untuk mendownload CSV-nya.


In [5]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from tqdm import tqdm
import warnings

# Silence warnings so the output stays clean
warnings.filterwarnings('ignore')

# ==================== CONFIGURATION ====================
DATA_DIR = '/kaggle/input/eye-diseases-classification/dataset'  # Adjust the path if needed

# Images are resized to 300x300 before being fed to EfficientNet-B3.
# NOTE(review): the original note called 300x300 mandatory; torchvision's
# bundled B3 preset is believed to resize to 320 and center-crop to 300 --
# confirm which preprocessing is intended.
IMG_SIZE = 300
BATCH_SIZE = 32

# Category order (the list position of each name is used as its integer label)
CATEGORIES = ['normal', 'cataract', 'glaucoma', 'diabetic_retinopathy']

# ==================== 1. DATASET CLASS ====================
class EyeDataset(Dataset):
    """Dataset yielding ``(image, label, path)`` triples read lazily from disk.

    Returning the path alongside the image lets the caller keep every feature
    vector linked to the file it was computed from.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, index-aligned with ``image_paths``.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return how many images the dataset holds."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label, img_path)`` for position ``idx``."""
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # Use a context manager so the underlying file is closed right away;
        # convert() hands back an independent, fully loaded RGB copy.
        with Image.open(img_path) as source:
            image = source.convert('RGB')

        # Explicit None check: do not rely on the truthiness of the transform
        if self.transform is not None:
            image = self.transform(image)

        return image, label, img_path

def load_dataset(data_dir, categories=None):
    """Load image paths and their labels from per-category sub-folders.

    Only ``.png``, ``.jpg`` and ``.jpeg`` files (case-insensitive) are kept.
    File names are sorted within each category so the result is deterministic
    (``os.listdir`` returns entries in arbitrary order).

    Args:
        data_dir: Root directory that contains one folder per category.
        categories: Optional iterable of category folder names. Defaults to
            the module-level ``CATEGORIES``.

    Returns:
        A tuple ``(image_paths, labels)`` of index-aligned lists, where each
        label is the category folder name. Both are empty when ``data_dir``
        is not a directory.
    """
    image_paths = []
    labels = []

    print(f"Membaca dataset dari: {data_dir}")
    if not os.path.isdir(data_dir):
        print("Error: Path dataset tidak ditemukan!")
        return [], []

    if categories is None:
        categories = CATEGORIES

    for category in categories:
        category_path = os.path.join(data_dir, category)
        # Missing categories are reported and skipped
        if not os.path.isdir(category_path):
            print(f"   Folder '{category}' tidak ditemukan.")
            continue

        files = sorted(
            f for f in os.listdir(category_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        image_paths.extend(os.path.join(category_path, f) for f in files)
        labels.extend([category] * len(files))
        print(f"   {category}: {len(files)} gambar")

    return image_paths, labels

# ==================== 2. PROSES EKSTRAKSI (EFFICIENTNET-B3) ====================
def extract_features_efficientnet(image_paths, labels, batch_size=32):
    """Extract EfficientNet-B3 features for a list of images.

    The pretrained model's classifier is replaced with an identity layer, so
    the network returns the features that would normally feed the classifier.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of category names (entries of ``CATEGORIES``),
            index-aligned with ``image_paths``.
        batch_size: Number of images per forward pass.

    Returns:
        A tuple ``(features, all_labels, paths_list)``: a 2-D NumPy array with
        one row per image, a 1-D array of integer labels (index of the
        category in ``CATEGORIES``), and the list of image paths in the same
        order.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
        KeyError: If a label is not present in ``CATEGORIES``.
    """
    # Width of the EfficientNet-B3 feature vector once the classifier is removed
    feature_dim = 1536

    if len(image_paths) != len(labels):
        raise ValueError(
            f"image_paths ({len(image_paths)}) and labels ({len(labels)}) "
            "must have the same length"
        )

    # np.vstack raises on an empty list, so return empty results directly
    if len(image_paths) == 0:
        return (
            np.empty((0, feature_dim), dtype=np.float32),
            np.empty((0,), dtype=np.int64),
            [],
        )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"\nMenggunakan Device: {device}")

    # 1. Load the pretrained EfficientNet-B3 model
    print("Loading EfficientNet-B3 (Pretrained)...")
    weights = models.EfficientNet_B3_Weights.DEFAULT
    model = models.efficientnet_b3(weights=weights)

    # 2. Remove the classifier (keep only the features)
    model.classifier = nn.Identity()
    model = model.to(device)
    model.eval()

    # 3. Preprocessing (resize to IMG_SIZE x IMG_SIZE, ImageNet normalization)
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # 4. Build the DataLoader
    label_to_idx = {cat: idx for idx, cat in enumerate(CATEGORIES)}
    encoded_labels = [label_to_idx[label] for label in labels]

    dataset = EyeDataset(image_paths, encoded_labels, transform)
    # shuffle=False keeps the output rows in the same order as image_paths
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    # 5. Extraction loop
    features_list = []
    labels_list = []
    paths_list = []

    print(f"\nMulai Ekstraksi Fitur ({len(image_paths)} gambar)...")

    with torch.no_grad():
        for images, labels_batch, paths_batch in tqdm(dataloader):
            images = images.to(device)

            # One flat feature vector per image
            output = torch.flatten(model(images), 1)

            features_list.append(output.cpu().numpy())
            labels_list.append(labels_batch.numpy())
            paths_list.extend(paths_batch)

    # Concatenate the per-batch results
    features = np.vstack(features_list)
    all_labels = np.hstack(labels_list)

    return features, all_labels, paths_list

# ==================== 3. OUTPUT CSV ====================
def save_to_csv(features, labels, paths,
                output_filename='efficientnet_features(normalized).csv',
                categories=None):
    """Write extracted features and their metadata to a CSV file.

    The CSV columns are ``label``, ``label_encoded``, ``image_path`` followed
    by ``feature_0 .. feature_{D-1}``.

    Args:
        features: 2-D array with one row of features per image.
        labels: Integer labels, one per row of ``features``; each is used as
            an index into ``categories``.
        paths: Image paths, one per row of ``features``.
        output_filename: Destination CSV path. The default is relative to the
            current working directory.
        categories: Optional sequence mapping an integer label to its name.
            Defaults to the module-level ``CATEGORIES``.

    Returns:
        The ``pandas.DataFrame`` that was written to disk.
    """
    if categories is None:
        categories = CATEGORIES

    print("\nMenyimpan ke CSV...")

    # Column names: feature_0 ... feature_{D-1}
    cols = [f'feature_{i}' for i in range(features.shape[1])]

    df = pd.DataFrame(features, columns=cols)

    # Prepend the image metadata; each insert goes to position 0, so the final
    # order is label, label_encoded, image_path, feature_*
    df.insert(0, 'image_path', paths)
    df.insert(0, 'label_encoded', labels)
    df.insert(0, 'label', [categories[label_idx] for label_idx in labels])

    # Create the target directory when the path has one, then save
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_filename, index=False)

    print(f"Selesai! File tersimpan: {output_filename}")
    print(f"   Dimensi Data: {df.shape}")
    return df

# ==================== MAIN PROGRAM ====================
if __name__ == "__main__":
    # Banner
    print("="*50)
    print("EFFICIENTNET-B3 FEATURE EXTRACTION (SIMPLE)")
    print("="*50)

    # Step 1 (input): discover the images and their category labels
    image_paths, labels = load_dataset(DATA_DIR)

    if not image_paths:
        # No images were found, so skip extraction and saving
        print("\nTidak ada gambar yang bisa diproses.")
    else:
        # Step 2 (process): run the images through EfficientNet-B3
        features, encoded_labels, paths = extract_features_efficientnet(image_paths, labels, BATCH_SIZE)

        # Step 3 (output): write the features and metadata to CSV
        save_to_csv(features, encoded_labels, paths)

        print("\nSiap! Silakan download CSV dari tab 'Output'.")

EFFICIENTNET-B3 FEATURE EXTRACTION (SIMPLE)
Membaca dataset dari: /kaggle/input/eye-diseases-classification/dataset
   normal: 1074 gambar
   cataract: 1038 gambar
   glaucoma: 1007 gambar
   diabetic_retinopathy: 1098 gambar

Menggunakan Device: cuda
Loading EfficientNet-B3 (Pretrained)...


Downloading: "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_rwightman-b3899882.pth
100%|██████████| 47.2M/47.2M [00:00<00:00, 193MB/s]



Mulai Ekstraksi Fitur (4217 gambar)...


100%|██████████| 132/132 [00:29<00:00,  4.51it/s]



Menyimpan ke CSV...
Selesai! File tersimpan: efficientnet_features(normalized).csv
   Dimensi Data: (4217, 1539)

Siap! Silakan download CSV dari tab 'Output'.
