# Proyek Klasifikasi Gambar: Orange Fruit Dataset
- **Nama:** Heriswaya
- **Email:** heriswaya2014@gmail.com
- **ID Dicoding:** heriswaya_

# **About Data**
The orange fruit dataset was gathered using a phone camera in the `HEIC (High Efficiency Image Container)` format and was converted to `JPEG format`. This dataset was collected in research carried out to determine the dominant diseases affecting oranges in the eastern parts of `Uganda`. It can be used for any artificial intelligence project and any agricultural research by anyone.

# **Objective**
Membangun sebuah model CNN yang dapat mengklasifikasikan penyakit pada jeruk (`Healthy`,`Citrus Canker`,`Melanose`) menggunakan gambar yang diambil di daerah Uganda Timur.

## Import Semua Packages/Library yang Digunakan

In [1]:
# Mengimpor libraries umum yang sering digunakan
import os, shutil
import zipfile
import random
from random import sample
import shutil
from shutil import copyfile
import pathlib
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm as tq

# Mengimpor libraries untuk visualisasi
%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.image import imread

# Mengimpor libraries untuk pemrosesan data gambar
import cv2
from PIL import Image
import skimage
from skimage import io
from skimage.transform import resize
from skimage.transform import rotate, AffineTransform, warp
from skimage import img_as_ubyte
from skimage.exposure import adjust_gamma
from skimage.util import random_noise

# Mengimpor libraries untuk pembuatan dan evaluasi model
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras import Model, layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.layers import InputLayer, Conv2D, SeparableConv2D, MaxPooling2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau

# Mengabaikan peringatan
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Show which TensorFlow release this notebook is running against.
print(tf.version.VERSION)

2.18.0


## Data Preparation

### Data Loading

In [19]:
# Mount Google Drive at /content/drive so files stored there can be read
# from this session. The google.colab module is only importable inside
# a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
# Copy the dataset zip archive from the mounted Drive folder into /content/
!cp /content/drive/MyDrive/klasifikasi/orange-fruit-dataset.zip /content/

In [28]:
# Unzip dataset
!unzip orange-fruit-daatset.zip

unzip:  cannot find or open orange-fruit-daatset.zip, orange-fruit-daatset.zip.zip or orange-fruit-daatset.zip.ZIP.


### Menggabungkan Data `citrus canker`, `healthy`, dan `melanose` ke direktori yang sama

In [None]:
# Source folder that holds one sub-folder per class, and the destination
# folder that will contain the combined dataset.
origin_dir = "FIELD IMAGES"
combined_dir = "dataset"

# Make sure the destination exists before anything is copied into it.
os.makedirs(combined_dir, exist_ok=True)

# Keep only the entries of the source folder that are directories
# (citrus canker, healthy, melanose); loose files are ignored.
class_dirs = [
    name
    for name in os.listdir(origin_dir)
    if os.path.isdir(os.path.join(origin_dir, name))
]

# Copy every class folder into the destination; dirs_exist_ok lets the
# cell be re-run without failing on folders that were already copied.
for category in class_dirs:
    category_dir = os.path.join(origin_dir, category)
    target_dir = os.path.join(combined_dir, category)
    shutil.copytree(category_dir, target_dir, dirs_exist_ok=True)

### Menampilkan beberapa gambar dari ketiga kelas

In [None]:
# Map each class name to the list of file names found in its folder.
orange_images = {}

# Path to the combined dataset folder; only sub-directories count as classes.
path = "dataset"
for class_name in os.listdir(path):
    class_path = os.path.join(path, class_name)
    if os.path.isdir(class_path):
        orange_images[class_name] = os.listdir(class_path)

# Show up to n_cols randomly chosen images per class, one class per row.
# squeeze=False keeps axs two-dimensional even when there is a single
# class, so axs[i, j] indexing below is always valid.
n_cols = 5
fig, axs = plt.subplots(len(orange_images), n_cols, figsize=(16, 12), squeeze=False)

for i, class_name in enumerate(orange_images):
    # Hide the axes of the whole row first so that cells left unused
    # (class with fewer than n_cols images) stay blank.
    for ax in axs[i]:
        ax.axis('off')

    available = orange_images[class_name]
    if not available:
        continue

    # Sampling without replacement cannot draw more items than exist,
    # so clamp the sample size to the number of files in the class.
    images = np.random.choice(available, min(n_cols, len(available)), replace=False)

    for j, image_name in enumerate(images):
        img_path = os.path.join(path, class_name, image_name)
        # Convert to RGB so every image is displayed in colour.
        img = Image.open(img_path).convert("RGB")
        axs[i, j].imshow(img)
        axs[i, j].set_title(class_name, fontsize=8)  # small title above each image

fig.suptitle('Sample Images from Each Class', fontsize=20)
fig.tight_layout()
plt.show()

### Plot Distribusi gambar di seluruh kelas

In [None]:
# Root folder of the combined dataset.
orange_path = "dataset/"

# Walk the folder tree once and collect one (file name, full path, label)
# record per file. The label is the name of the folder that directly
# contains the file; files sitting in the root itself get an empty label
# because the root path ends with a slash.
records = [
    (entry, os.path.join(folder, entry), os.path.basename(folder))
    for folder, _, entries in os.walk(orange_path)
    for entry in entries
]

# Split the records into the three parallel lists.
file_names = [record[0] for record in records]
full_paths = [record[1] for record in records]
labels = [record[2] for record in records]

# One row per image file.
orange_df = pd.DataFrame({
    "file_name": file_names,
    "path": full_paths,
    "label": labels,
})

# Bar chart with the number of images in each class.
plt.figure(figsize=(8, 6))
sns.set_style("whitegrid")
ax = sns.countplot(data=orange_df, x='label', palette="Set2")

ax.set_title('Distribution of Images Across Classes', fontsize=16)
ax.set_xlabel('Class', fontsize=12)
ax.set_ylabel('Number of Images', fontsize=12)
plt.xticks(rotation=15)
plt.show()

### Data Augmentation

### Data Preprocessing

#### Split Dataset

## Modelling

## Evaluasi dan Visualisasi

## Konversi Model

## Inference (Optional)