<a href="https://colab.research.google.com/github/irfan-lie92/Machine_LearningCNN/blob/main/Submission_Progresive_Machine_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Proyek Klasifikasi Gambar: cat_dog_bird_dataset**


- **Nama:** IRFAN ALI
- **Email:** ir.vanaly@gmail.com
- **ID Dicoding:** irfanlie92

## Import Semua Packages/Library yang Digunakan

In [1]:
!pip install kaggle tensorflowjs
import os
import zipfile
import shutil
import tensorflow as tf
import random
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import mixed_precision
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import pathlib
from tensorflow.keras.preprocessing import image
import tensorflowjs as tfjs
from google.colab import files



## Data Preparation

### Data Loading

In [2]:
# ===============================
# 1. Upload kaggle.json
# ===============================
print("📂 Silakan upload file kaggle.json dari komputer Anda...")
uploaded = files.upload()

# Pastikan folder ~/.kaggle ada
os.makedirs("/root/.kaggle", exist_ok=True)

# Pindahkan kaggle.json ke folder config Kaggle
for filename in uploaded.keys():
    os.rename(filename, "/root/.kaggle/kaggle.json")

# Ubah permission agar aman
os.chmod("/root/.kaggle/kaggle.json", 600)

# ===============================
# 2. Install Kaggle CLI
# ===============================
!pip install kaggle --upgrade --quiet

# ===============================
# 3. Tes koneksi Kaggle
# ===============================
!kaggle datasets list -s "books"

print("\n✅ kaggle.json berhasil diunggah dan terhubung.")

# ===============================
# 4. Download dataset
# ===============================
os.makedirs("/content/dataset", exist_ok=True)
!kaggle datasets download -d mahmoudnoor/high-resolution-catdogbird-image-dataset-13000 -p /content/dataset

# ===============================
# 5. Ekstrak dataset
# ===============================
zip_path = "/content/dataset/high-resolution-catdogbird-image-dataset-13000.zip"
if os.path.exists(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall("/content/dataset")
    print("✅ Dataset berhasil diekstrak di /content/dataset")
else:
    print("❌ File zip dataset tidak ditemukan.")

📂 Silakan upload file kaggle.json dari komputer Anda...


Saving kaggle.json to kaggle.json
ref                                                               title                                                     size  lastUpdated                 downloadCount  voteCount  usabilityRating  
----------------------------------------------------------------  --------------------------------------------------  ----------  --------------------------  -------------  ---------  ---------------  
jealousleopard/goodreadsbooks                                     Goodreads-books                                         637338  2020-03-09 09:18:31.583000          81540       1928  1.0              
saurabhbagchi/books-dataset                                       Books Dataset                                         25760320  2020-10-09 05:14:41.297000          20415        128  1.0              
dylanjcastillo/7k-books-with-metadata                             7k Books                                               1542454  2020-02-04 20:17:23.623000  

## Data Preprocessing

### Split Dataset

In [3]:
# ===============================
# Folder paths
# ===============================
dataset_dir = "/content/dataset"  # original (extracted) dataset folder
split_dir = "/content/split_dataset"
classes = ["bird", "cat", "dog"]

# One sub-folder per split, all located under split_dir
train_dir, validation_dir, test_dir = (
    os.path.join(split_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# ===============================
# Start from a clean split_dataset folder
# ===============================
if os.path.exists(split_dir):
    shutil.rmtree(split_dir)

# ===============================
# Create train/validation/test folders, each with one folder per class
# ===============================
for split in (train_dir, validation_dir, test_dir):
    for cls in classes:
        os.makedirs(os.path.join(split, cls), exist_ok=True)

# ===============================
# Helper: collect every image file
# ===============================
def list_images(folder):
    """Return the paths of all image files found under ``folder``.

    The folder is walked recursively and a file counts as an image when its
    name ends (case-insensitively) with .jpg, .jpeg, .png, .bmp or .gif.

    Args:
        folder: Root directory to search.

    Returns:
        A sorted list of file paths (joined with their containing directory).
        The list is empty when ``folder`` does not exist or holds no images.
        Sorting makes the result independent of the order in which the
        filesystem happens to list entries.
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".gif")
    image_files = []
    # `filenames` (not `files`) avoids shadowing the `files` name imported
    # from google.colab at the top of the notebook.
    for root, _, filenames in os.walk(folder):
        for filename in filenames:
            if filename.lower().endswith(image_extensions):
                image_files.append(os.path.join(root, filename))
    return sorted(image_files)

# ===============================
# Split data & copy
# ===============================
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# A dedicated, seeded RNG keeps the split identical across runs without
# touching the global `random` state.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

for class_name in classes:
    class_path = os.path.join(dataset_dir, class_name)
    # Sort first so the seeded shuffle starts from the same order every run.
    images = sorted(list_images(class_path))
    if not images:
        print(f"⚠️ Tidak ada gambar ditemukan di {class_path}")
    split_rng.shuffle(images)

    total = len(images)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    # The first train_end images go to train, the next slice to validation,
    # and whatever remains to test.
    # NOTE(review): files are copied under their basename, so two source files
    # with the same name in different sub-folders would overwrite each other.
    for i, img_path in enumerate(images):
        if i < train_end:
            split_name = "train"
        elif i < val_end:
            split_name = "validation"
        else:
            split_name = "test"

        dst = os.path.join(split_dir, split_name, class_name, os.path.basename(img_path))
        shutil.copy2(img_path, dst)

print("✅ Dataset berhasil dibagi ke folder 'split_dataset'")

# ===============================
# Check the number of images per class
# ===============================
print("\nJumlah gambar per kelas:")
for split in ['train', 'validation', 'test']:
    split_path = os.path.join(split_dir, split)
    print(f"\n📂 {split} set:")
    # Sorted so the report is printed in a stable order.
    for class_name in sorted(os.listdir(split_path)):
        class_path = os.path.join(split_path, class_name)
        if os.path.isdir(class_path):
            count = len(os.listdir(class_path))
            print(f"  {class_name}: {count} gambar")

# ===============================
# ImageDataGenerator
# ===============================
# Augmentation is applied to the training data only; validation and test
# images are just rescaled to [0, 1].
# NOTE: the modelling cell later rebuilds these generators with a different
# target size, so the ones created here mainly confirm the folder layout.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    horizontal_flip=True,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

# shuffle=False keeps test predictions aligned with the file order
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

✅ Dataset berhasil dibagi ke folder 'split_dataset'

Jumlah gambar per kelas:

📂 train set:
  dog: 3625 gambar
  bird: 2904 gambar
  cat: 2810 gambar

📂 validation set:
  dog: 777 gambar
  bird: 622 gambar
  cat: 602 gambar

📂 test set:
  dog: 778 gambar
  bird: 623 gambar
  cat: 603 gambar
Found 9339 images belonging to 3 classes.
Found 2001 images belonging to 3 classes.
Found 2004 images belonging to 3 classes.


## Modelling

In [None]:
# ================================
# 1. Enable mixed precision
# ================================
# NOTE(review): float16 compute is presumably only a speed-up on a GPU/TPU
# runtime — confirm the Colab runtime type before relying on it.
mixed_precision.set_global_policy('mixed_float16')

# ================================
# 2. Dataset paths
# ================================
split_dir = "/content/split_dataset"
train_dir = os.path.join(split_dir, 'train')
validation_dir = os.path.join(split_dir, 'validation')
test_dir = os.path.join(split_dir, 'test')

# ================================
# 3. Data generators
# ================================
IMG_SIZE = (96, 96)
BATCH_SIZE = 32

# Light augmentation for training; validation/test are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    zoom_range=0.1,
    horizontal_flip=True
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

validation_generator = val_test_datagen.flow_from_directory(
    validation_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# shuffle=False keeps test predictions aligned with the file order
test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

num_classes = len(train_generator.class_indices)

# ================================
# 4. MobileNetV2 model (transfer learning)
# ================================
# The input shape is derived from IMG_SIZE so the generators and the model
# cannot drift apart when the image size is changed.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))
base_model.trainable = False  # freeze every backbone layer for stage 1

# The output layer is forced to float32 so the softmax probabilities are not
# computed in float16 under the mixed-precision policy.
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax', dtype='float32')
])

model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0005),
    metrics=['accuracy']
)

# Callback: stop training early once validation accuracy reaches 95%
class StopOnHighAcc(tf.keras.callbacks.Callback):
    """Stop training as soon as ``val_accuracy`` is at least 0.95.

    The check runs at the end of every epoch. Epochs whose logs carry no
    ``val_accuracy`` entry (for example when no validation data is given)
    are skipped instead of raising.
    """

    def on_epoch_end(self, epoch, logs=None):
        # `logs` may be None and the metric may be absent; guard both so the
        # comparison never runs against None.
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is not None and val_accuracy >= 0.95:
            print("✅ Akurasi validasi ≥ 95%, stop training tahap 1.")
            self.model.stop_training = True

# ================================
# 5. Training stage 1 (fast)
# ================================
# Only the classifier head is trained here; the backbone is still frozen.
print("\n🚀 Tahap 1: Training cepat (hanya classifier)")
history_stage1 = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=[StopOnHighAcc()]
)

# ================================
# 6. Fine-tuning MobileNetV2
# ================================
print("\n🔧 Tahap 2: Fine-tuning 15 layer terakhir MobileNetV2")
base_model.trainable = True
# Freeze everything except the last 15 backbone layers.
for layer in base_model.layers[:-15]:
    layer.trainable = False
# Keep BatchNormalization layers frozen inside the unfrozen tail as well:
# a non-trainable BatchNormalization layer runs in inference mode, so its
# pretrained moving statistics are not overwritten by small fine-tuning batches.
for layer in base_model.layers[-15:]:
    if isinstance(layer, BatchNormalization):
        layer.trainable = False

# Re-compile so the changed `trainable` flags take effect.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-5),  # smaller learning rate for stability
    metrics=['accuracy']
)

history_stage2 = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10  # kept short
)

Found 9339 images belonging to 3 classes.
Found 2001 images belonging to 3 classes.
Found 2004 images belonging to 3 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step

🚀 Tahap 1: Training cepat (hanya classifier)


  self._warn_if_super_not_called()


Epoch 1/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m667s[0m 2s/step - accuracy: 0.7816 - loss: 0.5457 - val_accuracy: 0.8906 - val_loss: 0.2755
Epoch 2/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m629s[0m 2s/step - accuracy: 0.8892 - loss: 0.2736 - val_accuracy: 0.8891 - val_loss: 0.2811
Epoch 3/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m630s[0m 2s/step - accuracy: 0.8836 - loss: 0.2864 - val_accuracy: 0.9050 - val_loss: 0.2424
Epoch 4/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m631s[0m 2s/step - accuracy: 0.9049 - loss: 0.2443 - val_accuracy: 0.9010 - val_loss: 0.2428
Epoch 5/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m639s[0m 2s/step - accuracy: 0.9004 - loss: 0.2422 - val_accuracy: 0.9005 - val_loss: 0.2483
Epoch 6/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m632s[0m 2s/step - accuracy: 0.9043 - loss: 0.2308 - val_accuracy: 0.8996 - val_loss: 0.2443
Epoch 7/20
[1m292/292

## Evaluasi dan Visualisasi

## Konversi Model

In [None]:
# ===============================
# EVALUATION
# ===============================
loss, accuracy = model.evaluate(test_generator, verbose=2)
print("📊 Hasil evaluasi:")
print(f"   Loss     : {loss:.4f}")
print(f"   Accuracy : {accuracy:.4f}")

# ===============================
# VISUALIZATION
# ===============================
# Training ran in two stages (history_stage1, history_stage2); there is no
# single `history` object, so the per-epoch metrics of both stages are
# concatenated into one curve.
full_history = {
    metric: history_stage1.history[metric] + history_stage2.history[metric]
    for metric in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
}
acc = full_history['accuracy']
val_acc = full_history['val_accuracy']
loss_hist = full_history['loss']
val_loss = full_history['val_loss']

epochs_range = range(len(acc))
# Position between the last stage-1 epoch and the first fine-tuning epoch
fine_tune_start = len(history_stage1.history['accuracy']) - 0.5

plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.axvline(fine_tune_start, linestyle='--', color='gray', label='Fine-tuning')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training & Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss_hist, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.axvline(fine_tune_start, linestyle='--', color='gray', label='Fine-tuning')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training & Validation Loss')

plt.show()

# ===============================
# MODEL CONVERSION
# ===============================
print("\n💾 Menyimpan model dalam berbagai format...")

# SavedModel
export_dir = 'saved_model/'
tf.saved_model.save(model, export_dir)
print("✅ SavedModel disimpan di 'saved_model/'")

# TF-Lite (converted from the SavedModel written above)
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
tflite_model = converter.convert()
tflite_model_file = pathlib.Path('tflite/model.tflite')
tflite_model_file.parent.mkdir(parents=True, exist_ok=True)
tflite_model_file.write_bytes(tflite_model)
print("✅ Model TFLite disimpan di 'tflite/model.tflite'")

# Label.txt — one class name per line, ordered by class index so that line i
# corresponds to output neuron i.
class_indices = train_generator.class_indices
with open('tflite/label.txt', 'w') as f:
    f.write('\n'.join(sorted(class_indices, key=class_indices.get)))
print("✅ Label disimpan di 'tflite/label.txt'")

# TensorFlow.js
tfjs.converters.save_keras_model(model, 'tfjs_model')
print("✅ Model TFJS disimpan di folder 'tfjs_model/'")

In [ ]:
# Upload a test image
uploaded = files.upload()
if not uploaded:
    # Give a clear message instead of an IndexError on an empty upload
    raise ValueError("❌ Tidak ada gambar yang diunggah.")
img_path = next(iter(uploaded))

# Load the class labels (one name per line, in class-index order)
with open('tflite/label.txt', 'r') as f:
    class_names = f.read().splitlines()

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path='tflite/model.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Take the image size from the model's input tensor so it always matches.
# The shape entries are cast to plain Python ints before being handed to
# the image loader.
target_size = tuple(int(dim) for dim in input_details[0]['shape'][1:3])

# Preprocess the image the same way as during training: resize, scale to [0, 1]
img = image.load_img(img_path, target_size=target_size)
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0) / 255.0

# Inference
interpreter.set_tensor(input_details[0]['index'], img_array.astype(np.float32))
interpreter.invoke()
prediction = interpreter.get_tensor(output_details[0]['index'])

predicted_index = np.argmax(prediction[0])
predicted_class = class_names[predicted_index]
confidence = prediction[0][predicted_index]

print(f"Prediksi: {predicted_class} (Probabilitas: {confidence:.2f})")

# Show the image with its prediction and save the figure
plt.imshow(img)
plt.title(f'Prediksi: {predicted_class} ({confidence:.2f})')
plt.axis('off')
plt.savefig('inference_result.png')
plt.show()