# IMPORT DATASET

In [None]:
"""# Install kaggle dan kagglehub jika belum
!pip install -q kagglehub

import kagglehub

# Download dataset dari Kaggle (otomatis simpan ke cache)
path = kagglehub.dataset_download("aliefrahmanhakim/type-of-plastic-waste-dataset")
print("Path to dataset files:", path)"""

In [None]:
import os
import numpy as np
import random
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import shutil
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import (GlobalAveragePooling2D, Conv2D, MaxPool2D, Flatten, Dense,
                                     Dropout, Input, BatchNormalization)
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input
from sklearn.utils.class_weight import compute_class_weight

from sklearn.metrics import confusion_matrix, classification_report

# Silence every warning for the rest of the session ('ignore' is applied to all
# warning categories), keeping the notebook output free of library warnings.
import warnings
warnings.simplefilter('ignore')

# DATA LOAD

In [None]:
# Root directory of the dataset
df = "/kaggle/input/type-of-plastic-waste-dataset"

# 1. Walk the dataset tree and group image paths by the name of the folder
#    that contains them (the folder name is used as the label).
label_to_images = {}  # {label: [image_path, ...]}

for root, dirs, files in os.walk(df):
    label = os.path.basename(root)
    for file in files:
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        # The key is created only when the folder really holds an image.
        label_to_images.setdefault(label, []).append(os.path.join(root, file))

# 2. Keep at most 4 labels (chosen at random when there are more) and draw
#    one random image for each of them.
labels = list(label_to_images)
if len(labels) > 4:
    labels = random.sample(labels, 4)

selected_images = [
    (label, random.choice(label_to_images[label]))  # (label, image_path)
    for label in labels
]

# 3. Show the drawn images side by side in a single row of 4 slots
plt.figure(figsize=(15, 5))
plt.suptitle("Contoh Gambar per Label", fontsize=16, y=1.05)  # figure-level title

for slot, (label, img_path) in enumerate(selected_images, start=1):
    img = Image.open(img_path)

    plt.subplot(1, 4, slot)
    plt.imshow(img)
    plt.title(f"Label: {label}", pad=10)  # `pad` adds space between title and image
    plt.axis('off')

plt.tight_layout()
plt.show()

# DATA UNDERSTANDING

In [None]:
# Count the images in every category (category = name of the containing
# folder) and remember every image path for later cells.
category_counts = {}
image_paths = []

for root, dirs, files in os.walk(df):
    label = os.path.basename(root)
    found = [
        os.path.join(root, file)
        for file in files
        if file.lower().endswith(('.jpg', '.png', '.jpeg'))
    ]
    if found:
        category_counts[label] = category_counts.get(label, 0) + len(found)
        image_paths.extend(found)

total_images = len(image_paths)

# Tabulate the counts, largest category first
df_counts = pd.DataFrame(
    list(category_counts.items()), columns=["Label", "Jumlah Gambar"]
).sort_values("Jumlah Gambar", ascending=False)

# Horizontal bar chart of the counts
plt.figure(figsize=(10, 6))
barplot = sns.barplot(
    data=df_counts,
    x="Jumlah Gambar",
    y="Label",
    orient="h",
    palette="viridis",
)

# Write each value just past the end of its bar; the gap is 1% of the
# longest bar so it scales with the data.
image_counts = df_counts["Jumlah Gambar"]
text_gap = max(image_counts) * 0.01 if len(image_counts) else 0
for row, value in enumerate(image_counts):
    barplot.text(
        value + text_gap,
        row,  # y position of the bar
        f"{value:,}",
        ha='left',
        va='center',
        fontsize=10,
    )

plt.title(f"Distribusi Jumlah Gambar per Kategori\nTotal Dataset: {total_images} gambar", pad=20)
plt.xlabel("Jumlah Gambar", labelpad=10)
plt.ylabel("Kategori Plastik", labelpad=10)
plt.grid(axis='x', linestyle='--', alpha=0.4)
sns.despine(left=True)
plt.tight_layout()
plt.show()

In [None]:
# Show the distribution of image dimensions, measured on a random sample of
# at most 100 images taken from `image_paths`.
image_sizes = []
sample_images = random.sample(image_paths, min(100, len(image_paths)))

for img_path in sample_images:
    try:
        with Image.open(img_path) as img:
            image_sizes.append(img.size)  # (width, height)
    except (OSError, ValueError, Image.DecompressionBombError) as err:
        # Best effort: an unreadable/corrupt file is skipped, but reported so
        # the problem is visible. Only image-reading errors are caught here;
        # a bare `except:` would also swallow KeyboardInterrupt and real bugs.
        print(f"Melewati gambar yang tidak dapat dibaca: {img_path} ({err})")

# Convert to a DataFrame
df_sizes = pd.DataFrame(image_sizes, columns=['Width', 'Height'])

# Plot the width and height histograms next to each other
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
sns.histplot(df_sizes['Width'], kde=True, color='skyblue')
plt.title("Distribusi Lebar Gambar")

plt.subplot(1, 2, 2)
sns.histplot(df_sizes['Height'], kde=True, color='salmon')
plt.title("Distribusi Tinggi Gambar")

plt.tight_layout()
plt.show()

# DATA SPLITTING

In [None]:
# Training and testing directories (80/20)
TRAIN_DIR = "/kaggle/input/type-of-plastic-waste-dataset/train"
TEST_DIR = "/kaggle/input/type-of-plastic-waste-dataset/val"

# Per-class directories. The path components are passed to os.path.join
# separately so that it actually performs the join.
train_HDPE = os.path.join(TRAIN_DIR, 'HDPE')
train_PET = os.path.join(TRAIN_DIR, 'PET')
train_PP = os.path.join(TRAIN_DIR, 'PP')
train_PS = os.path.join(TRAIN_DIR, 'PS')
test_HDPE = os.path.join(TEST_DIR, 'HDPE')
test_PET = os.path.join(TEST_DIR, 'PET')
test_PP = os.path.join(TEST_DIR, 'PP')
test_PS = os.path.join(TEST_DIR, 'PS')

# Report the number of directory entries found for every class and split
print("TRAIN\n")
for class_name, class_dir in (("HDPE", train_HDPE), ("PET", train_PET),
                              ("PP", train_PP), ("PS", train_PS)):
    print(f"Total number of {class_name} images in training set: ", len(os.listdir(class_dir)))
print("\nTEST\n")
for class_name, class_dir in (("HDPE", test_HDPE), ("PET", test_PET),
                              ("PP", test_PP), ("PS", test_PS)):
    print(f"Total number of {class_name} images in test set: ", len(os.listdir(class_dir)))

In [None]:
# Prepare the image pipelines for training, validation and testing.
# Both factories run `preprocess_input` on every image; only `datagen`
# holds back 20% of TRAIN_DIR, which is served through subset='validation'.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.2)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Options shared by all three directory iterators
flow_options = {'target_size': (512, 512), 'class_mode': 'categorical'}

train_generator = datagen.flow_from_directory(
    TRAIN_DIR,
    subset='training',
    batch_size=32,
    shuffle=True,
    **flow_options,
)

validation_generator = datagen.flow_from_directory(
    TRAIN_DIR,
    subset='validation',
    batch_size=32,
    shuffle=False,
    **flow_options,
)

# One image per batch and no shuffling for the test iterator
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    batch_size=1,
    shuffle=False,
    **flow_options,
)

# MODELLING

## EfficientNet

In [None]:
# EfficientNetB7 backbone with ImageNet weights and without its classifier top
base_model = EfficientNetB7(
    include_top=False,
    weights='imagenet',
    input_shape=(512, 512, 3),
)

# Classification head stacked on the backbone output:
# global average pooling -> batch norm -> two (Dense + Dropout) stages -> softmax
pooled = GlobalAveragePooling2D()(base_model.output)
x = BatchNormalization()(pooled)
for units, drop_rate in ((512, 0.5), (256, 0.3)):
    x = Dense(units, activation='relu')(x)
    x = Dropout(drop_rate)(x)
predictions = Dense(4, activation='softmax')(x)  # 4 output classes

model_3 = Model(inputs=base_model.input, outputs=predictions)

In [None]:
RESULT_PATH = ''  # Replace with the path that holds the results of a previous training run
OUTPUT_DIR = '/kaggle/working/outputs'

# If results of an earlier run are available, bring them into the working
# directory so the model can be loaded instead of trained again.
if os.path.exists(f"{RESULT_PATH}/training_outputs"):
    print("Memuat file hasil training sebelumnya...")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Copy from the dataset into the working directory
    shutil.copytree(f"{RESULT_PATH}/training_outputs", OUTPUT_DIR, dirs_exist_ok=True)

    # Also copy the files into the root of the working directory
    for file in ['final_model_3.h5', 'best_model_3.h5', 'training_history_3.csv', 'training_log_3.csv']:
        if os.path.exists(f"{OUTPUT_DIR}/{file}"):
            shutil.copy(f"{OUTPUT_DIR}/{file}", '/kaggle/working/')


model_path = 'final_model_3.h5'
best_model_path = 'best_model_3.h5'
history_path = 'training_history_3.csv'
log_path = 'training_log_3.csv'
retrain = False  # Set to True to force a new training run

if os.path.exists(model_path) and not retrain:
    print("Model sudah dilatih sebelumnya. Memuat hasil yang tersimpan...")

    # Load the trained model
    model_3 = load_model(model_path)

    # Load the saved history. It is bound to `full_history_df` (the name the
    # training branch produces and later cells plot from) so that downstream
    # cells work no matter which branch ran; `history_df` is kept as an alias.
    full_history_df = pd.read_csv(history_path)
    history_df = full_history_df
    print("\n=== Riwayat Pelatihan ===")
    print(history_df.tail())

    # Show the per-epoch log written by CSVLogger, when available
    if os.path.exists(log_path):
        epoch_logs = pd.read_csv(log_path)
        print("\n=== Log Epoch Lengkap ===")
        print(epoch_logs)

else:
    print("Model belum dilatih. Memulai pelatihan...")

    # Remove stale files from a previous run
    for f in [model_path, history_path, log_path, best_model_path]:
        if os.path.exists(f):
            os.remove(f)

    # Write the per-epoch training log to CSV. append=True is required because
    # the same logger is used by two consecutive fit() calls; without it the
    # fine-tuning phase would overwrite the log of the head phase. The old log
    # was removed above, so appending always starts from an empty file.
    csv_logger = CSVLogger(log_path, append=True)

    # Keep the best model (lowest validation loss) seen during training
    checkpoint = ModelCheckpoint(
        best_model_path,
        monitor='val_loss',
        save_best_only=True,
        mode='min',
        verbose=1
    )
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=15,
        min_delta=0.0001,
        restore_best_weights=True,
        verbose=1
    )

    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=5,
        min_lr=1e-7,
        mode='min',
        verbose=1
    )

    # Balanced class weights, keyed by the class ids that are actually present
    # in the training subset (rather than assuming they are exactly 0..n-1).
    class_indices = train_generator.classes
    present_classes = np.unique(class_indices)
    class_weights = compute_class_weight('balanced',
                                         classes=present_classes,
                                         y=class_indices)
    class_weights_dict = {
        int(class_id): float(weight)
        for class_id, weight in zip(present_classes, class_weights)
    }

    # Phase 1: freeze the backbone and train only the classification head
    base_model.trainable = False

    initial_lr = 1e-3
    model_3.compile(
        optimizer=Adam(learning_rate=initial_lr),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    epochs_head = 15
    history_head = model_3.fit(
        train_generator,
        epochs=epochs_head,  # train the head alone for `epochs_head` epochs first
        validation_data=validation_generator,
        class_weight=class_weights_dict,
        callbacks=[csv_logger, checkpoint, early_stopping, reduce_lr]
    )

    # Phase 2: unfreeze the backbone for fine-tuning, keeping its first
    # `freeze_first_n` layers frozen.
    # NOTE(review): BatchNormalization layers among the unfrozen layers become
    # trainable as well; the Keras transfer-learning guide recommends keeping
    # them frozen while fine-tuning — consider revisiting.
    base_model.trainable = True

    freeze_first_n = 400
    for layer in base_model.layers[:freeze_first_n]:
        layer.trainable = False

    # Recompile so the new trainable flags take effect, with a low learning rate
    finetune_lr = 1e-5
    model_3.compile(
        optimizer=Adam(learning_rate=finetune_lr),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Continue training for the remaining epochs, numbering them after the
    # last epoch of the head phase
    total_epochs = 100
    initial_epoch_finetune = history_head.epoch[-1] + 1

    history_finetune = model_3.fit(
        train_generator,
        epochs=total_epochs,
        initial_epoch=initial_epoch_finetune,
        validation_data=validation_generator,
        class_weight=class_weights_dict,
        callbacks=[csv_logger, checkpoint, early_stopping, reduce_lr]
    )
    print(f"Memuat bobot terbaik dari {best_model_path} sebelum menyimpan model final.")
    model_3.load_weights(best_model_path)

    # Save the final model
    model_3.save(model_path)

    # Save the combined history of both phases
    history_head_df = pd.DataFrame(history_head.history)
    history_finetune_df = pd.DataFrame(history_finetune.history)
    full_history_df = pd.concat([history_head_df, history_finetune_df], ignore_index=True)
    full_history_df.to_csv(history_path, index=False)
    history_df = full_history_df  # same alias as in the load branch

    print(f"Model akhir disimpan di: {model_path}")
    print(f"History lengkap disimpan di: {history_path}")

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Gather all artifacts in OUTPUT_DIR and zip them for download
    for file in [model_path, best_model_path, history_path, log_path]:
        if os.path.exists(file):
            shutil.copy(file, OUTPUT_DIR)

    shutil.make_archive('training_outputs', 'zip', OUTPUT_DIR)
    print("File hasil training siap di-download!")

In [None]:
# Plot the accuracy and loss curves (training in red, validation in blue)
acc, val_acc, loss, val_loss = (
    full_history_df[column]
    for column in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

epochs = range(len(acc))

# One figure per metric: (training curve, validation curve, y-label, title)
for train_curve, val_curve, y_label, chart_title in (
    (acc, val_acc, 'accuracy', 'Training and Validation Accuracy'),
    (loss, val_loss, 'loss', 'Training and Validation Loss'),
):
    plt.plot(epochs, train_curve, 'r')
    plt.plot(epochs, val_curve, 'b')
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
# Evaluate on the test set: confusion matrix and classification report.
# Restart the iterator before predicting over all of its batches.
test_generator.reset()

preds_3 = model_3.predict(test_generator, steps=len(test_generator), verbose=1)
pred_labels = preds_3.argmax(axis=1)  # index of the highest score per sample

# Ground truth and class names as reported by the test iterator
true_labels = test_generator.classes
class_names = list(test_generator.class_indices)

# Confusion matrix with readable row/column labels
cm = confusion_matrix(true_labels, pred_labels)
actual_axis = [f'Actual {name}' for name in class_names]
predicted_axis = [f'Predicted {name}' for name in class_names]
cm_df = pd.DataFrame(cm, index=actual_axis, columns=predicted_axis)

# Heatmap of the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted Class")
plt.ylabel("Actual Class")
plt.show()

# Per-class precision / recall / F1
print("\nClassification Report:")
print(classification_report(true_labels, pred_labels, target_names=class_names, digits=4))