### Waste Product Classification Using Transfer Learning (VGG16)


This project builds an image classification system using transfer learning with
the VGG16 architecture to classify waste materials into Organic (O) and
Recyclable (R) categories. The model is trained in two stages: feature extraction
and fine-tuning. Performance is evaluated using accuracy and classification
reports, and predictions are visualized to interpret model behavior.


### Import Required Libraries


In [None]:
import os
import glob
import zipfile
import requests
import numpy as np
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import vgg16
from sklearn import metrics

### Download and Extract the Dataset from the Source URL


In [None]:
# NOTE(review): "DATA_SOURCE_URL" looks like a placeholder -- replace it with
# the real URL of the dataset archive before running this cell.
DATASET_URL = "DATA_SOURCE_URL"

ZIP_NAME = "dataset.zip"

# (connect, read) timeouts in seconds. Requests applies no timeout by default,
# so without this a stalled server would hang the notebook indefinitely.
DOWNLOAD_TIMEOUT = (10, 60)

# Stream the archive to disk in 8 KiB chunks so it never has to fit in memory.
with requests.get(DATASET_URL, stream=True, timeout=DOWNLOAD_TIMEOUT) as response:
    # Fail fast on HTTP errors instead of saving an error page as the archive.
    response.raise_for_status()
    with open(ZIP_NAME, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

def extract_zip(zip_name):
    """Extract every member of the archive *zip_name*, showing a progress bar.

    Members are extracted one at a time with ``ZipFile.extract`` and no
    explicit target path, and the tqdm bar advances by one per member.

    Args:
        zip_name: Path to the ZIP archive to unpack.
    """
    with zipfile.ZipFile(zip_name, "r") as archive:
        entries = archive.infolist()
        progress = tqdm(total=len(entries), unit="file")
        with progress:
            for entry in entries:
                archive.extract(entry)
                progress.update(1)

# Unpack the downloaded archive, then delete the ZIP file since it is no
# longer needed once its contents are on disk.
extract_zip(ZIP_NAME)
Path(ZIP_NAME).unlink()

### Configuration for dataset


In [None]:
# Square input resolution; used below as the generators' target_size and as
# the spatial part of the VGG16 input shape.
IMG_ROWS = IMG_COLS = 150

# Training settings.
BATCH_SIZE = 32
EPOCHS = 10
VAL_SPLIT = 0.2  # fraction of TRAIN_DIR reserved for validation
SEED = 42        # seed passed to the training/validation directory iterators

# Dataset locations, relative to the working directory.
TRAIN_DIR = "dataset/train"
TEST_DIR = "dataset/test"

# Sub-directory name of each class.
CLASS_NAMES = ["O", "R"]  # O = Organic, R = Recyclable

### Data Generators


In [None]:
# Light augmentation applied to training images only: small horizontal and
# vertical shifts plus random left-right flips.
_train_augmentation = {
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": True,
}

# Every split rescales pixel values by 1/255. The training and validation
# generators share the same validation_split because both read TRAIN_DIR.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=VAL_SPLIT,
    **_train_augmentation,
)

# Validation images are rescaled but never augmented.
val_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=VAL_SPLIT)

test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Options shared by all three directory iterators: resize to the network
# input size, fixed batch size, and a single 0/1 label per image.
_flow_options = {
    "target_size": (IMG_ROWS, IMG_COLS),
    "batch_size": BATCH_SIZE,
    "class_mode": "binary",
}

# Training and validation subsets are both drawn from TRAIN_DIR.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, subset="training", seed=SEED, **_flow_options
)

val_generator = val_datagen.flow_from_directory(
    TRAIN_DIR, subset="validation", seed=SEED, **_flow_options
)

# shuffle=False keeps test batches in a fixed order.
test_generator = test_datagen.flow_from_directory(
    TEST_DIR, shuffle=False, **_flow_options
)

### Feature Extraction Model


In [None]:
# VGG16 convolutional backbone with ImageNet weights and no classifier top.
_vgg_input_shape = (IMG_ROWS, IMG_COLS, 3)
base_model = vgg16.VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=_vgg_input_shape,
)

# Stage 1 (feature extraction): freeze the backbone so only the new dense
# head is trained.
base_model.trainable = False

# Classification head: two 512-unit ReLU layers with dropout, ending in a
# single sigmoid unit for the binary decision.
_classifier_head = [
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(0.3),
    Dense(512, activation="relu"),
    Dropout(0.3),
    Dense(1, activation="sigmoid"),
]

model = Sequential([base_model, *_classifier_head])

model.compile(
    loss="binary_crossentropy",
    optimizer=optimizers.Adam(learning_rate=1e-5),
    metrics=["accuracy"],
)

model.summary()


def exp_decay(epoch):
    """Return the learning rate for *epoch* under exponential decay.

    The rate starts at 1e-5 for epoch 0 and is multiplied by
    ``exp(-0.1)`` for every further epoch.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate ``1e-5 * exp(-0.1 * epoch)``.
    """
    decay_factor = np.exp(-0.1 * epoch)
    return 1e-5 * decay_factor

# Stop once val_loss has not improved for 4 epochs and roll back to the best
# weights seen.
_early_stopping = EarlyStopping(
    monitor="val_loss", patience=4, restore_best_weights=True
)

# Keep only the best-scoring model of this stage on disk.
_best_checkpoint = ModelCheckpoint(
    "vgg16_feature_extraction.keras", save_best_only=True
)

# Apply the exp_decay learning-rate schedule at each epoch.
_lr_schedule = LearningRateScheduler(exp_decay)

callbacks = [_early_stopping, _best_checkpoint, _lr_schedule]

### Train Feature Extraction Model


In [None]:
# Whole batches per epoch; integer division leaves out a trailing partial
# batch.
_train_steps = train_generator.samples // BATCH_SIZE
_val_steps = val_generator.samples // BATCH_SIZE

# Stage 1: train only the dense head on top of the frozen VGG16 backbone.
history_extract = model.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=_train_steps,
    validation_steps=_val_steps,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)

### Fine-Tuning the Model


In [None]:
# Layers of VGG16's last convolutional block that are updated while
# fine-tuning; every other backbone layer stays frozen.
FINE_TUNE_LAYERS = {"block5_conv1", "block5_conv2", "block5_conv3"}

# The backbone was frozen as a whole (`base_model.trainable = False`) for the
# feature-extraction stage. A frozen parent model can report no trainable
# weights even when individual child layers are flagged trainable, in which
# case the block5 layers would silently never be updated. Re-enable the
# parent first, then freeze everything except the chosen layers.
base_model.trainable = True

for layer in base_model.layers:
    layer.trainable = layer.name in FINE_TUNE_LAYERS

# Recompile the model for the fine-tuning stage with a fresh RMSprop
# optimizer.
_fine_tune_optimizer = optimizers.RMSprop(learning_rate=1e-4)

model.compile(
    loss="binary_crossentropy",
    optimizer=_fine_tune_optimizer,
    metrics=["accuracy"],
)

# Same early-stopping policy as stage 1, with a separate checkpoint file and
# no learning-rate schedule.
_ft_early_stopping = EarlyStopping(
    monitor="val_loss", patience=4, restore_best_weights=True
)
_ft_checkpoint = ModelCheckpoint("vgg16_fine_tuned.keras", save_best_only=True)

callbacks_ft = [_ft_early_stopping, _ft_checkpoint]

# Whole batches per epoch; integer division leaves out a trailing partial
# batch.
_ft_train_steps = train_generator.samples // BATCH_SIZE
_ft_val_steps = val_generator.samples // BATCH_SIZE

# Stage 2: continue training the same model with the fine-tuning callbacks.
history_fine = model.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=_ft_train_steps,
    validation_steps=_ft_val_steps,
    epochs=EPOCHS,
    callbacks=callbacks_ft,
    verbose=1,
)

### Model Evaluation


In [None]:
# Reload the checkpoints written by ModelCheckpoint(save_best_only=True)
# during each training stage.
model_extract = tf.keras.models.load_model("vgg16_feature_extraction.keras")
model_finetune = tf.keras.models.load_model("vgg16_fine_tuned.keras")

# Number of test images evaluated per class.
SAMPLES_PER_CLASS = 50

test_imgs, test_labels = [], []

for class_name in CLASS_NAMES:
    # glob returns paths in arbitrary, filesystem-dependent order; sort them
    # so the same subset is evaluated on every run and every machine.
    files = sorted(glob.glob(f"{TEST_DIR}/{class_name}/*"))[:SAMPLES_PER_CLASS]
    for f in files:
        img = tf.keras.preprocessing.image.load_img(f, target_size=(IMG_ROWS, IMG_COLS))
        img = tf.keras.preprocessing.image.img_to_array(img)
        test_imgs.append(img)
        test_labels.append(class_name)

# Fail with a clear message instead of an obscure shape error in predict().
if not test_imgs:
    raise FileNotFoundError(
        f"No test images found under {TEST_DIR!r} for classes {CLASS_NAMES}"
    )

# Apply the same 1/255 rescaling the data generators use.
test_imgs = np.array(test_imgs) / 255.0

def num2class(x):
    """Map sigmoid outputs to class labels.

    Args:
        x: Iterable of rows whose first element is the predicted score.

    Returns:
        A list with "O" for each score below 0.5 and "R" otherwise.
    """
    return ["O" if row[0] < 0.5 else "R" for row in x]

# Predict with both models on the same images and convert scores to labels.
pred_extract = num2class(model_extract.predict(test_imgs))
pred_finetune = num2class(model_finetune.predict(test_imgs))

# Print one classification report per model, feature extraction first.
_reports = (
    ("Feature Extraction Model", pred_extract),
    ("Fine-Tuned Model", pred_finetune),
)
for _title, _predictions in _reports:
    print(_title)
    print(metrics.classification_report(test_labels, _predictions))