In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os


In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("zlatan599/garbage-dataset-classification")

print("Path to dataset files:", path)

path = os.path.join(path,'Garbage_Dataset_Classification' ,'images')

In [None]:
BATCH_SIZE = 32
IMG_SIZE = (224, 224)
DROP_OUT = 0.3
EPOCHS = 5
LEARNING_RATE = 0.0001
NUM_CLASSES = 6
FINE_TUNE_LEARNING_RATE = 0.0001


In [None]:
def load_data():
    full_dataset = tf.keras.utils.image_dataset_from_directory(
        path,
        labels='inferred',
        label_mode='int',
        color_mode='rgb',
        batch_size=BATCH_SIZE,
        image_size=IMG_SIZE,
        shuffle=True,  # Randomly shuffles all images
        seed=123,      # Makes shuffle reproducible
    )   

    total_size = tf.data.experimental.cardinality(full_dataset).numpy()
    train_size = int(0.8 * total_size)  # 80% of batches
    val_size = int(0.1 * total_size)    # 10% of batches
    
    train_dataset = full_dataset.take(train_size)     # First 80% of batches
    remaining = full_dataset.skip(train_size)         # Skip first 80%, get last 20%
    val_dataset = remaining.take(val_size)            # First 10% of remaining (so 10% of total)
    test_dataset = remaining.skip(val_size)           # Skip the validation part, get final 10%
    
    
    return train_dataset, val_dataset, test_dataset



In [None]:
def noramalize(train,val,test):
    normalization_layer = layers.Rescaling(1./255)
    train = train.map(lambda x, y: (normalization_layer(x), y))
    val = val.map(lambda x, y: (normalization_layer(x), y))
    test = test.map(lambda x, y: (normalization_layer(x), y))

    AUTOTUNE = tf.data.AUTOTUNE

    train = train.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
    val = val.cache().prefetch(buffer_size=AUTOTUNE)
    test = test.cache().prefetch(buffer_size=AUTOTUNE)
    return train,val,test

In [None]:
train_orig,val_orig,test_orig = load_data()
train,val,test = noramalize(train_orig,val_orig,test_orig)
NUM_CLASSES = 6


In [None]:
def check_class_distribution(dataset, dataset_name, num_batches=None):
    """Check class distribution in a tf.data.Dataset"""
    all_labels = []
    batch_count = 0
    
    # If num_batches is None, check all batches (might be slow)
    dataset_to_check = dataset.take(num_batches) if num_batches else dataset
    
    for images, labels in dataset_to_check:
        # Handle both single labels and batch of labels
        if len(labels.shape) == 0:  # Single label (batch_size=1)
            all_labels.append(labels.numpy())
        else:  # Batch of labels
            all_labels.extend(labels.numpy())
        batch_count += 1
    
    # Count each class
    unique_labels, counts = np.unique(all_labels, return_counts=True)
    total_samples = len(all_labels)
    
    print(f"\n{dataset_name} Distribution:")
    print(f"Total samples checked: {total_samples} (from {batch_count} batches)")
    print("-" * 40)
    
    for label, count in zip(unique_labels, counts):
        percentage = (count / total_samples) * 100
        print(f"Class {label}: {count:4d} samples ({percentage:5.1f}%)")
    
    return dict(zip(unique_labels, counts))

# Usage:
train_dist = check_class_distribution(train, "Training", num_batches=100)
val_dist = check_class_distribution(val, "Validation")
test_dist = check_class_distribution(test, "Test")

In [None]:
for image_batch, labels_batch in train:
    print(image_batch.shape)
    print(labels_batch.shape)
    break

In [None]:
for images, labels in train_orig.take(5):
    print(f"Test batch labels: {labels.numpy()}")

In [None]:
index = 13
example = train_orig.unbatch().take(1).as_numpy_iterator().next()

exaples = train_orig.unbatch().take(9).as_numpy_iterator()

plt.figure(figsize=(9, 9))

for i, (image, label) in enumerate(exaples):
    plt.subplot(3, 3, i + 1)
    plt.imshow(image.astype("uint8"))
    plt.axis("off")
plt.tight_layout()
plt.show()

In [None]:
def data_augmentor():

    data_aug = keras.models.Sequential()
    data_aug.add(layers.RandomFlip("horizontal_and_vertical"))
    data_aug.add(layers.RandomRotation(0.2))

    return data_aug

In [None]:
def model(augmentation_layer=data_augmentor()):
    IMG_SHAPE = IMG_SIZE + (3,)

    preprocess_input = keras.applications.mobilenet_v2.preprocess_input

    base_model = keras.applications.MobileNetV2(
        input_shape=IMG_SHAPE,
        include_top=False,
        weights='imagenet'
    )
    base_model.trainable = True

    # inputs = keras.Input(shape=IMG_SHAPE)

    # x = augmentation_layer(inputs)
    # x = base_model(x, training=False)
    # x = layers.GlobalAveragePooling2D()(x)
    # x = layers.Dropout(DROP_OUT)(x)
    # x = layers.Dense(128, activation='relu')(x)
    # outputs = layers.Dense(NUM_CLASSES, activation='softmax')(x)
    #model = keras.Model(inputs, outputs)   

    
    model = keras.models.Sequential([
        keras.Input(shape=IMG_SHAPE),
        augmentation_layer,
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(DROP_OUT),
        layers.Dense(128, activation='relu'),
        layers.Dropout(DROP_OUT),
        layers.Dense(NUM_CLASSES, activation='softmax')
    ])



    return model


In [None]:
train_model = model()

model = model()  # Create your model
print(model.summary())

# Check the last layer specifically:
print(f"Output layer units: {model.layers[-1].units}")
print(f"NUM_CLASSES setting: {NUM_CLASSES}")


In [None]:
if os.path.exists('./models/Full_garbage.weights.h5'):
    train_model.load_weights('./models/Full_garbage.weights.h5')

train_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath='./models/Full_garbage.weights.h5',
    monitor='val_accuracy',  # or 'val_loss'
    save_best_only=True,
    save_weights_only=True,  # saves entire model
    mode='max',  # 'max' for accuracy, 'min' for loss
    verbose=1
)


with tf.device('/GPU:0'):
    history = train_model.fit(
        train,
        epochs=EPOCHS,
        validation_data=val,
        callbacks=[checkpoint_callback]
    )

In [None]:

# tune_model = train_model.layers[2] 
# tune_model.trainable = True

# # Fine-tune from this layer onwards
# fine_tune_at = 80

# # Freeze all layers before the `fine_tune_at` layer
# for layer in tune_model.layers[:fine_tune_at]:
#     layer.trainable = False

# tune_model.compile(
#     optimizer=keras.optimizers.Adam(learning_rate=FINE_TUNE_LEARNING_RATE),
#     loss=keras.losses.SparseCategoricalCrossentropy(),
#     metrics=['accuracy']
# )

# with tf.device('/GPU:0'):
#     fine_tune_history = train_model.fit(
#         train,
#         epochs=EPOCHS,
#         validation_data=val,
#         callbacks=[checkpoint_callback]
#     )



In [None]:
acc = [0.] + history.history['accuracy']
val_acc = [0.] + history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,2])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

In [22]:
# Evaluate the model on the test dataset
if os.path.exists('./models/best_garbage.weights.h5'):
    train_model.load_weights('./models/best_garbage.weights.h5')
    
test_loss, test_accuracy = train_model.evaluate(test)
print(f'Test accuracy: {test_accuracy:.4f}, Test loss: {test_loss:.4f}')
# -*- coding: utf-8 -*-

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 60ms/step - accuracy: 0.9035 - loss: 0.4820
Test accuracy: 0.9035, Test loss: 0.4820
