<a href="https://colab.research.google.com/github/UdaraChamidu/Image-Processing-Based-Smart-Waste-Management-System/blob/udara/solid_waste_detection_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install datasets tensorflow opencv-python matplotlib pandas scikit-learn




In [2]:
from datasets import load_dataset
import numpy as np
import cv2
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
from datasets import load_dataset
import numpy as np
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

IMG_SIZE = 224  # Changed to 224 for MobileNetV2 compatibility

images = []
labels = []

print("[INFO] Loading TrashNet dataset from Hugging Face...")

# Load dataset from Hugging Face
try:
    dataset = load_dataset("garythung/trashnet")
    print(f"Dataset loaded successfully. Train samples: {len(dataset['train'])}")
except Exception as e:
    print(f"Error loading dataset: {e}")
    exit()

def enhance_image(img_pil):
    """Enhanced image processing for PIL images"""
    try:
        # Convert PIL to OpenCV format (RGB to BGR)
        img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)

        # Resize to target size
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Histogram equalization to improve contrast
        equalized = cv2.equalizeHist(gray)

        # Apply Gaussian blur to reduce noise
        blurred = cv2.GaussianBlur(equalized, (3, 3), 0)

        # Optional: Edge enhancement using Sobel filter
        sobelx = cv2.Sobel(blurred, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(blurred, cv2.CV_64F, 0, 1, ksize=3)
        sobel_combined = cv2.magnitude(sobelx, sobely)
        sobel_combined = np.uint8(sobel_combined)

        # Normalize to [0,1]
        final = sobel_combined / 255.0

        # Convert back to 3D (grayscale -> fake RGB) for CNN compatibility
        final = cv2.merge([final, final, final])
        return final

    except Exception as e:
        print(f"Error in image processing: {e}")
        return None

def enhance_image_simple(img_pil):
    """Simple image processing without edge enhancement (alternative)"""
    try:
        # Convert PIL to numpy array and keep RGB format
        img = np.array(img_pil)

        # Resize to target size
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        # Normalize to [0,1]
        img = img.astype(np.float32) / 255.0

        return img

    except Exception as e:
        print(f"Error in simple image processing: {e}")
        return None

# Process dataset samples
print("[INFO] Processing images...")
processed_count = 0

for i, sample in enumerate(dataset['train']):
    try:
        # Get image and label
        pil_image = sample['image']
        label = sample['label']

        # Process image (you can choose between enhance_image or enhance_image_simple)
        processed_img = enhance_image(pil_image)  # Use complex processing
        # processed_img = enhance_image_simple(pil_image)  # Or use simple processing

        if processed_img is not None:
            images.append(processed_img)
            labels.append(label)
            processed_count += 1

            # Progress indicator
            if processed_count % 500 == 0:
                print(f"Processed {processed_count} images...")

    except Exception as e:
        print(f"Error processing sample {i}: {e}")
        continue

print(f"Dataset processing complete: {len(images)} images loaded")

# Convert to NumPy arrays
X = np.array(images, dtype="float32")
y = np.array(labels)

print(f"[INFO] Data shape: X={X.shape}, y={y.shape}")
print(f"[INFO] Unique labels: {np.unique(y)}")

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_categorical = to_categorical(y_encoded)

print(f"[INFO] Label encoding complete. Classes: {le.classes_}")
print(f"[INFO] Categorical shape: {y_categorical.shape}")

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical)

print(f"[INFO] Dataset ready: {len(X_train)} training samples, {len(X_test)} test samples")
print(f"[INFO] Input shape for model: {X_train.shape[1:]}")

# Display sample information
import matplotlib.pyplot as plt

# Show a sample image from the training set
if len(X_train) > 0:
    sample_idx = min(100, len(X_train) - 1)  # Use index 100 or last available
    sample_image = X_train[sample_idx]

    plt.figure(figsize=(6, 4))
    plt.imshow(sample_image)
    plt.title(f"Sample Image - Label: {le.inverse_transform([np.argmax(y_train[sample_idx])])[0]}")
    plt.axis("off")
    plt.show()

# Show label distribution
import pandas as pd
label_counts = pd.Series(le.inverse_transform(np.argmax(y_train, axis=1))).value_counts()
print("\n[INFO] Training data distribution:")
print(label_counts)

plt.figure(figsize=(10, 6))
label_counts.plot(kind='bar')
plt.title('Distribution of Waste Categories in Training Data')
plt.xlabel('Waste Category')
plt.ylabel('Number of Images')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Verify data is ready for MobileNetV2
print(f"\n[INFO] Data verification:")
print(f"X_train range: [{X_train.min():.3f}, {X_train.max():.3f}]")
print(f"Expected input shape for MobileNetV2: (224, 224, 3)")
print(f"Actual input shape: {X_train.shape[1:]}")

if X_train.shape[1:] == (224, 224, 3):
    print("✅ Data shape is compatible with MobileNetV2")
else:
    print("❌ Data shape needs adjustment for MobileNetV2")

print(f"Number of classes: {len(le.classes_)}")
print(f"Classes: {list(le.classes_)}")

[INFO] Loading TrashNet dataset from Hugging Face...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset loaded successfully. Train samples: 5054
[INFO] Processing images...
Processed 500 images...
Processed 1000 images...
Processed 1500 images...
Processed 2000 images...
Processed 2500 images...
Processed 3000 images...
Processed 3500 images...
Processed 4000 images...
Processed 4500 images...
Processed 5000 images...
Dataset processing complete: 5054 images loaded
[INFO] Data shape: X=(5054, 224, 224, 3), y=(5054,)
[INFO] Unique labels: [0 1 2 3 4 5]
[INFO] Label encoding complete. Classes: [0 1 2 3 4 5]
[INFO] Categorical shape: (5054, 6)


In [None]:
import matplotlib.pyplot as plt

# Show a sample image from the training set
index = 1000  # Change this to see other images
sample_image = X_train[index]

plt.figure(figsize=(4, 4))
plt.imshow(sample_image)
plt.title(f"Label: {le.inverse_transform([np.argmax(y_train[index])])[0]}")
plt.axis("off")
plt.show()


In [None]:
import pandas as pd
pd.Series(le.inverse_transform(np.argmax(y_train, axis=1))).value_counts().plot(kind='bar')


In [None]:
# Step 4: Data augmentation
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=30,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)

In [None]:
# Step 5: Build model (MobileNetV2)
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model

base_model = MobileNetV2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze base

x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
output = Dense(6, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
output = Dense(6, activation='softmax')(x)

In [None]:

model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Step 6: Callbacks
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ReduceLROnPlateau(patience=3, factor=0.2, verbose=1),
    ModelCheckpoint("best_model.keras", save_best_only=True)
]

In [None]:
# Step 7: Training
batch_size = 32
history = model.fit(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_test, y_test),
    epochs=30,
    callbacks=callbacks
)

In [None]:
model.save("trashnet_mobilenetv2_model.h5")
print("Model saved successfully!")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Predict class probabilities
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

# Generate classification report
print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=le.classes_))

# Generate confusion matrix
cm = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=le.classes_, yticklabels=le.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

In [None]:
print(classification_report(y_true, y_pred, target_names=le.classes_))

In [None]:
# Accuracy
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Training vs Validation Accuracy")
plt.show()

# Loss
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.title("Training vs Validation Loss")
plt.show()


In [None]:
from PIL import Image

def predict_image(path):
    img = Image.open(path).resize((224, 224))
    img_array = np.array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    pred = model.predict(img_array)
    class_idx = np.argmax(pred)
    print(f"Predicted class: {le.inverse_transform([class_idx])[0]}")

# Example
predict_image("/content/dataset-original/glass/glass104.jpg")

In [None]:
predict_image("/content/download.jpg")