# WHOLE FLOW FOR EASY RERUN

## Install dependencies

In [None]:
pip install tensorflow opencv-python roboflow scikit-learn matplotlib



In [None]:
import os
import shutil
import yaml
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt
import random

## prepare dataset

In [None]:
!pip install roboflow

from roboflow import Roboflow
# SECURITY: API keys must not be committed inside the notebook. Read the key from
# the ROBOFLOW_API_KEY environment variable, or prompt for it so it never ends up
# in the saved file. (A key that was previously committed should be revoked.)
import getpass

roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass.getpass("Roboflow API key: ")
rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("deep-learning-rp9gw").project("final-proj-jpjtg")
version = project.version(4)
# YOLOv8-format export; the following cells expect it under ./final-proj-4
dataset = version.download("yolov8")

loading Roboflow workspace...
loading Roboflow project...


In [None]:
# Locations inside the downloaded YOLOv8 export (training split)
dataset_dir = "final-proj-4"
images_dir, labels_dir = (
    os.path.join(dataset_dir, "train", sub_dir) for sub_dir in ("images", "labels")
)
yaml_path = os.path.join(dataset_dir, "data.yaml")

# data.yaml holds the ordered class names that the label files index into
with open(yaml_path, 'r') as stream:
    data = yaml.safe_load(stream)
class_names = data['names']

print("Class names:", class_names)

# One sub-folder per class under the classification output directory
output_dir = "final-project-classification"
os.makedirs(output_dir, exist_ok=True)
for class_name in class_names:
    class_folder = os.path.join(output_dir, class_name)
    os.makedirs(class_folder, exist_ok=True)

# YOLO label files -> classification folders.
# An image is copied into the folder of EVERY class annotated in it.
for label_file in os.listdir(labels_dir):
    with open(os.path.join(labels_dir, label_file), "r") as f:
        annotations = [row.strip() for row in f.readlines() if row.strip()]

    if not annotations:
        continue  # no objects annotated in this image

    # First token of each annotation row is the class index
    classes_in_image = {int(row.split()[0]) for row in annotations}

    # The image is assumed to share the label's base name, with a .jpg extension
    image_file = label_file.replace(".txt", ".jpg")
    src = os.path.join(images_dir, image_file)
    if not os.path.exists(src):
        continue

    for class_idx in classes_in_image:
        shutil.copy(src, os.path.join(output_dir, class_names[class_idx], image_file))



Class names: ['glass', 'leaf', 'metal', 'paper', 'plastic']


In [None]:
# Count the files that ended up in each class folder
print("\nClass distribution in training set:")
for class_name in class_names:
    n_images = len(os.listdir(os.path.join(output_dir, class_name)))
    print(f"{class_name}: {n_images} images")


Class distribution in training set:
glass: 1458 images
leaf: 2733 images
metal: 2541 images
paper: 2322 images
plastic: 2412 images


## mapping the waste to its classifications

In [None]:
# class name -> (biodegradability, recyclability)
waste_category = dict(
    glass=("non-biodegradable", "recyclable"),
    leaf=("biodegradable", "non-recyclable"),  # typically organic waste
    metal=("non-biodegradable", "recyclable"),
    paper=("biodegradable", "recyclable"),
    plastic=("non-biodegradable", "recyclable"),
)


## loading the dataset for training

In [None]:
# Set paths
train_dir = output_dir  # Using our converted classification dataset

# The Roboflow "valid" split is still in YOLO layout (valid/images, valid/labels).
# Loading that folder directly makes Keras treat "images" and "labels" as the two
# classes ("Found ... files belonging to 2 classes"), so every validation label
# is wrong. Convert the split to one-folder-per-class first, using the same rule
# as the training conversion (an image is copied to every class annotated in it).
val_dir = output_dir + "-valid"
val_images_dir = os.path.join(dataset_dir, "valid", "images")
val_labels_dir = os.path.join(dataset_dir, "valid", "labels")
for class_name in class_names:
    os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)

for label_file in os.listdir(val_labels_dir):
    if not label_file.endswith(".txt"):
        continue
    with open(os.path.join(val_labels_dir, label_file), "r") as f:
        # First token of each non-empty row is the class index
        classes_in_image = {int(line.split()[0]) for line in f if line.strip()}
    image_file = label_file[:-len(".txt")] + ".jpg"
    src = os.path.join(val_images_dir, image_file)
    if not os.path.exists(src):
        continue
    for class_idx in classes_in_image:
        shutil.copy(src, os.path.join(val_dir, class_names[class_idx], image_file))

# Settings
batch_size = 32
img_size = (224, 224)

# Load datasets
train_ds = image_dataset_from_directory(
    train_dir,
    image_size=img_size,
    batch_size=batch_size,
    label_mode='int',
    shuffle=True,
    seed=42
)

val_ds = image_dataset_from_directory(
    val_dir,
    class_names=train_ds.class_names,  # force the same label index order as training
    image_size=img_size,
    batch_size=batch_size,
    label_mode='int'
)

# Get class names from dataset loader to ensure alignment
class_names = train_ds.class_names
print("\nDataset class names:", class_names)
assert val_ds.class_names == class_names, "train/validation class order mismatch"

# Verify dataset content
print("\nChecking training dataset content:")
for images, labels in train_ds.take(1):
    print("Batch shape:", images.shape)
    print("Unique labels in batch:", np.unique(labels))
    print("Class counts in batch:", np.bincount(labels.numpy().flatten()))

Found 11466 files belonging to 5 classes.
Found 645 files belonging to 2 classes.

Dataset class names: ['glass', 'leaf', 'metal', 'paper', 'plastic']

Checking training dataset content:
Batch shape: (32, 224, 224, 3)
Unique labels in batch: [0 1 2 3 4]
Class counts in batch: [ 3  5  7 11  6]


In [None]:
def apply_gaussian_blur(image):
    """Blur one image with a 5x5 Gaussian kernel and return it scaled by 1/255.

    This runs inside ``tf.numpy_function``, so ``image`` arrives as a NumPy
    array rather than a tensor; calling ``.numpy()`` on it (as the previous
    version did in the rescaling branch) raises ``AttributeError``.

    Args:
        image: H x W x C pixel array. Values are expected in the 0-255 range;
            an array whose maximum is <= 1.0 is treated as already scaled to
            [0, 1] and is expanded back to 0-255 (as uint8) before blurring.

    Returns:
        float32 array of the same shape, blurred and divided by 255.
    """
    image = np.asarray(image)
    if image.max() <= 1.0:
        # Heuristic: input looks normalised, bring it back to 8-bit pixel values
        image = (image * 255).astype(np.uint8)
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    return blurred.astype(np.float32) / 255.0

def preprocess_with_blur(image, label):
    """Resize, blur and randomly augment a single training image.

    Steps: resize to 224x224, Gaussian blur via ``apply_gaussian_blur``
    (which also divides by 255), then random horizontal flip, brightness and
    contrast jitter. The label is passed through unchanged.
    """
    resized = tf.image.resize(image, [224, 224])
    blurred = tf.numpy_function(apply_gaussian_blur, [resized], tf.float32)
    # Re-declare the static shape after the numpy_function call
    blurred.set_shape([224, 224, 3])
    augmented = tf.image.random_flip_left_right(blurred)
    augmented = tf.image.random_brightness(augmented, max_delta=0.2)
    augmented = tf.image.random_contrast(augmented, lower=0.8, upper=1.2)
    return augmented, label

# Training pipeline: go back to single images, blur + augment, then re-batch
train_ds = (
    train_ds
    .unbatch()
    .map(preprocess_with_blur)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


def _scale_validation_batch(x, y):
    """Resize a validation batch to 224x224 and divide pixel values by 255."""
    return tf.image.resize(x, [224, 224]) / 255.0, y


# Validation pipeline: no augmentation, only resize + scaling
val_ds = val_ds.map(_scale_validation_batch).prefetch(tf.data.AUTOTUNE)

## building the ResNet-50 transfer-learning model

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# ImageNet-pretrained ResNet50 backbone without its classification head
input_tensor = Input(shape=(224, 224, 3))
base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=input_tensor)

# Unfreeze the backbone, then re-freeze everything except its last 50 layers
base_model.trainable = True
frozen_layers = base_model.layers[:-50]
for layer in frozen_layers:
    layer.trainable = False

# Classification head: pooled features -> two hidden layers -> softmax over classes
x = GlobalAveragePooling2D()(base_model.output)
for hidden_units in (256, 128):
    x = Dense(hidden_units, activation='relu')(x)
output = Dense(len(class_names), activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=output)
# Integer labels (label_mode='int') -> sparse categorical cross-entropy
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

## training the model

In [None]:
# # Callbacks
# early_stopping = EarlyStopping(
#     monitor='val_loss',
#     patience=5,
#     restore_best_weights=True,
#     verbose=1
# )

# reduce_lr = ReduceLROnPlateau(
#     monitor='val_loss',
#     factor=0.3,
#     patience=2,
#     min_lr=1e-6,
#     verbose=1
# )

# checkpoint = ModelCheckpoint(
#     'best_model.h5',
#     monitor='val_accuracy',
#     save_best_only=True,
#     verbose=1
# )

# # Train the model
# history = model.fit(
#     train_ds,
#     validation_data=val_ds,
#     epochs=10,
#     callbacks=[early_stopping, reduce_lr, checkpoint]
# )

In [None]:
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.models import load_model

# # Load existing model if available
# try:
#     model = load_model('best_model.h5')
#     print("Loaded saved model to continue training")
# except:
#     print("No saved model found, starting fresh")

# # Load previous training history if available
# try:
#     with open('training_history.npy', 'rb') as f:
#         saved_history = np.load(f, allow_pickle=True).item()
#     initial_epoch = len(saved_history.get('loss', []))
#     print(f"Resuming from epoch {initial_epoch}")
# except:
#     saved_history = {'loss': [], 'accuracy': [], 'val_loss': [], 'val_accuracy': []}
#     initial_epoch = 0

# # Define callbacks (only ones needed manually)
# early_stopping = tf.keras.callbacks.EarlyStopping(
#     monitor='val_loss',
#     patience=5,
#     restore_best_weights=True,
#     verbose=1
# )
# reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
#     monitor='val_loss',
#     factor=0.3,
#     patience=2,
#     min_lr=1e-6,
#     verbose=1
# )
# model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
#     'best_model.h5',
#     monitor='val_accuracy',
#     save_best_only=True,
#     verbose=1
# )

# # Loss, optimizer, and metrics
# loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
# optimizer = tf.keras.optimizers.Adam()
# train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
# val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

# # Custom training loop by batch
# num_epochs = 8  # Add more if needed
# for epoch in range(initial_epoch, num_epochs):
#     print(f"\nEpoch {epoch + 1}/{num_epochs}")
#     batch_losses = []

#     for step, (x_batch_train, y_batch_train) in enumerate(train_ds):
#         with tf.GradientTape() as tape:
#             logits = model(x_batch_train, training=True)
#             loss_value = loss_fn(y_batch_train, logits)

#         grads = tape.gradient(loss_value, model.trainable_weights)
#         optimizer.apply_gradients(zip(grads, model.trainable_weights))

#         batch_losses.append(loss_value.numpy())
#         train_acc_metric.update_state(y_batch_train, logits)

#         if step % 50 == 0:
#             print(f"  Step {step}: loss = {loss_value:.4f}")

#     # Compute training metrics
#     epoch_loss = np.mean(batch_losses)
#     epoch_acc = train_acc_metric.result().numpy()
#     train_acc_metric.reset_states()

#     # Run validation at the end of the epoch
#     val_losses = []
#     for x_batch_val, y_batch_val in val_ds:
#         val_logits = model(x_batch_val, training=False)
#         v_loss = loss_fn(y_batch_val, val_logits)
#         val_losses.append(v_loss.numpy())
#         val_acc_metric.update_state(y_batch_val, val_logits)

#     val_loss = np.mean(val_losses)
#     val_acc = val_acc_metric.result().numpy()
#     val_acc_metric.reset_states()

#     print(f"  Train loss: {epoch_loss:.4f} | Train acc: {epoch_acc:.4f}")
#     print(f"  Val loss: {val_loss:.4f}   | Val acc: {val_acc:.4f}")

#     # Save model manually (like ModelCheckpoint)
#     model.save('best_model.h5')

#     # Update and save training history
#     saved_history['loss'].append(epoch_loss)
#     saved_history['accuracy'].append(epoch_acc)
#     saved_history['val_loss'].append(val_loss)
#     saved_history['val_accuracy'].append(val_acc)
#     np.save('training_history.npy', saved_history)

#     # Manually apply ReduceLROnPlateau logic
#     reduce_lr.on_epoch_end(epoch=epoch, logs={'val_loss': val_loss})

#     # Manually trigger early stopping
#     early_stopping.on_epoch_end(epoch=epoch, logs={'val_loss': val_loss})
#     if early_stopping.stopped_epoch > 0:
#         print("Early stopping triggered.")
#         break

# # At the end of training, model and history are saved
# print("Training complete.")

# training model

In [None]:
# Callbacks
# Stop when val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Multiply the learning rate by 0.3 after 2 stagnant epochs, never below 1e-6
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2, min_lr=1e-6, verbose=1)

# Keep only the checkpoint with the best validation accuracy
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)

training_callbacks = [early_stopping, reduce_lr, checkpoint]

# Train the model
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=8,
    callbacks=training_callbacks,
)

Epoch 1/8
    359/Unknown [1m2611s[0m 7s/step - accuracy: 0.4095 - loss: 1.3878

Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.



Epoch 1: val_accuracy improved from -inf to 0.00000, saving model to best_model.h5




[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2696s[0m 7s/step - accuracy: 0.4097 - loss: 1.3874 - val_accuracy: 0.0000e+00 - val_loss: 72.6669 - learning_rate: 0.0010
Epoch 2/8
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.5684 - loss: 1.0429
Epoch 2: val_accuracy did not improve from 0.00000
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2761s[0m 8s/step - accuracy: 0.5684 - loss: 1.0428 - val_accuracy: 0.0000e+00 - val_loss: 11.1861 - learning_rate: 0.0010
Epoch 3/8
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.5909 - loss: 0.9944
Epoch 3: val_accuracy improved from 0.00000 to 0.03256, saving model to best_model.h5




[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2673s[0m 7s/step - accuracy: 0.5909 - loss: 0.9943 - val_accuracy: 0.0326 - val_loss: 5.7133 - learning_rate: 0.0010
Epoch 4/8
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.6059 - loss: 0.9576
Epoch 4: val_accuracy improved from 0.03256 to 0.37209, saving model to best_model.h5




[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2667s[0m 7s/step - accuracy: 0.6059 - loss: 0.9576 - val_accuracy: 0.3721 - val_loss: 1.9412 - learning_rate: 0.0010
Epoch 5/8
[1m 82/359[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m33:12[0m 7s/step - accuracy: 0.6026 - loss: 0.9433

## plotting of accuracy and training loss

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by model.fit
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']
epochs_range = range(len(acc))

# (train curve, val curve, train label, val label, legend position, title)
panels = [
    (acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss, val_loss, 'Training Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
]

# Accuracy on the left, loss on the right
plt.figure(figsize=(12, 5))
for position, (train_curve, val_curve, train_label, val_label, legend_loc, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label, marker='o')
    plt.plot(epochs_range, val_curve, label=val_label, marker='x')
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.tight_layout()
plt.show()


## heat map

In [None]:
import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model

# === Class labels, listed in the order of the model's output units ===
# Must match the model's output layer (same length, same order); adjust if needed
class_names = [
    "glass",
    "leaf",
    "metal",
    "paper",
    "plastic",
]
# class name -> (biodegradability, recyclability)
waste_category = dict(
    glass=("non-biodegradable", "recyclable"),
    leaf=("biodegradable", "non-recyclable"),  # typically organic waste
    metal=("non-biodegradable", "recyclable"),
    paper=("biodegradable", "recyclable"),
    plastic=("non-biodegradable", "recyclable"),
)


# Set path to test images.
# Relative to the working directory (like dataset_dir in the dataset-preparation
# cell) instead of the Colab-only absolute path '/content/...'.
test_images_dir = os.path.join("final-proj-4", "test", "images")
# Case-insensitive extension match so files such as "IMG_1.JPG" are included
all_images = [f for f in os.listdir(test_images_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
# random.sample raises ValueError when asked for more items than are available
random_images = random.sample(all_images, min(10, len(all_images)))

# === Load your model ===
# Make sure your model is already loaded before running this script
# Example: model = tf.keras.models.load_model('your_model_path.h5')

# Name of last convolutional layer for Grad-CAM
last_conv_layer_name = "conv5_block3_out"  # Adjust based on your model architecture

# Build Grad-CAM model: same inputs as `model`, returning both the last
# convolutional feature maps and the class probabilities.
# `model.inputs` is already a list; wrapping it in another list produces a nested
# input structure that does not match the single array passed at call time.
grad_model = Model(
    inputs=model.inputs,
    outputs=[model.get_layer(last_conv_layer_name).output, model.output]
)

# === Grad-CAM heatmap function ===
def get_gradcam_heatmap(img_array, class_index):
    """Compute a Grad-CAM map for ``class_index`` on a single-image batch.

    Uses the module-level ``grad_model`` to obtain the last convolutional
    feature maps and the predictions, weights each feature channel by the mean
    gradient of the class score with respect to it, and returns the rectified,
    max-normalised map as a NumPy array.
    """
    with tf.GradientTape() as tape:
        feature_maps, predictions = grad_model(img_array)
        class_score = predictions[:, class_index]

    # Gradient of the class score w.r.t. the feature maps, averaged per channel
    score_grads = tape.gradient(class_score, feature_maps)
    channel_weights = tf.reduce_mean(score_grads, axis=(0, 1, 2))

    # Weighted sum over channels for the first (only) image of the batch
    weighted_sum = feature_maps[0] @ channel_weights[..., tf.newaxis]
    cam = tf.squeeze(weighted_sum)

    # Keep positive evidence only and scale by the (epsilon-shifted) maximum
    cam = tf.maximum(cam, 0) / tf.math.reduce_max(cam + 1e-10)
    return cam.numpy()

# === Prediction and heatmap generation ===
def predict_with_heatmap(img_path):
    """Classify one image file and overlay its Grad-CAM heatmap.

    Args:
        img_path: path of the image to read with OpenCV.

    Returns:
        ``(img_rgb, superimposed, label_text)`` where ``img_rgb`` is the
        original image in RGB, ``superimposed`` is the image blended with the
        colour-mapped heatmap (also RGB) and ``label_text`` is
        ``"CLASS (biodegradability, recyclability)"``.
        ``(None, None, None)`` if the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        return None, None, None
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(img_rgb, (224, 224))
    # Same scaling as the training/validation pipelines (pixels / 255)
    input_tensor = np.expand_dims(resized / 255.0, axis=0)

    preds = model.predict(input_tensor)
    class_index = int(np.argmax(preds))

    if class_index >= len(class_names):
        print(f"[Warning] class_index {class_index} out of range for class_names.")
        class_label = "Unknown"
    else:
        class_label = class_names[class_index]

    # The category table is keyed in lowercase in this cell but in uppercase in
    # another cell; looking up only class_label.upper() always fell through to
    # "Unknown" here. Try every casing so the lookup works with either table.
    category = next(
        (
            waste_category[key]
            for key in (class_label, class_label.lower(), class_label.upper())
            if key in waste_category
        ),
        ("Unknown", "Unknown"),
    )

    # Grad-CAM heatmap
    heatmap = get_gradcam_heatmap(input_tensor, class_index)
    heatmap_resized = cv2.resize(heatmap, (img_rgb.shape[1], img_rgb.shape[0]))
    heatmap_resized = np.uint8(255 * heatmap_resized)
    heatmap_colored = cv2.applyColorMap(heatmap_resized, cv2.COLORMAP_JET)
    # applyColorMap returns BGR; convert so it matches img_rgb before blending,
    # otherwise hot regions show up blue and cold regions red under imshow.
    heatmap_colored = cv2.cvtColor(heatmap_colored, cv2.COLOR_BGR2RGB)
    superimposed = cv2.addWeighted(img_rgb, 0.6, heatmap_colored, 0.4, 0)

    label_text = f"{class_label.upper()} ({category[0]}, {category[1]})"
    return img_rgb, superimposed, label_text

# === Visualization ===
# One row per sampled image: original on the left, Grad-CAM overlay on the right.
# The grid is sized from the sample itself instead of a hard-coded 10, so it
# stays valid if the sample size changes.
n_rows = len(random_images)

if n_rows == 0:
    print(f"No test images to display from {test_images_dir}")
else:
    # squeeze=False keeps `axes` two-dimensional even for a single row
    fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3 * n_rows), squeeze=False)

    for i, img_file in enumerate(random_images):
        img_path = os.path.join(test_images_dir, img_file)
        orig, heatmap, label = predict_with_heatmap(img_path)
        left_ax, right_ax = axes[i]

        if orig is not None and heatmap is not None:
            left_ax.imshow(orig)
            left_ax.set_title(f"Image {i+1}\n{label}", fontsize=10)
            left_ax.axis('off')

            right_ax.imshow(heatmap)
            right_ax.set_title("Grad-CAM Heatmap", fontsize=10)
            right_ax.axis('off')
        else:
            # Unreadable image: hide the whole row
            left_ax.set_visible(False)
            right_ax.set_visible(False)

    plt.tight_layout()
    plt.suptitle("Waste Classification with Grad-CAM", fontsize=18, y=1.02)
    plt.show()


## image testing

In [None]:
import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt

# Define waste classification categories, keyed by UPPER-CASE class name:
# class name -> (biodegradability, recyclability).
# "LEAF" was missing although "leaf" is one of the model's classes, so every
# leaf prediction was displayed as (Unknown, Unknown).
waste_category = {
    "BIODEGRADABLE": ("biodegradable", "non-recyclable"),
    "CARDBOARD": ("biodegradable", "recyclable"),
    "GLASS": ("non-biodegradable", "recyclable"),
    "LEAF": ("biodegradable", "non-recyclable"),
    "METAL": ("non-biodegradable", "recyclable"),
    "PAPER": ("biodegradable", "recyclable"),
    "PLASTIC": ("non-biodegradable", "recyclable"),
}

# Path to test images, relative to the working directory (like dataset_dir in
# the dataset-preparation cell) rather than the Colab-only '/content/...' path
test_images_dir = os.path.join("final-proj-4", "test", "images")
# Case-insensitive extension match so files such as "IMG_1.JPG" are included
all_images = [f for f in os.listdir(test_images_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
# random.sample raises ValueError when asked for more items than are available
random_images = random.sample(all_images, min(10, len(all_images)))

# Predict and collect image + label
def predict_and_collect(img_path):
    """Classify one image file and build its display label.

    Returns ``(img_rgb, text)`` where ``img_rgb`` is the image in RGB and
    ``text`` is ``"CLASS (biodegradability, recyclability)"``; returns
    ``(None, None)`` and prints a message when the file cannot be read.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:
        print(f"Error loading: {img_path}")
        return None, None

    img_rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    # Model input: 224x224, pixel values divided by 255, with a batch axis
    scaled = cv2.resize(img_rgb, (224, 224)) / 255.0
    batch = np.expand_dims(scaled, axis=0)

    scores = model.predict(batch)
    class_label = class_names[np.argmax(scores)]

    # Category table is keyed by upper-case class name
    shown_name = class_label.upper()
    category = waste_category.get(shown_name, ("Unknown", "Unknown"))

    return img_rgb, f"{shown_name} ({category[0]}, {category[1]})"

# Lay the predictions out on a grid of 2 rows x 5 columns
fig, axes = plt.subplots(2, 5, figsize=(18, 8))  # Adjust figure size
axes = axes.flatten()

for i, img_file in enumerate(random_images):
    ax = axes[i]
    img_rgb, label_text = predict_and_collect(os.path.join(test_images_dir, img_file))

    if img_rgb is None:
        # Image could not be read: leave its cell blank
        ax.set_visible(False)
        continue

    ax.imshow(img_rgb)
    ax.set_title(label_text, fontsize=9)
    ax.axis('off')

# Blank out grid cells that have no image assigned
for unused_ax in axes[len(random_images):]:
    unused_ax.set_visible(False)

# Tighter spacing
plt.subplots_adjust(wspace=0.1, hspace=0.25)
fig.suptitle("Waste Classification Predictions (5x2)", fontsize=14, y=1.02)
plt.show()


# FINALIZATION

## saving the trained model

In [None]:
# Write the trained model to a single .h5 file in the working directory
model.save("waste_classifier_model_koto.h5")

## webcam implementation of model

In [None]:
import cv2
import numpy as np

def classify_frame(frame):
    """Classify a single webcam frame.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``.

    Returns:
        ``(pred_class, biodegradable, recyclable)``; the two category strings
        are ``"Unknown"`` when the predicted class has no entry in
        ``waste_category``.
    """
    # OpenCV delivers BGR frames, while the model was trained and tested on RGB
    # images scaled by 1/255; feed it the same representation.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = cv2.resize(rgb, (224, 224))
    img_array = np.expand_dims(img.astype(np.float32) / 255.0, axis=0)

    # verbose=0: avoid printing a progress bar for every frame
    predictions = model.predict(img_array, verbose=0)
    pred_class = class_names[int(np.argmax(predictions))]

    # The previous version read an undefined `waste_info` (NameError). Use the
    # notebook's `waste_category` table, whose keys are lowercase in some cells
    # and uppercase in others, so try every casing.
    biodegradable, recyclable = next(
        (
            waste_category[key]
            for key in (pred_class, pred_class.lower(), pred_class.upper())
            if key in waste_category
        ),
        ("Unknown", "Unknown"),
    )
    return pred_class, biodegradable, recyclable

# Webcam loop: classify every frame from the default camera until "q" is pressed
cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Camera unavailable or stream ended
            break

        pred_class, biodeg, recyc = classify_frame(frame)

        # Overlay text
        label = f"{pred_class} | {biodeg} | {recyc}"
        cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.imshow("Waste Classifier", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the preview window, including when the
    # loop is interrupted or a frame fails to classify.
    cap.release()
    cv2.destroyAllWindows()


# TRIAL: ALTERNATIVE APPROACH

In [None]:
# === ROBOFLOW DATASET PREPARATION ===
from roboflow import Roboflow
import os
import shutil
import yaml
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import ResNet50, MobileNetV2
from tensorflow.keras.layers import Input, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Download dataset from Roboflow
from roboflow import Roboflow
# SECURITY: API keys must not be committed inside the notebook. Read the key from
# the ROBOFLOW_API_KEY environment variable, or prompt for it so it never ends up
# in the saved file. (A key that was previously committed should be revoked.)
import getpass

roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass.getpass("Roboflow API key: ")
rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("material-identification").project("garbage-classification-3")
version = project.version(1)
# YOLOv8-format export; the code below expects it under ./GARBAGE-CLASSIFICATION-3-1
dataset = version.download("yolov8")

# Locations inside the downloaded YOLOv8 export (training split)
dataset_dir = "GARBAGE-CLASSIFICATION-3-1"
images_dir, labels_dir = (
    os.path.join(dataset_dir, "train", sub_dir) for sub_dir in ("images", "labels")
)
yaml_path = os.path.join(dataset_dir, "data.yaml")

# data.yaml holds the ordered class names that the label files index into
with open(yaml_path, 'r') as stream:
    data = yaml.safe_load(stream)
class_names = data['names']  # ['biodegradable', 'cardboard', 'cloth', 'glass', 'metal', 'paper', 'plastic']

# One sub-folder per class under the classification output directory
output_dir = "waste_classification_dataset"
os.makedirs(output_dir, exist_ok=True)
for class_name in class_names:
    class_folder = os.path.join(output_dir, class_name)
    os.makedirs(class_folder, exist_ok=True)

# YOLO labels -> classification folders.
# Only the FIRST annotation of each label file decides the image's class.
for label_file in os.listdir(labels_dir):
    with open(os.path.join(labels_dir, label_file), "r") as f:
        first_line = f.readline().strip()
    if not first_line:
        continue  # Skip empty labels
    class_idx = int(first_line.split()[0])

    # The image is assumed to share the label's base name, with a .jpg extension
    image_file = label_file.replace(".txt", ".jpg")
    src = os.path.join(images_dir, image_file)
    dst = os.path.join(output_dir, class_names[class_idx], image_file)
    if os.path.exists(src):
        shutil.copy(src, dst)

# Broad categories: class name -> (biodegradability, recyclability)
waste_category = dict(
    biodegradable=("biodegradable", "non-recyclable"),
    cardboard=("biodegradable", "recyclable"),
    cloth=("biodegradable", "non-recyclable"),
    glass=("non-biodegradable", "recyclable"),
    metal=("non-biodegradable", "recyclable"),
    paper=("biodegradable", "recyclable"),
    plastic=("non-biodegradable", "recyclable"),
)

# === LOAD AND PREPROCESS IMAGES ===
from tensorflow.keras.preprocessing.image import load_img

def load_images_and_labels(base_dir, class_names, img_size=(48,48)):
    """Load every image under ``base_dir/<class_name>/`` into arrays.

    Args:
        base_dir: directory containing one sub-folder per class.
        class_names: class folder names; a class's position in this sequence
            becomes its integer label.
        img_size: target size passed to ``load_img``.

    Returns:
        ``(X, y)``: array of images with pixel values divided by 255, and
        array of integer labels. Files that fail to load are reported with a
        printed message and skipped.
    """
    images, labels = [], []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(base_dir, class_name)
        for file_name in os.listdir(class_dir):
            file_path = os.path.join(class_dir, file_name)
            try:
                pixels = img_to_array(load_img(file_path, target_size=img_size)) / 255.0  # normalize pixels 0-1
                images.append(pixels)
                labels.append(label)
            except Exception as e:
                print(f"Error loading {file_path}: {e}")
    return np.array(images), np.array(labels)

# Read every converted image together with its integer label
X_train, y_train = load_images_and_labels(output_dir, class_names)

# Hold out 20% of the images as a test set, stratified by class
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

# One-hot versions of the integer labels
n_classes = len(class_names)
y_train_cat = to_categorical(y_train, num_classes=n_classes)
y_test_cat = to_categorical(y_test, num_classes=n_classes)

# === FEATURE EXTRACTION MODEL SETUP ===
# Both backbones read from the same 48x48 RGB input tensor
input_layer = Input(shape=(48,48,3))

# ImageNet-pretrained backbones without their classification heads
resnet_model = ResNet50(weights='imagenet', include_top=False, input_tensor=input_layer)
mobilenet_model = MobileNetV2(weights='imagenet', include_top=False, input_tensor=input_layer)

# Freeze every layer: the networks are used as fixed feature extractors
for backbone in (resnet_model, mobilenet_model):
    for layer in backbone.layers:
        layer.trainable = False

# Global average pooling turns each backbone's feature maps into one vector per image
resnet_features = GlobalAveragePooling2D()(resnet_model.output)
mobilenet_features = GlobalAveragePooling2D()(mobilenet_model.output)

resnet_extractor = Model(inputs=resnet_model.input, outputs=resnet_features)
mobilenet_extractor = Model(inputs=mobilenet_model.input, outputs=mobilenet_features)

# Feature vectors for the train and test images
features_resnet_train = resnet_extractor.predict(X_train)
features_mobilenet_train = mobilenet_extractor.predict(X_train)

features_resnet_test = resnet_extractor.predict(X_test)
features_mobilenet_test = mobilenet_extractor.predict(X_test)

# Min-max scaling per backbone; scalers are fitted on the training features only
scaler_resnet = MinMaxScaler()
scaler_mobilenet = MinMaxScaler()

features_resnet_train_scaled = scaler_resnet.fit_transform(features_resnet_train)
features_mobilenet_train_scaled = scaler_mobilenet.fit_transform(features_mobilenet_train)

features_resnet_test_scaled = scaler_resnet.transform(features_resnet_test)
features_mobilenet_test_scaled = scaler_mobilenet.transform(features_mobilenet_test)

# Fuse the two feature sets side by side (ResNet columns first)
X_train_features = np.concatenate(
    (features_resnet_train_scaled, features_mobilenet_train_scaled), axis=1
)
X_test_features = np.concatenate(
    (features_resnet_test_scaled, features_mobilenet_test_scaled), axis=1
)

# === TRAIN RANDOM FOREST CLASSIFIER ===
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
)
rf_model.fit(X_train_features, y_train)

# === PREDICTION AND EVALUATION ===
predictions = rf_model.predict(X_test_features)

# Predicted label indices -> class names
predicted_class_names = [class_names[label] for label in predictions]

print("\n--- Sample predictions with broad categories ---")
for i in range(10):
    predicted_name = predicted_class_names[i]
    # Category table is keyed by lowercase class name
    bio_cat, recyc_cat = waste_category[predicted_name.lower()]
    print(f"Sample {i}: Predicted = {predicted_name}, Biodegradable = {bio_cat}, Recyclable = {recyc_cat}")

# Evaluation metrics on the held-out test split
print("\n--- Classification Report ---")
report = classification_report(y_test, predictions, target_names=class_names)
print(report)

print("\n--- Confusion Matrix ---")
matrix = confusion_matrix(y_test, predictions)
print(matrix)

print("\n--- Accuracy ---")
accuracy = accuracy_score(y_test, predictions)
print(accuracy)

# === OPTIONAL: Plot feature importance or training details ===
# (You can add plots here if needed)
