In [None]:

# --- Setup (no tensorflow_addons needed) ---
import os, random, math
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from tensorflow.keras import layers as L, Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

# Reproducibility: push one seed into TensorFlow, NumPy and the stdlib RNG, in that order.
SEED = 13
for _seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
    _seed_fn(SEED)

# Input pipeline settings shared by the rest of the notebook.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

# Label space; edit CLASS_NAMES if your folder names differ.
NUM_CLASSES = 2
CLASS_NAMES = ['O', 'R']


In [None]:
import kagglehub

# Fetch the latest published version of the dataset and remember where it landed.
DATASET_HANDLE = "techsash/waste-classification-data"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'waste-classification-data' dataset.
Path to dataset files: /kaggle/input/waste-classification-data


In [None]:
pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [None]:
import splitfolders
import os

# Locations inside the downloaded dataset.
base_path = os.path.join(path, "DATASET")
train_source = os.path.join(base_path, "TRAIN")
test_dir = os.path.join(base_path, "TEST")   # untouched holdout

# The split is written to a separate, writable directory.
output_dir = "/content/Split"
train_dir = os.path.join(output_dir, "train")
val_dir = os.path.join(output_dir, "val")

# Carve the TRAIN folder into 80% train / 20% val with a fixed seed.
split_options = dict(seed=42, ratio=(0.8, 0.2), group_prefix=None)
splitfolders.ratio(input=train_source, output=output_dir, **split_options)

print(f"Split complete: 80% train, 20% val. Data saved to {output_dir}")

Copying files: 22564 files [03:04, 122.33 files/s]

Split complete: 80% train, 20% val. Data saved to /content/Split





In [None]:
import tensorflow as tf, numpy as np, os
from collections import Counter
from tensorflow.keras.applications.efficientnet import preprocess_input

# Settings used by the input pipeline in this cell.
SEED = 42
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE
NUM_CLASSES = 2  # two target classes

# Every split shares the same image size and batch size.
_load_images = tf.keras.utils.image_dataset_from_directory
_common_kwargs = dict(image_size=IMG_SIZE, batch_size=BATCH_SIZE)

# Only the training split is shuffled; val/test keep directory order.
train_ds = _load_images(train_dir, seed=SEED, shuffle=True, **_common_kwargs)
val_ds = _load_images(val_dir, seed=SEED, shuffle=False, **_common_kwargs)
test_ds = _load_images(test_dir, shuffle=False, **_common_kwargs)

def preprocess(image, label):
    """Prepare one batch for the network.

    Runs the EfficientNet ``preprocess_input`` on ``image`` and expands
    ``label`` with ``tf.one_hot`` to ``NUM_CLASSES`` columns.

    Returns:
        The ``(image, label)`` pair after both transformations.
    """
    return preprocess_input(image), tf.one_hot(label, NUM_CLASSES)

def _finalize(ds):
    """Map ``preprocess`` over ``ds`` in parallel, then prefetch."""
    return ds.map(preprocess, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)

train_ds, val_ds, test_ds = map(_finalize, (train_ds, val_ds, test_ds))

Found 18051 files belonging to 2 classes.
Found 4513 files belonging to 2 classes.
Found 2513 files belonging to 2 classes.


In [None]:
from collections import Counter

# Walk the training set once, turning each one-hot label batch back into integer class ids.
label_batches = []
for images, onehot in train_ds:
    label_batches.append(np.argmax(onehot, axis=1))
train_labels = np.concatenate(label_batches, axis=0)

# Per-class sample counts and the overall total.
cw = Counter(train_labels)
total = sum(cw.values())

# Inverse-frequency weights: total / (NUM_CLASSES * count).
class_weight = {
    i: total/(NUM_CLASSES*cw[i])
    for i in range(NUM_CLASSES)
}

print("Class counts:", dict(cw))
print("Class weights:", class_weight)

Class counts: {np.int64(0): 10052, np.int64(1): 7999}
Class weights: {0: 0.8978810187027457, 1: 1.1283285410676334}


In [None]:
# Light geometric/photometric augmentation applied inside the model.
data_augment = tf.keras.Sequential([
    L.RandomFlip("horizontal"),
    # RandomRotation's factor is a fraction of a full turn (2*pi), not radians.
    # 4.5/360 yields the intended ~±4.5 degrees; the previous 0.08 was ~±28.8 degrees.
    L.RandomRotation(4.5 / 360),
    L.RandomZoom(0.1),
    L.RandomContrast(0.1),
], name="augment")


In [None]:

# --- Model: EfficientNetB0 backbone + dropout + softmax head ---
base = EfficientNetB0(weights="imagenet", include_top=False, input_shape=IMG_SIZE+(3,))
# Freeze the pretrained backbone. Calling it with training=False below only
# keeps BatchNorm in inference mode; it does not stop the weights from updating.
base.trainable = False

inp = L.Input(shape=IMG_SIZE+(3,))
x = data_augment(inp)                      # augmentation inside the model
x = base(x, training=False)                # BN stays in inference mode while the backbone is frozen
x = L.GlobalAveragePooling2D()(x)
x = L.Dropout(0.25)(x)

# One output unit per class; float32 keeps the softmax numerically stable.
out = L.Dense(NUM_CLASSES, activation="softmax", dtype="float32")(x)

# The input pipeline one-hot encodes labels, so the dense (non-sparse)
# categorical cross-entropy is the matching loss.
loss = tf.keras.losses.CategoricalCrossentropy()
metrics = ["accuracy"]
if NUM_CLASSES == 2:
    metrics.append(tf.keras.metrics.AUC(name='pr_auc', curve='PR'))

model = Model(inp, out)
model.compile(optimizer=Adam(1e-3), loss=loss, metrics=metrics)
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Cell: compile & fit (REPLACE WHOLE CELL)
import tensorflow as tf
from tensorflow.keras import backend as K

# Custom F1 metric: harmonic mean of streaming precision and recall.
class F1Score(tf.keras.metrics.Metric):
    """Streaming F1 score built from Keras ``Precision`` and ``Recall``.

    Args:
        name: Metric name shown in the training logs.
        class_id: Optional index of the output column to score. With one-hot
            labels and a two-column softmax, leaving this as ``None`` scores
            both columns as independent binary problems, which makes
            precision, recall and F1 all equal to plain accuracy.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, name='f1_score', class_id=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.class_id = class_id
        self.precision = tf.keras.metrics.Precision(class_id=class_id)
        self.recall = tf.keras.metrics.Recall(class_id=class_id)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch into the wrapped precision and recall metrics."""
        self.precision.update_state(y_true, y_pred, sample_weight=sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight=sample_weight)

    def result(self):
        """Return 2*p*r / (p + r), with an epsilon guarding the zero-denominator case."""
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + K.epsilon()))

    def reset_state(self):
        """Clear the accumulated precision and recall counts."""
        self.precision.reset_state()
        self.recall.reset_state()

    # Deprecated plural spelling kept as an alias for older callers.
    reset_states = reset_state

    def get_config(self):
        """Include ``class_id`` so the metric can be re-created from its config."""
        config = super().get_config()
        config["class_id"] = self.class_id
        return config


# Column of the softmax output treated as the positive class for
# precision/recall/F1. Index 1 is 'R' when class folders are ordered
# ['O', 'R'] -- adjust if your folder names differ.
POSITIVE_CLASS_ID = 1

# --- compile ---
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),  # labels are one-hot
    metrics=[
        tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
        tf.keras.metrics.AUC(name="auc"),                 # ROC-AUC
        tf.keras.metrics.AUC(name="pr_auc", curve="PR"),  # Precision–Recall AUC
        # class_id restricts these to the positive-class column; without it
        # they are computed over both one-hot columns and just mirror accuracy.
        tf.keras.metrics.Precision(name="precision", class_id=POSITIVE_CLASS_ID),
        tf.keras.metrics.Recall(name="recall", class_id=POSITIVE_CLASS_ID),
        F1Score(name="f1_score", class_id=POSITIVE_CLASS_ID),
    ]
    # run_eagerly=False by default; keep for performance
)

# --- Call predict once on a single validation batch so the prediction
# graph is traced before training starts ---
if isinstance(val_ds, tf.data.Dataset):
    for x_batch, _ in val_ds.take(1):
        _ = model.predict(x_batch)
else:
    _ = model.predict(next(iter(val_ds))[0])


# --- train ---
# NOTE(review): the `class_weight` dict computed earlier is not passed to fit();
# confirm whether that is intentional.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=3,
    callbacks=[]
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Epoch 1/3
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5543s[0m 10s/step - accuracy: 0.9015 - auc: 0.9604 - f1_score: 0.9015 - loss: 0.2570 - pr_auc: 0.9581 - precision: 0.9015 - recall: 0.9015 - val_accuracy: 0.9169 - val_auc: 0.9706 - val_f1_score: 0.9169 - val_loss: 0.2221 - val_pr_auc: 0.9685 - val_precision: 0.9169 - val_recall: 0.9169
Epoch 2/3
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6949s[0m 12s/step - accuracy: 0.9282 - auc: 0.9768 - f1_score: 0.9282 - loss: 0.1922 - pr_auc: 0.9755 - precision: 0.9282 - recall: 0.9282 - val_accuracy: 0.9207 - val_auc: 0.9687 - val_f1_score: 0.9207 - val_loss: 0.2379 - val_pr_auc: 0.9655 - val_precision: 0.9207 - val_recall: 0.9207
Epoch 3/3
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6665s[0m 12s/step - accuracy: 0.9348 - auc: 0.9808 - f1_score: 0.9348 - loss: 0.1719 - pr_auc: 0.9793 - precision: 0.9348 - recall: 0.9348 - val