Waste management using a CNN model


In [None]:
import os, json, math, itertools, argparse, random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

def set_seed(s=42):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        s: Seed value handed to all three generators (default 42).
    """
    random.seed(s)
    np.random.seed(s)
    tf.random.set_seed(s)

def get_gens(train_dir, val_dir, img_size=224, batch=32):
    """Create the training and validation directory iterators.

    Pixels are normalised with ResNet50's `preprocess_input` instead of a plain
    1/255 rescale: `build_model` loads ImageNet ResNet50 weights, and the Keras
    documentation requires inputs to go through that function. Models trained
    earlier with the 1/255 rescale expect the old scaling at inference time.

    Args:
        train_dir: Directory with one sub-folder per class (training images).
        val_dir: Directory with one sub-folder per class (validation images).
        img_size: Side length, in pixels, images are resized to.
        batch: Batch size used by both iterators.

    Returns:
        Tuple `(train_gen, val_gen)`. The training iterator is shuffled and
        augmented; the validation iterator is neither, so its sample order
        lines up with `val_gen.classes`.
    """
    # ImageDataGenerator applies preprocessing_function after augmentation and
    # before `rescale`, so `rescale` must be dropped rather than combined.
    preprocess = tf.keras.applications.resnet50.preprocess_input

    train_aug = ImageDataGenerator(
        preprocessing_function=preprocess,
        rotation_range=25,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,  # waste has weak orientation bias
        fill_mode='nearest'
    )
    val_aug = ImageDataGenerator(preprocessing_function=preprocess)

    # Settings shared by both iterators.
    flow_kwargs = dict(
        target_size=(img_size, img_size),
        batch_size=batch,
        class_mode='categorical',
    )
    train_gen = train_aug.flow_from_directory(train_dir, shuffle=True, **flow_kwargs)
    val_gen = val_aug.flow_from_directory(val_dir, shuffle=False, **flow_kwargs)
    return train_gen, val_gen

def compute_class_weights(generator):
    """Compute balanced class weights from an iterator's integer labels.

    A class `i` with `n_i` samples receives `total / (num_classes * n_i)`,
    where `num_classes` is the length of the label bincount.

    Args:
        generator: Object exposing `classes`, a sequence of non-negative
            integer class ids (one per sample).

    Returns:
        Dict mapping class id to a Python float weight. A class id with no
        samples (a gap in the bincount) gets the neutral weight 1.0 instead of
        the infinite value a division by zero would produce.
    """
    counts = np.bincount(np.asarray(generator.classes, dtype=np.int64))
    total = int(counts.sum())
    n_classes = len(counts)

    weights = {}
    for class_id, count in enumerate(counts):
        count = int(count)
        # An empty class never contributes to the loss; keep its weight finite.
        weights[class_id] = total / (n_classes * count) if count > 0 else 1.0
    return weights

def build_model(num_classes=2, lr=1e-4, trainable_at=None, img_size=224):
    """Build and compile a ResNet50-based softmax classifier.

    Args:
        num_classes: Number of output classes (width of the softmax layer).
        lr: Learning rate for the Adam optimizer.
        trainable_at: If None, the whole ResNet50 backbone is frozen. Otherwise
            backbone layers from this index onward (slice semantics, so a
            negative value counts from the end) are unfrozen for fine-tuning.
        img_size: Side length of the square RGB input; defaults to 224, the
            value that was previously hard-coded.

    Returns:
        A compiled `Model` using categorical cross-entropy and accuracy.
    """
    base = ResNet50(weights='imagenet', include_top=False,
                    input_shape=(img_size, img_size, 3))
    if trainable_at is None:
        base.trainable = False
    else:
        for layer in base.layers:
            layer.trainable = False
        for layer in base.layers[trainable_at:]:
            # Keep BatchNormalization frozen while fine-tuning: the Keras
            # transfer-learning guide warns that updating its statistics can
            # destroy what the pretrained model has learned.
            if not isinstance(layer, tf.keras.layers.BatchNormalization):
                layer.trainable = True

    # Classification head on top of the pooled backbone features.
    x = GlobalAveragePooling2D()(base.output)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(num_classes, activation='softmax')(x)

    model = Model(base.input, out)
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def plot_confmat(cm, classes, out_path):
    """Render a confusion matrix as an annotated heat map and save it.

    Args:
        cm: 2-D confusion matrix (rows are true labels, columns are predicted
            labels). Integer matrices are annotated as whole numbers, any
            other dtype with two decimals.
        classes: Class names used as tick labels on both axes.
        out_path: Destination path passed to `plt.savefig`.
    """
    cm = np.asarray(cm)
    # 'd' raises on floats, so pick the cell format from the dtype.
    fmt = 'd' if cm.dtype.kind in 'iu' else '.2f'

    fig = plt.figure(figsize=(4,4))
    try:
        plt.imshow(cm, interpolation='nearest')
        plt.title('Confusion Matrix'); plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45); plt.yticks(tick_marks, classes)
        # Cells darker than half the maximum get white text for contrast.
        thresh = cm.max()/2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        plt.ylabel('True label'); plt.xlabel('Predicted label')
        plt.tight_layout(); plt.savefig(out_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)

def main(args):
    """Train the classifier, then save the model, history and evaluation plots.

    Args:
        args: Namespace with `train_dir`, `val_dir`, `batch`, `lr`,
            `unfreeze_at`, `epochs` and `artifacts` attributes.

    Raises:
        ValueError: If the training and validation directories do not yield
            the same class-name-to-index mapping.

    Side effects:
        Writes `model.h5`, `history.json` and `confusion_matrix.png` into
        `args.artifacts` and prints a classification report.
    """
    set_seed()
    os.makedirs(args.artifacts, exist_ok=True)
    train_gen, val_gen = get_gens(args.train_dir, args.val_dir, batch=args.batch)

    # Different mappings would make validation labels and metrics meaningless.
    if train_gen.class_indices != val_gen.class_indices:
        raise ValueError(
            "train/val class folders differ: "
            f"{train_gen.class_indices} vs {val_gen.class_indices}"
        )

    # Size the softmax layer from the data instead of assuming two classes.
    num_classes = len(train_gen.class_indices)
    model = build_model(num_classes=num_classes, lr=args.lr, trainable_at=args.unfreeze_at)

    cweights = compute_class_weights(train_gen)

    cbs = [
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6),
        ModelCheckpoint(os.path.join(args.artifacts, 'model.h5'), monitor='val_loss',
                        save_best_only=True)
    ]

    steps_train = math.ceil(train_gen.samples / args.batch)
    steps_val = math.ceil(val_gen.samples / args.batch)

    hist = model.fit(
        train_gen, steps_per_epoch=steps_train,
        validation_data=val_gen, validation_steps=steps_val,
        epochs=args.epochs, class_weight=cweights, callbacks=cbs, verbose=1
    )

    # Save history. Logged values can be NumPy scalars (e.g. the learning rate
    # recorded by ReduceLROnPlateau), which json cannot serialise directly.
    history = {key: [float(v) for v in values] for key, values in hist.history.items()}
    with open(os.path.join(args.artifacts, 'history.json'), 'w') as f:
        json.dump(history, f)

    # Eval + confusion matrix
    val_gen.reset()
    preds = model.predict(val_gen, steps=steps_val)
    y_pred = preds.argmax(axis=1)
    y_true = val_gen.classes
    target_names = list(val_gen.class_indices.keys())
    # Pin the label set so a class that is never predicted or present still
    # gets its row/column and `target_names` always lines up.
    labels = list(range(len(target_names)))
    print(classification_report(y_true, y_pred, labels=labels,
                                target_names=target_names, digits=4))
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    plot_confmat(cm, target_names, os.path.join(args.artifacts, 'confusion_matrix.png'))

if __name__ == "__main__":
    # Command-line entry point: collect the run configuration and train.
    # NOTE(review): inside a notebook kernel sys.argv presumably holds the
    # kernel's own arguments, so the required flags would be missing - confirm.
    parser = argparse.ArgumentParser()

    # Data locations (both required).
    parser.add_argument("--train_dir", type=str, required=True)  # e.g., data/TRAIN
    parser.add_argument("--val_dir", type=str, required=True)  # e.g., data/TEST  (binary set)

    # Optimisation settings.
    parser.add_argument("--epochs", type=int, default=20)
    parser.add_argument("--batch", type=int, default=32)
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument(
        "--unfreeze_at",
        type=int,
        default=None,
        help="e.g., 140 to fine-tune last blocks",
    )

    # Output directory for the model, history and plots.
    parser.add_argument("--artifacts", type=str, default="artifacts")

    args = parser.parse_args()
    main(args)


Note: you may need to restart the kernel to use updated packages.


'DOSKEY' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
def set_seed(s=42):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        s: Seed value handed to all three generators (default 42).
    """
    random.seed(s)
    np.random.seed(s)
    tf.random.set_seed(s)

def get_gens(train_dir, val_dir, img_size=224, batch=32):
    """Create the training and validation directory iterators.

    Pixels are normalised with ResNet50's `preprocess_input` instead of a plain
    1/255 rescale: `build_model` loads ImageNet ResNet50 weights, and the Keras
    documentation requires inputs to go through that function. Models trained
    earlier with the 1/255 rescale expect the old scaling at inference time.

    Args:
        train_dir: Directory with one sub-folder per class (training images).
        val_dir: Directory with one sub-folder per class (validation images).
        img_size: Side length, in pixels, images are resized to.
        batch: Batch size used by both iterators.

    Returns:
        Tuple `(train_gen, val_gen)`. The training iterator is shuffled and
        augmented; the validation iterator is neither, so its sample order
        lines up with `val_gen.classes`.
    """
    # ImageDataGenerator applies preprocessing_function after augmentation and
    # before `rescale`, so `rescale` must be dropped rather than combined.
    preprocess = tf.keras.applications.resnet50.preprocess_input

    train_aug = ImageDataGenerator(
        preprocessing_function=preprocess,
        rotation_range=25,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,  # waste has weak orientation bias
        fill_mode='nearest'
    )
    val_aug = ImageDataGenerator(preprocessing_function=preprocess)

    # Settings shared by both iterators.
    flow_kwargs = dict(
        target_size=(img_size, img_size),
        batch_size=batch,
        class_mode='categorical',
    )
    train_gen = train_aug.flow_from_directory(train_dir, shuffle=True, **flow_kwargs)
    val_gen = val_aug.flow_from_directory(val_dir, shuffle=False, **flow_kwargs)
    return train_gen, val_gen


Note: you may need to restart the kernel to use updated packages.


'DOSKEY' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
def compute_class_weights(generator):
    """Compute balanced class weights from an iterator's integer labels.

    A class `i` with `n_i` samples receives `total / (num_classes * n_i)`,
    where `num_classes` is the length of the label bincount.

    Args:
        generator: Object exposing `classes`, a sequence of non-negative
            integer class ids (one per sample).

    Returns:
        Dict mapping class id to a Python float weight. A class id with no
        samples (a gap in the bincount) gets the neutral weight 1.0 instead of
        the infinite value a division by zero would produce.
    """
    counts = np.bincount(np.asarray(generator.classes, dtype=np.int64))
    total = int(counts.sum())
    n_classes = len(counts)

    weights = {}
    for class_id, count in enumerate(counts):
        count = int(count)
        # An empty class never contributes to the loss; keep its weight finite.
        weights[class_id] = total / (n_classes * count) if count > 0 else 1.0
    return weights

def build_model(num_classes=2, lr=1e-4, trainable_at=None, img_size=224):
    """Build and compile a ResNet50-based softmax classifier.

    Args:
        num_classes: Number of output classes (width of the softmax layer).
        lr: Learning rate for the Adam optimizer.
        trainable_at: If None, the whole ResNet50 backbone is frozen. Otherwise
            backbone layers from this index onward (slice semantics, so a
            negative value counts from the end) are unfrozen for fine-tuning.
        img_size: Side length of the square RGB input; defaults to 224, the
            value that was previously hard-coded.

    Returns:
        A compiled `Model` using categorical cross-entropy and accuracy.
    """
    base = ResNet50(weights='imagenet', include_top=False,
                    input_shape=(img_size, img_size, 3))
    if trainable_at is None:
        base.trainable = False
    else:
        for layer in base.layers:
            layer.trainable = False
        for layer in base.layers[trainable_at:]:
            # Keep BatchNormalization frozen while fine-tuning: the Keras
            # transfer-learning guide warns that updating its statistics can
            # destroy what the pretrained model has learned.
            if not isinstance(layer, tf.keras.layers.BatchNormalization):
                layer.trainable = True

    # Classification head on top of the pooled backbone features.
    x = GlobalAveragePooling2D()(base.output)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(num_classes, activation='softmax')(x)

    model = Model(base.input, out)
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def plot_confmat(cm, classes, out_path):
    """Render a confusion matrix as an annotated heat map and save it.

    Args:
        cm: 2-D confusion matrix (rows are true labels, columns are predicted
            labels). Integer matrices are annotated as whole numbers, any
            other dtype with two decimals.
        classes: Class names used as tick labels on both axes.
        out_path: Destination path passed to `plt.savefig`.
    """
    cm = np.asarray(cm)
    # 'd' raises on floats, so pick the cell format from the dtype.
    fmt = 'd' if cm.dtype.kind in 'iu' else '.2f'

    fig = plt.figure(figsize=(4,4))
    try:
        plt.imshow(cm, interpolation='nearest')
        plt.title('Confusion Matrix'); plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45); plt.yticks(tick_marks, classes)
        # Cells darker than half the maximum get white text for contrast.
        thresh = cm.max()/2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        plt.ylabel('True label'); plt.xlabel('Predicted label')
        plt.tight_layout(); plt.savefig(out_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)




'DOSKEY' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
def main(args):
    """Train the classifier, then save the model, history and evaluation plots.

    Args:
        args: Namespace with `train_dir`, `val_dir`, `batch`, `lr`,
            `unfreeze_at`, `epochs` and `artifacts` attributes.

    Raises:
        ValueError: If the training and validation directories do not yield
            the same class-name-to-index mapping.

    Side effects:
        Writes `model.h5`, `history.json` and `confusion_matrix.png` into
        `args.artifacts` and prints a classification report.
    """
    set_seed()
    os.makedirs(args.artifacts, exist_ok=True)
    train_gen, val_gen = get_gens(args.train_dir, args.val_dir, batch=args.batch)

    # Different mappings would make validation labels and metrics meaningless.
    if train_gen.class_indices != val_gen.class_indices:
        raise ValueError(
            "train/val class folders differ: "
            f"{train_gen.class_indices} vs {val_gen.class_indices}"
        )

    # Size the softmax layer from the data instead of assuming two classes.
    num_classes = len(train_gen.class_indices)
    model = build_model(num_classes=num_classes, lr=args.lr, trainable_at=args.unfreeze_at)

    cweights = compute_class_weights(train_gen)

    cbs = [
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6),
        ModelCheckpoint(os.path.join(args.artifacts, 'model.h5'), monitor='val_loss',
                        save_best_only=True)
    ]

    steps_train = math.ceil(train_gen.samples / args.batch)
    steps_val = math.ceil(val_gen.samples / args.batch)

    hist = model.fit(
        train_gen, steps_per_epoch=steps_train,
        validation_data=val_gen, validation_steps=steps_val,
        epochs=args.epochs, class_weight=cweights, callbacks=cbs, verbose=1
    )

    # Save history. Logged values can be NumPy scalars (e.g. the learning rate
    # recorded by ReduceLROnPlateau), which json cannot serialise directly.
    history = {key: [float(v) for v in values] for key, values in hist.history.items()}
    with open(os.path.join(args.artifacts, 'history.json'), 'w') as f:
        json.dump(history, f)

    # Eval + confusion matrix
    val_gen.reset()
    preds = model.predict(val_gen, steps=steps_val)
    y_pred = preds.argmax(axis=1)
    y_true = val_gen.classes
    target_names = list(val_gen.class_indices.keys())
    # Pin the label set so a class that is never predicted or present still
    # gets its row/column and `target_names` always lines up.
    labels = list(range(len(target_names)))
    print(classification_report(y_true, y_pred, labels=labels,
                                target_names=target_names, digits=4))
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    plot_confmat(cm, target_names, os.path.join(args.artifacts, 'confusion_matrix.png'))

if __name__ == "__main__":
    # Command-line entry point: collect the run configuration and train.
    # NOTE(review): inside a notebook kernel sys.argv presumably holds the
    # kernel's own arguments, so the required flags would be missing - confirm.
    parser = argparse.ArgumentParser()

    # Data locations (both required).
    parser.add_argument("--train_dir", type=str, required=True)  # e.g., data/TRAIN
    parser.add_argument("--val_dir", type=str, required=True)  # e.g., data/TEST  (binary set)

    # Optimisation settings.
    parser.add_argument("--epochs", type=int, default=20)
    parser.add_argument("--batch", type=int, default=32)
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument(
        "--unfreeze_at",
        type=int,
        default=None,
        help="e.g., 140 to fine-tune last blocks",
    )

    # Output directory for the model, history and plots.
    parser.add_argument("--artifacts", type=str, default="artifacts")

    args = parser.parse_args()
    main(args)


Collecting matplotlib
  Using cached matplotlib-3.10.7-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.3.3-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Using cached fonttools-4.60.1-cp312-cp312-win_amd64.whl.metadata (114 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.9-cp312-cp312-win_amd64.whl.metadata (6.4 kB)
Collecting pyparsing>=3 (from matplotlib)
  Using cached pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Using cached matplotlib-3.10.7-cp312-cp312-win_amd64.whl (8.1 MB)
Using cached contourpy-1.3.3-cp312-cp312-win_amd64.whl (226 kB)
Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)
Using cached fonttools-4.60.1-cp312-cp312-win_amd64.whl (2.3 MB)
Using cached kiwisolver-1.4.9-cp312-cp312-win_amd64.whl (73 kB)
Using cac

'DOSKEY' is not recognized as an internal or external command,
operable program or batch file.
