In [1]:
# Loading the required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, RMSprop, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from kerastuner.tuners import RandomSearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

ModuleNotFoundError: No module named 'numpy'

In [None]:

# Loading the dataset
import os
import cv2
from tqdm import tqdm

# Root directories of the three dataset splits (relative to the notebook's
# working directory). Each one is passed to load_images, which treats every
# sub-folder as a class and uses the sub-folder name as the label.
train_path = '../dataset_organized/train/'
test_path = '../dataset_organized/test/'
val_path = '../dataset_organized/validation/'

def load_images(path, image_size=(150, 150)):
    """Load every image below ``path`` and label it with its sub-folder name.

    The directory layout is expected to be ``path/<class_name>/<image file>``.

    Args:
        path: Directory containing one sub-folder per class. A trailing
            separator is accepted but not required.
        image_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(150, 150)``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is an array of the resized images in the
        channel order produced by ``cv2.imread`` and ``y`` is an array with the
        class (sub-folder) name of each image, in the same order.
    """
    import warnings

    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order identical from run to run.
    for folder in sorted(os.listdir(path)):
        folder_path = os.path.join(path, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders are not classes; trying to
            # list them would raise NotADirectoryError.
            continue

        for file in tqdm(sorted(os.listdir(folder_path))):
            file_path = os.path.join(folder_path, file)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None rather than raising; resizing None would crash.
                warnings.warn(f"Skipping unreadable image: {file_path}")
                continue
            images.append(cv2.resize(img, image_size))
            labels.append(folder)

    return np.array(images), np.array(labels)

# Load the three splits. At this point the y_* arrays hold the class-folder
# name of every image.
X_train, y_train = load_images(train_path)
X_test, y_test = load_images(test_path)
X_val, y_val = load_images(val_path)

classes_names = np.unique(y_train)
num_classes = len(classes_names)
print(f"Number of classes: {num_classes}")
print(f"Class names: {classes_names}")

# Encoding the target variable: class names -> integer ids. The encoder is
# fitted on the training labels only and reused for the other splits.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
y_val_encoded = label_encoder.transform(y_val)

# One hot encoding the target variable. num_classes is passed explicitly so
# every split gets the same number of columns even if one of them happens not
# to contain the highest class id.
# From here on y_train / y_test / y_val are one-hot matrices, while the
# *_encoded arrays keep the 1-D integer ids.
y_train = to_categorical(y_train_encoded, num_classes=num_classes)
y_test = to_categorical(y_test_encoded, num_classes=num_classes)
y_val = to_categorical(y_val_encoded, num_classes=num_classes)

# Normalizing the images to [0, 1]. Casting to float32 first avoids the
# implicit promotion to float64, which would double the memory footprint.
X_train = X_train.astype('float32') / 255.0
X_val = X_val.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

print(f"Training set shape: {X_train.shape}")
print(f"Validation set shape: {X_val.shape}")
print(f"Test set shape: {X_test.shape}")

# Enhanced Data Augmentation
# NOTE(review): the tuner.search / model.fit calls later in this notebook are
# given X_train directly, so this generator only takes effect once it is fed
# to them (e.g. via datagen.flow) -- confirm the intended usage.
# NOTE(review): brightness_range is applied to images already scaled to
# [0, 1]; verify the value range of the augmented batches before relying on it.
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
    brightness_range=[0.8, 1.2]
)

# datagen.fit(X_train) is intentionally not called: per the Keras docs it is
# only required for featurewise_center, featurewise_std_normalization or
# zca_whitening, none of which are enabled above, and it copies the whole
# training array.


100%|██████████| 287/287 [00:00<00:00, 1407.68it/s]
100%|██████████| 354/354 [00:00<00:00, 1854.36it/s]
100%|██████████| 286/286 [00:00<00:00, 1718.09it/s]
100%|██████████| 403/403 [00:00<00:00, 1554.53it/s]
100%|██████████| 347/347 [00:00<00:00, 1736.18it/s]
100%|██████████| 91/91 [00:00<00:00, 1593.50it/s]
100%|██████████| 70/70 [00:00<00:00, 1582.64it/s]
100%|██████████| 82/82 [00:00<00:00, 1706.14it/s]
100%|██████████| 68/68 [00:00<00:00, 1645.83it/s]
100%|██████████| 108/108 [00:00<00:00, 1610.05it/s]
100%|██████████| 74/74 [00:00<00:00, 1767.10it/s]
100%|██████████| 29/29 [00:00<00:00, 1636.20it/s]
100%|██████████| 46/46 [00:00<00:00, 1565.24it/s]
100%|██████████| 65/65 [00:00<00:00, 1799.28it/s]
100%|██████████| 56/56 [00:00<00:00, 1713.25it/s]
100%|██████████| 83/83 [00:00<00:00, 1628.90it/s]
100%|██████████| 61/61 [00:00<00:00, 1746.14it/s]
100%|██████████| 17/17 [00:00<00:00, 1723.59it/s]


Number of classes: 6
Class names: ['cardboard' 'glass' 'metal' 'paper' 'plastic' 'trash']
Training set shape: (1768, 150, 150, 3)
Validation set shape: (328, 150, 150, 3)
Test set shape: (431, 150, 150, 3)


In [None]:
def build_model(hp):
    """Build and compile a CNN whose architecture is sampled from ``hp``.

    Search space declared here:
      * ``num_conv_layers``: 2 to 4 Conv2D + MaxPooling2D stages, each with its
        own ``conv_<n>_filters`` (32-128, step 32), ``conv_<n>_kernel_size``
        (3 or 5) and ``conv_<n>_activation`` (relu or tanh).
      * ``dense_units`` (64-256, step 64), ``dense_activation`` (relu or tanh)
        and ``dropout_rate`` (0.2-0.5, step 0.1) for the classifier head.
      * ``optimizer`` (adam, rmsprop or sgd) and ``learning_rate``
        (1e-2, 1e-3 or 1e-4).

    Args:
        hp: Hyperparameter container supplied by the tuner.

    Returns:
        A compiled Sequential model with a 6-way softmax output, using
        categorical cross-entropy loss and the accuracy metric.
    """
    net = Sequential()

    # Convolutional feature extractor
    layer_count = hp.Int('num_conv_layers', min_value=2, max_value=4)
    for layer_no in range(1, layer_count + 1):
        conv_kwargs = dict(
            filters=hp.Int(f'conv_{layer_no}_filters', min_value=32, max_value=128, step=32),
            kernel_size=hp.Choice(f'conv_{layer_no}_kernel_size', values=[3, 5]),
            activation=hp.Choice(f'conv_{layer_no}_activation', values=['relu', 'tanh']),
        )
        if layer_no == 1:
            # Only the first layer declares the input image shape.
            conv_kwargs['input_shape'] = (150, 150, 3)
        net.add(Conv2D(**conv_kwargs))
        net.add(MaxPooling2D((2, 2)))

    net.add(Flatten())

    # Classifier head
    hidden_units = hp.Int('dense_units', min_value=64, max_value=256, step=64)
    hidden_activation = hp.Choice('dense_activation', values=['relu', 'tanh'])
    net.add(Dense(units=hidden_units, activation=hidden_activation))
    drop_rate = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    net.add(Dropout(drop_rate))
    net.add(Dense(6, activation='softmax'))

    # Optimizer: the same 'learning_rate' hyperparameter is used whichever
    # optimizer is picked; anything other than adam/rmsprop falls back to SGD.
    optimizer_name = hp.Choice('optimizer', values=['adam', 'rmsprop', 'sgd'])
    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer_cls = {'adam': Adam, 'rmsprop': RMSprop}.get(optimizer_name, SGD)

    net.compile(
        optimizer=optimizer_cls(learning_rate=step_size),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Configure the random search over the space declared in build_model.
search_settings = dict(
    objective='val_accuracy',
    max_trials=20,            # hyperparameter combinations to try
    executions_per_trial=1,   # models trained per combination
    directory='my_dir',
    project_name='cnn_hyperparameter_tuning',
)
tuner = RandomSearch(build_model, **search_settings)

# Run the search on the arrays prepared earlier in the notebook.
val_data = (X_val, y_val)
tuner.search(X_train, y_train, epochs=10, validation_data=val_data)

# Best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
best_layer_count = best_hps.get('num_conv_layers')


def _layer_summary(layer_no):
    """Return the report line describing convolutional layer ``layer_no``."""
    prefix = f'conv_{layer_no}_'
    n_filters = best_hps.get(prefix + 'filters')
    k_size = best_hps.get(prefix + 'kernel_size')
    act_fn = best_hps.get(prefix + 'activation')
    return f'- Layer {layer_no}: {n_filters} filters, kernel size {k_size}, activation {act_fn}'


# Layers 3 and 4 are reported only when the best model has them; otherwise
# their line is left empty. The leading/trailing '' entries give the report a
# blank line before and after.
report_lines = [
    '',
    f"The hyperparameter search is complete. The optimal number of convolutional layers is {best_layer_count},",
    'the optimal number of filters for each layer and corresponding kernel sizes and activations are:',
    _layer_summary(1),
    _layer_summary(2),
    _layer_summary(3) if best_layer_count >= 3 else '',
    _layer_summary(4) if best_layer_count >= 4 else '',
    f"the optimal number of units in the dense layer is {best_hps.get('dense_units')},",
    f"the optimal activation function for the dense layer is {best_hps.get('dense_activation')},",
    f"the optimal dropout rate is {best_hps.get('dropout_rate')},",
    f"and the optimal optimizer and learning rate are {best_hps.get('optimizer')} with a learning rate of {best_hps.get('learning_rate')}.",
    '',
]
print('\n'.join(report_lines))

# Build a model from the best hyperparameters and train it.
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, epochs=10, validation_data=val_data)

# Save the trained model
model.save('hypertuning_cnn_model.h5')

# Training vs. validation curves: accuracy first, then loss.
for metric_key, metric_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[metric_key], label=f'Training {metric_label}', color='blue')
    plt.plot(history.history['val_' + metric_key], label=f'Validation {metric_label}', color='red')
    plt.xlabel('Epochs')
    plt.ylabel(metric_label)
    plt.legend()
    plt.show()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Search space summary
Default search space size: 16
num_conv_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 4, 'step': 1, 'sampling': 'linear'}
conv_1_filters (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 256, 'step': 32, 'sampling': 'linear'}
conv_1_kernel_size (Choice)
{'default': 3, 'conditions': [], 'values': [3, 5], 'ordered': True}
conv_1_activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
conv_1_l2 (Choice)
{'default': 0.0, 'conditions': [], 'values': [0.0, 0.001, 0.01], 'ordered': True}
conv_2_filters (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 256, 'step': 32, 'sampling': 'linear'}
conv_2_kernel_size (Choice)
{'default': 3, 'conditions': [], 'values': [3, 5], 'ordered': True}
conv_2_activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
conv_2_l2 (Choice)
{'default': 0.0, 'conditions': [], 'values

2025-04-23 20:45:25.991041: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 679477248 exceeds 10% of free system memory.
2025-04-23 20:45:26.136296: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 679477248 exceeds 10% of free system memory.
2025-04-23 20:45:27.032471: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 679477248 exceeds 10% of free system memory.
  self._warn_if_super_not_called()


Epoch 1/30


2025-04-23 20:45:29.712425: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 679477248 exceeds 10% of free system memory.
2025-04-23 20:45:30.809098: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 679477248 exceeds 10% of free system memory.


In [None]:
# Loading the model saved by the training cell
from keras.models import load_model
model = load_model('hypertuning_cnn_model.h5')

# Evaluating the model
predictions = model.predict(X_test)
# The model is compiled with categorical_crossentropy, so evaluate() needs the
# one-hot targets (y_test), not the 1-D integer ids in y_test_encoded.
loss, accuracy = model.evaluate(X_test, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

In [None]:
# Model Evaluation

# f1 score, precision, recall, accuracy, confusion matrix, classification report, and ROC curve

from sklearn.metrics import (
    accuracy_score,
    auc,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_curve,
)

# y_test_encoded already holds 1-D integer class ids (LabelEncoder output), so
# it is used as-is; only the per-class probabilities in `predictions` need an
# argmax to become class ids.
y_true = np.asarray(y_test_encoded)
y_pred = np.argmax(predictions, axis=1)
class_ids = np.arange(len(classes_names))

# F1 Score
f1 = f1_score(y_true, y_pred, average='weighted')
print("F1 Score: ", f1)

# Precision
precision = precision_score(y_true, y_pred, average='weighted')
print("Precision: ", precision)

# Recall
recall = recall_score(y_true, y_pred, average='weighted')
print("Recall: ", recall)

# Accuracy
accuracy = accuracy_score(y_true, y_pred)
print("Accuracy: ", accuracy)

# ROC Curve (one-vs-rest: class i against all other classes)
fpr = dict()
tpr = dict()
roc_auc = dict()

for i in class_ids:
    fpr[i], tpr[i], _ = roc_curve((y_true == i).astype(int), predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

plt.figure(figsize=(10, 10))

for i in class_ids:
    plt.plot(fpr[i], tpr[i], label=classes_names[i] + ' (AUC = ' + str(roc_auc[i]) + ')')

# Diagonal = performance of a random classifier
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.title('ROC Curve')
plt.show()

# Classification Report
# `labels` pins the rows to the known classes so target_names always lines up,
# even if a class is absent from both the test labels and the predictions.
print(classification_report(y_true, y_pred, labels=class_ids, target_names=classes_names))

# Confusion Matrix
# Stored under a different name so the imported confusion_matrix function is
# not shadowed by its own result.
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)

plt.figure(figsize=(10, 10))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=classes_names, yticklabels=classes_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
