<a href="https://colab.research.google.com/github/jbsher/p4_waste_classification/blob/master/waste_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import cv2
import os
import numpy as np
import pandas as pd
import tensorflow as tf

from google.colab import drive
from sklearn.model_selection import train_test_split, GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier



In [3]:
# Mount Google Drive at /content/drive so the /content/drive/MyDrive paths
# used by the cells below resolve inside the Colab runtime.
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Root directory that holds one sub-directory of images per class
train_dir = '/content/drive/MyDrive/Colab Notebooks/TRAIN'

# Sub-directories to read, in the order they are loaded
sub_dirs = ['O', 'R']

# Integer class label assigned to every image found in each sub-directory
label_by_dir = {'O': 0, 'R': 1}

# Accumulators for the decoded images and their labels (kept index-aligned)
images = []
labels = []

for sub_dir in sub_dirs:
    # Resolve the label before touching any file: a sub-directory without a
    # label raises KeyError here instead of silently appending images that
    # have no matching entry in `labels`.
    label = label_by_dir[sub_dir]

    # Full path to this class's sub-directory
    full_dir = os.path.join(train_dir, sub_dir)

    # os.listdir returns names in arbitrary order; sorting makes the sample
    # order (and therefore any seeded split of the data) reproducible.
    image_names = sorted(os.listdir(full_dir))

    # Report how many files this sub-directory contains
    print(f'Number of files in {sub_dir} directory:', len(image_names))

    for image_name in image_names:
        image_path = os.path.join(full_dir, image_name)

        # cv2.imread yields None instead of raising when a file cannot be read
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to load image at {image_path}")
            continue

        # Resize to 28x28 and record the image together with its label
        images.append(cv2.resize(image, (28, 28)))
        labels.append(label)

# Stack the images into one array and normalize pixel values by 255
images = np.array(images) / 255.0
print('Images shape:', images.shape)

# Convert the list of labels to a numpy array
labels = np.array(labels)
print('Labels shape:', labels.shape)


Number of files in O directory: 12565
Number of files in R directory: 9999
Images shape: (22564, 28, 28, 3)
Labels shape: (22564,)


In [5]:
# Collapse every image into a single row of pixel values
n_samples = len(images)
flattened_images = images.reshape((n_samples, -1))

# Assemble the columns: the label first, then one column per pixel value
pixel_columns = {
    f'pixel_{idx}': flattened_images[:, idx]
    for idx in range(flattened_images.shape[1])
}
data = {'label': labels, **pixel_columns}

images_df = pd.DataFrame(data)

# Persist the table to Drive (the row index is written as the first CSV column)
images_df.to_csv('/content/drive/MyDrive/Colab Notebooks/images.csv')

In [4]:
# Reload the saved table from Drive; starting from this cell avoids re-reading
# the raw images and regenerating the CSV file.
images_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/images.csv')

In [7]:
# read_csv loads the saved row index back as an 'Unnamed: 0' column. Drop it by
# reassignment (not in place) and tolerate the column already being gone, so
# re-running this cell does not raise KeyError.
images_df = images_df.drop(columns='Unnamed: 0', errors='ignore')
images_df.head()

Unnamed: 0,label,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,...,pixel_2342,pixel_2343,pixel_2344,pixel_2345,pixel_2346,pixel_2347,pixel_2348,pixel_2349,pixel_2350,pixel_2351
0,0,0.886275,0.85098,0.811765,0.913725,0.878431,0.839216,0.933333,0.901961,0.85098,...,0.294118,0.572549,0.588235,0.631373,0.635294,0.670588,0.694118,0.505882,0.552941,0.6
1,0,0.258824,0.360784,0.721569,0.152941,0.258824,0.639216,0.062745,0.152941,0.427451,...,0.596078,0.690196,0.615686,0.607843,0.698039,0.619608,0.611765,0.690196,0.643137,0.627451
2,0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0,0.768627,0.729412,0.701961,0.768627,0.729412,0.701961,0.768627,0.729412,0.701961,...,0.705882,0.705882,0.705882,0.705882,0.709804,0.709804,0.709804,0.713725,0.713725,0.713725
4,0,0.721569,0.827451,0.909804,0.670588,0.784314,0.898039,0.623529,0.729412,0.862745,...,0.819608,0.203922,0.356863,0.517647,0.266667,0.380392,0.517647,0.32549,0.4,0.529412


In [8]:
# First column holds the class label; every remaining column is a pixel value
labels = images_df.iloc[:, 0].to_numpy()
pixel_values = images_df.iloc[:, 1:].to_numpy()

# Restore the 28x28, 3-channel image layout from the flat pixel rows
images = pixel_values.reshape(-1, 28, 28, 3)

In [9]:
# Create a train / validation / test split
X = images
y = labels

# Hold out 30% of the data (to be divided into validation and test sets),
# stratified on the label so the class balance is preserved.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)

# Halve the hold-out for a 70-15-15 split. Stratify this split as well so the
# validation and test sets keep the same class balance as the training set.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Test set shape:", X_test.shape)

Training set shape: (15794, 28, 28, 3)
Validation set shape: (3385, 28, 28, 3)
Test set shape: (3385, 28, 28, 3)


In [None]:
# BEST PERFORMER

In [25]:
# CNN: three conv / pool / dropout stages followed by a dense classifier head
model = Sequential([
    # Stage 1: 32 filters, 3x3 kernel, on 28x28 inputs with 3 channels
    Conv2D(32, 3, activation='relu', input_shape=(28, 28, 3)),
    MaxPooling2D(2),
    Dropout(0.25),

    # Stage 2: 64 filters
    Conv2D(64, 3, activation='relu'),
    MaxPooling2D(2),
    Dropout(0.25),

    # Stage 3: 128 filters
    Conv2D(128, 3, activation='relu'),
    MaxPooling2D(2),
    Dropout(0.25),

    # Classifier head: flatten, one hidden dense layer, heavier dropout
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),

    # Single sigmoid unit for the binary (0 / 1) label
    Dense(1, activation='sigmoid'),
])

In [26]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [27]:
# Stop once val_loss has failed to improve for 3 epochs. restore_best_weights
# rolls the model back to the weights of its best epoch; without it the model
# keeps the weights of the last epoch run, which may be several epochs past
# the best validation loss.
es = EarlyStopping(monitor='val_loss', patience=3, verbose=0,
                   restore_best_weights=True)

# Fit the model, tracking validation metrics after every epoch
history = model.fit(X_train, y_train,
                    batch_size=128,
                    epochs=10,
                    verbose=1,
                    callbacks=[es],
                    validation_data=(X_val, y_val))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
# Measure loss and accuracy on the held-out test set
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)




In [None]:
# TESTING AREA

In [13]:
# Experimental CNN: three conv stages (the last one without pooling) and a
# two-layer dense head
model = Sequential([
    # Stage 1: 32 filters, 3x3 kernel, on 28x28 inputs with 3 channels
    Conv2D(32, 3, activation='relu', input_shape=(28, 28, 3)),
    MaxPooling2D(2),
    Dropout(0.2),

    # Stage 2: 64 filters
    Conv2D(64, 3, activation='relu'),
    MaxPooling2D(2),
    Dropout(0.25),

    # Stage 3: 128 filters, no pooling layer
    Conv2D(128, 3, activation='relu'),
    Dropout(0.3),

    # Classifier head: flatten, then two dense layers each followed by dropout
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),

    # Single sigmoid unit for the binary (0 / 1) label
    Dense(1, activation='sigmoid'),
])

# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


# Stop once val_loss has failed to improve for 5 epochs, and roll the model
# back to the weights of its best epoch rather than keeping the last ones.
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Feature Engineering : Data Augmentation
# Random rotations, shifts, shear, zoom and horizontal flips applied on the fly.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# datagen.fit(X_train) is deliberately not called: it only computes statistics
# for featurewise_center / featurewise_std_normalization / zca_whitening, none
# of which is enabled above.

# Create variable for batch size
batch_size = 64

# Whole number of batches needed to cover the training set once (ceiling
# division); steps_per_epoch must be an integer, not a float.
steps_per_epoch = (len(X_train) + batch_size - 1) // batch_size

# Model training with data augmentation
model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
          steps_per_epoch=steps_per_epoch,
          epochs=20,
          validation_data=(X_val, y_val),
          callbacks=[es])


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fae7be1c280>

In [11]:
# Measure loss and accuracy on the held-out test set
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)




In [None]:
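# Set up ReduceLROnPlateau callback (currently disabled).
# NOTE: min_lr=0.001 equals Adam's default learning rate, so with
# optimizer='adam' this callback could never lower the rate; choose a smaller
# min_lr before enabling it.
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
#                               patience=5, min_lr=0.001)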