In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

2023-10-22 09:47:46.742769: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Function to load and preprocess images
def load_images(directory, label):
    images = []
    labels = []
    for filename in os.listdir(directory):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            img = cv2.imread(os.path.join(directory, filename))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
            img = cv2.resize(img, (128, 128))  # Resize image to 128x128 pixels
            images.append(img)
            labels.append(label)
    return images, labels

In [3]:
clean_images, clean_labels = load_images("cleaned", label=0)

In [4]:
dirty_images, dirty_labels = load_images("polluted", label=1)

In [5]:
all_images = clean_images + dirty_images
all_labels = clean_labels + dirty_labels


In [6]:
all_images = np.array(all_images)
all_labels = np.array(all_labels)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(all_images, all_labels, test_size=0.2, random_state=42)


In [8]:
X_train = X_train / 255.0
X_test = X_test / 255.0

In [9]:
# Convert labels to one-hot encoding
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [10]:
# Build the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation='softmax'))

In [11]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [12]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7fbb8286acb0>

In [13]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 89.29%


In [15]:

test_image = cv2.imread("/Users/pranaymishra/Desktop/ml_practice/ocean_dataset/cleaned/3.jpg")
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
test_image = cv2.resize(test_image, (128, 128))  
test_image = test_image / 255.0
test_image = np.reshape(test_image, (1, 128, 128, 3))
prediction = model.predict(test_image)
predicted_class = np.argmax(prediction)
if predicted_class == 0:
    print("The image is predicted to be clean water.")
else:
    print("The image is predicted to be dirty water.")
print(f"Confidence Scores: Clean Water: {prediction[0][0]}, Dirty Water: {prediction[0][1]}")


The image is predicted to be clean water.
Confidence Scores: Clean Water: 0.9698660373687744, Dirty Water: 0.03013400360941887
