In [1]:
import os
import zipfile
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.callbacks import EarlyStopping

# Constants
IMAGE_SIZE = (150, 150)  # Change according to your dataset
BATCH_SIZE = 32
EPOCHS = 20

In [None]:
# Mount Google Drive (if using Google Colab)
from google.colab import drive
drive.mount('/content/drive')


In [2]:
# Path to the dataset zip file
zip_path = '/content/drive/MyDrive/Water Treatment Dataset.zip'

import zipfile

# Extract the zip file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall('/content')  # Extract to /content directory in Colab

In [3]:
import os
# Path to the dataset
data_path = '/content/Dataset'

# Path to the "water" and "not_water" directories
water_path = os.path.join(data_path, 'Water')
not_water_path = os.path.join(data_path, 'Not Water')

# Check if the directories exist
print("Water directory exists:", os.path.isdir(water_path))
print("Not Water directory exists:", os.path.isdir(not_water_path))

Water directory exists: True
Not Water directory exists: True


In [4]:
import glob

# Load the list of filenames from the directories
water_filenames = glob.glob(os.path.join(water_path, '*.*'))  # Adjust the file extension if needed
not_water_filenames = glob.glob(os.path.join(not_water_path, '*.*'))

# Combine filenames and create labels
filenames = water_filenames + not_water_filenames
labels = ['water'] * len(water_filenames) + ['not_water'] * len(not_water_filenames)

# Split the data into training and testing sets
train_filenames, test_filenames, train_labels, test_labels = train_test_split(
    filenames, labels, test_size=0.2, random_state=42, stratify=labels)


In [5]:
# Data generators for training and testing
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

# Configure data generators
train_generator = train_datagen.flow_from_directory(
    data_path,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    data_path,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)

test_generator = test_datagen.flow_from_directory(
    data_path,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)


Found 24580 images belonging to 2 classes.
Found 6144 images belonging to 2 classes.
Found 30724 images belonging to 2 classes.


In [6]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [7]:
# Early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[early_stopping]
)

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_generator, steps=len(test_generator))
print('Test accuracy:', test_acc * 100, '%')

# Save the model
model.save('water_detection_model.h5')


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Test accuracy: 91.99973940849304 %


  saving_api.save_model(
