In [None]:
import tensorflow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, AveragePooling2D, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

* The task at hand is to detect the presence of asphalt in a set of images that are organized into separate class folders, e.g. the pothole folder contains pothole images and the non-pothole folder contains non-pothole images.

In [None]:
# Step 1: load the data.
# Dataset location: UAPD_ tongzheng_final\dataset
# There are no dedicated train/test directories, only one folder per class,
# so ImageDataGenerator performs the split: 80% training, 20% validation.

# ImageDataGenerator yields batches of image tensors and loops over the data
# indefinitely.
#   rescale: every pixel value is multiplied by this factor (here 1/255).
#   validation_split: fraction of the images reserved for validation
#       (strictly between 0 and 1).
train_datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)

# Options shared by the training and the validation iterator.
flow_options = dict(
    directory="dataset",
    target_size=(64, 64),
    color_mode="rgb",
    batch_size=32,
    class_mode="binary",
    shuffle=True,
    seed=42,
)

train_generator = train_datagen.flow_from_directory(subset='training', **flow_options)

validation_generator = train_datagen.flow_from_directory(subset='validation', **flow_options)

# Pull one batch from the training iterator so it can be visualized below;
# the labels of that batch are not needed.
sample_training_images, _ = next(train_generator)
def plotImages(images_arr):
    """Display the given images side by side in a single row.

    The grid gets one column per image, so no blank axes are drawn for short
    inputs and no images are silently dropped for long ones.

    Parameters
    ----------
    images_arr : iterable of array-like
        Images accepted by ``Axes.imshow``. An empty input draws nothing.
    """
    images = list(images_arr)
    if not images:
        # Nothing to draw; plt.subplots cannot build a zero-column grid.
        return
    # squeeze=False keeps `axes` two-dimensional even for a single image,
    # so flatten() works for every input size.
    fig, axes = plt.subplots(1, len(images), figsize=(20, 20), squeeze=False)
    for img, ax in zip(images, axes.flatten()):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# Show the first five images of the sampled training batch.
plotImages(sample_training_images[:5])

In [None]:
# Next step: data augmentation
# Data augmentation increases the diversity of the training data without
# collecting new images (random rotations, shifts, flips and zooms here), which
# helps the model generalize.
# Dataset location: UAPD_ tongzheng_final\dataset
train_datagen_augmented = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=45,
    width_shift_range=.15,
    height_shift_range=.15,
    horizontal_flip=True,
    zoom_range=0.5
)

# Validation images must NOT be augmented: random transforms on the held-out
# data distort the validation metrics. Only the rescaling is applied.
# NOTE(review): the same validation_split is used so that both generators
# partition the files identically - confirm against the Keras docs for the
# installed version.
validation_datagen_plain = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator_augmented = train_datagen_augmented.flow_from_directory(
    directory="dataset",
    target_size=(64, 64),
    color_mode="rgb",
    batch_size=32,
    class_mode="binary",
    shuffle=True,
    seed=42,
    subset='training'
)

# The name is kept for the cells below; the data it yields is rescaled only.
validation_generator_augmented = validation_datagen_plain.flow_from_directory(
    directory="dataset",
    target_size=(64, 64),
    color_mode="rgb",
    batch_size=32,
    class_mode="binary",
    shuffle=True,
    seed=42,
    subset='validation'
)

# Visualize a few augmented training images.
sample_training_images, _ = next(train_generator_augmented)
plotImages(sample_training_images[:5])

In [None]:
# Next step: build the model
# A small CNN: three convolution stages (the first two followed by max
# pooling), then a dense head ending in a single sigmoid unit for the binary
# classification.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Next step: train the model
# Fit for 10 epochs on the augmented training iterator, monitoring the
# validation iterator after each epoch. `history` keeps the per-epoch metrics.
history = model.fit(
    train_generator_augmented,
    validation_data=validation_generator_augmented,
    epochs=10,
)

# Next step: evaluate the model
# Computes loss and accuracy on the validation iterator.
model.evaluate(validation_generator_augmented)

# Next step: visualize the training history
# One figure per metric, each with a title and axis labels so the plot can be
# read on its own. Every entry: (title, history key, train label, val label).
history_curves = (
    ("Loss", "loss", "train loss", "val loss"),
    ("Accuracy", "accuracy", "train acc", "val acc"),
)

for curve_title, metric_key, train_label, val_label in history_curves:
    fig, ax = plt.subplots()
    ax.plot(history.history[metric_key], label=train_label)
    # Keras stores validation metrics under the "val_" prefixed key.
    ax.plot(history.history["val_" + metric_key], label=val_label)
    ax.set(title=curve_title + " per epoch", xlabel="epoch", ylabel=metric_key)
    ax.legend()
    plt.show()

In [None]:
# Next step: save the model
# Write the trained model to an HDF5 file in the working directory.
model_path = 'asphalt_detection.h5'
model.save(model_path)

# Next step: load the model
# Read it back from disk so the prediction below uses the saved artifact.
model = load_model(model_path)

# Next step: test the model on a single image
img = image.load_img('dataset/asphalt/asphalt_1.jpg', target_size=(64, 64))
# Apply the same 1/255 rescaling the training generator used; feeding raw
# 0-255 pixel values to a model trained on 0-1 inputs gives meaningless output.
img = image.img_to_array(img) / 255.0
# The model expects a batch dimension: (1, 64, 64, 3).
img = np.expand_dims(img, axis=0)
result = model.predict(img)
print(result)

# The sigmoid output is a probability for class index 1, so threshold it at
# 0.5 instead of testing for exact equality with 1 (which is almost never true).
predicted_index = int(result[0][0] > 0.5)

# Map the index back to the folder name using the generator's own mapping
# rather than hard-coded labels, so the name always matches the index order
# used during training. Requires the data-augmentation cell to have been run.
index_to_label = {
    index: label
    for label, index in train_generator_augmented.class_indices.items()
}
prediction = index_to_label[predicted_index]
print(prediction)