In [1]:
#importing os, shutil and glob for copying concrete images from raw folder to the new structure which is 
#required for ImageDataGenerator.flow_from_directory
import os
import shutil
import glob

In [2]:
#image paths grouped by class label: 'crack' for cracked concrete, 'uncrack' for uncracked concrete
data = {'crack': [], 'uncrack': []}

In [3]:
#collect the paths of the cracked images from their source folder and store them in our dictionary.
#the list is assigned (not appended to) so re-running this cell does not duplicate entries, and it is
#sorted because glob returns paths in a filesystem-dependent order.
src = 'concrete_dataset/ConcreteCrackImagesforClassification/Positive'
data['crack'] = sorted(glob.glob(os.path.join(src, '*.jpg')))

In [4]:
#number of cracked-image paths collected (20000 for this dataset)
print(len(data['crack']))

20000


In [5]:
#collect the paths of the uncracked images from their source folder and store them in our dictionary.
#the list is assigned (not appended to) so re-running this cell does not duplicate entries, and it is
#sorted because glob returns paths in a filesystem-dependent order.
src = 'concrete_dataset/ConcreteCrackImagesforClassification/Negative'
data['uncrack'] = sorted(glob.glob(os.path.join(src, '*.jpg')))

In [6]:
#number of uncracked-image paths collected (also 20000 for this dataset)
print(len(data['uncrack']))

20000


In [8]:
#create the directory structure used for training: one sub-folder per class ('crack', 'uncrack')
#under both 'training' and 'testing'.
#os.makedirs with exist_ok=True also creates the parent folders and does not fail when a folder
#already exists, so this cell can safely be run more than once.
for subset in ('training', 'testing'):
    for class_type in ('crack', 'uncrack'):
        os.makedirs(os.path.join('master_data_concrete', subset, class_type), exist_ok=True)

In [9]:
#copy 75% of the cracked and uncracked images into the training folder and the remaining 25% into the testing folder
split_size = 0.75
base_dest = 'master_data_concrete'
for class_type, imagesList in data.items():
    #sort first so the split is reproducible regardless of the order in which the paths were collected
    ordered_images = sorted(imagesList)
    train_size = int(split_size * len(ordered_images))
    train_images_list = ordered_images[:train_size]
    test_images_list = ordered_images[train_size:]

    for subset, subset_images in (('training', train_images_list), ('testing', test_images_list)):
        #the destination only depends on the subset and the class, so compute it once per subset
        dest = os.path.join(base_dest, subset, class_type)
        #make sure the destination is a directory: if it were missing, shutil.copy would write
        #every image onto a single file named after the class instead of failing
        os.makedirs(dest, exist_ok=True)
        for image in subset_images:
            shutil.copy(image, dest)

In [11]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

In [13]:
#convolutional part of the network: three blocks of 3x3 convolution -> relu -> 2x2 max pooling
#with 32, 32 and 64 filters; only the first convolution declares the 227x227 RGB input shape.
model = Sequential()
for block_index, n_filters in enumerate((32, 32, 64)):
    if block_index == 0:
        conv_layer = Conv2D(n_filters, (3, 3), input_shape=(227, 227, 3))
    else:
        conv_layer = Conv2D(n_filters, (3, 3))
    model.add(conv_layer)
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

In [14]:
#classifier head appended to the convolutional blocks
head_layers = [
    Flatten(),  # converts the 3D feature maps to 1D feature vectors
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),  # output layer: binary classification (cracked or uncracked), so a single unit is enough
    Activation('sigmoid'),
]
for head_layer in head_layers:
    model.add(head_layer)

#binary cross-entropy matches the single sigmoid output unit
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [15]:
from keras.preprocessing.image import ImageDataGenerator
batch_size = 16

# this is the augmentation configuration we will use for training:
# along with rescaling, we also randomly shear, zoom and horizontally flip the images of the
# training dataset only, so that the model rarely sees exactly the same image twice,
# which helps in fighting overfitting.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolders of 'master_data_concrete/training', and indefinitely generate
# batches of augmented image data
train_generator = train_datagen.flow_from_directory(
        'master_data_concrete/training',  # the training folder populated by the copy step above
        target_size=(227, 227),  # all images will be resized to 227*227
        batch_size=batch_size,
        class_mode='binary')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data; it reads the held-out 25% of the images
# from 'master_data_concrete/testing'
validation_generator = test_datagen.flow_from_directory(
        'master_data_concrete/testing',
        target_size=(227, 227),
        batch_size=batch_size,
        class_mode='binary')

Found 30000 images belonging to 2 classes.
Found 10000 images belonging to 2 classes.


In [16]:
from keras.callbacks import EarlyStopping
#stop training when validation accuracy has not improved by at least 0.005 for 2 epochs in a row
es = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.005,
    patience=2,
)

In [17]:
#train for at most 50 epochs; the early-stopping callback `es` can end the run sooner.
#steps_per_epoch (30000 // batch_size) and validation_steps (10000 // batch_size) are deliberately
#not passed, leaving keras to fall back to its defaults for both.
training_options = {
    'epochs': 50,
    'validation_data': validation_generator,
    'callbacks': [es],
}
model.fit_generator(train_generator, **training_options)

#persist the learned weights of the trained model
model.save_weights('first_try.h5')


Epoch 1/50
Epoch 2/50
Epoch 3/50


In [19]:
#evaluate the trained model on the validation generator; the model was compiled with
#metrics=['accuracy'], so the displayed result is the pair [loss, accuracy]
model.evaluate_generator(validation_generator)

[0.11070291697978973, 0.9890000224113464]