# Importing Dependencies and Declaring Global Variables

### In this part we are going to install needed libraries and declare some variable needed for training and testing models.

In [1]:
# install split folders for splitting image data into train,val, and test.
%pip install split-folders

from keras.applications import VGG16,ResNet50
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
import os
from keras.applications.vgg16 import preprocess_input
from keras.models import Sequential
import tensorflow as tf
import numpy as np
from tqdm import tqdm
import splitfolders
import shutil
import random

NUM_CLASS = 2
IMAGE_SIZE = 224
BATCH_SIZE = 100
EPOCHS = 2

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1
Note: you may need to restart the kernel to use updated packages.


caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


# Data Collecting and Splitting

### We are going to collect data from the kaggle dataset, and copy it into splitfolders and let split folders split it into 3 part inside the <code>data</code> folder. Next part is initialize the image data generator for easier training with image.

In [2]:
file_path='/kaggle/input/concrete-crack-images-for-classification'
splitted_folder='data'
if not os.path.exists(splitted_folder):
   os.makedirs(splitted_folder)
else:
  # Deleting an non-empty folder
  shutil.rmtree(splitted_folder, ignore_errors=True)
  print("Deleted '%s' directory successfully" % splitted_folder)

  os.makedirs(splitted_folder)

def train_test_valid(images_folder=file_path,splitted_folder=splitted_folder):
  input_folder = images_folder
  output_folder = splitted_folder
  splitfolders.ratio(input_folder,output_folder, seed = 1337,ratio=(.8, 0.1,0.1), group_prefix = None)
train_test_valid()

Copying files: 40000 files [03:08, 212.40 files/s]


In [3]:
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

train_generator = data_generator.flow_from_directory(
    'data/train',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical')

validation_generator = data_generator.flow_from_directory(
    'data/val',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical')

test_generator = data_generator.flow_from_directory(
    'data/test',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode='categorical')

steps_per_epoch_training = len(train_generator)
steps_per_epoch_validation = len(validation_generator)
steps_per_epoch_test = len(test_generator)

Found 32000 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.


# Model Training and Validation

### Finally we are going to train VGG16 and ResNet50 pre-trained models

In [4]:
vgg16_model = Sequential()
vgg16_model.add(VGG16(include_top=False, pooling='avg', weights='imagenet'))
vgg16_model.add(layers.Dense(NUM_CLASS, activation='softmax'))

# since pre-trained model already trained so we dont need to train it again
vgg16_model.layers[0].trainable = False

vgg16_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
vgg16_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 dense (Dense)               (None, 2)                 1026      
                                                                 
Total params: 14,715,714
Trainable params: 1,026
Non-trainable params: 14,714,688
_________________________________________________________________


In [5]:
resnet50_model = Sequential()
resnet50_model.add(ResNet50(
    include_top=False,
    pooling='avg',
    weights='imagenet',
    ))
resnet50_model.add(layers.Dense(NUM_CLASS, activation='softmax'))

# since pre-trained model already trained so we dont need to train it again
resnet50_model.layers[0].trainable = False

resnet50_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
resnet50_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 dense_1 (Dense)             (None, 2)                 4098      
                                                                 
Total params: 23,591,810
Trainable params: 4,098
Non-trainable params: 23,587,712
_________________________________________________________________


In [6]:
vgg16_history = vgg16_model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch_training,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=steps_per_epoch_validation,
    verbose=1,
)

  vgg16_history = vgg16_model.fit_generator(


Epoch 1/2
Epoch 2/2


In [7]:
resnet50_history = resnet50_model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch_training,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=steps_per_epoch_validation,
    verbose=1,
)

  resnet50_history = resnet50_model.fit_generator(


Epoch 1/2
Epoch 2/2


In [8]:
vgg16_model.save('classifier_vgg16_model.h5')
resnet50_model.save('classifier_resnet50_model.h5')

# Model Testing

### Final part is testing our model how it perform with data that it not has been trained on

In [9]:
resnet = tf.keras.models.load_model('classifier_resnet50_model.h5')
vgg = tf.keras.models.load_model('classifier_vgg16_model.h5')

Print model accuracy

In [10]:
resnet_scores = resnet.evaluate_generator(test_generator)
print("%s%s: %.2f%%" % ("resnet ",resnet.metrics_names[1], resnet_scores[1]*100))

vgg_scores = vgg.evaluate_generator(test_generator)
print("%s%s: %.2f%%" % ("vgg ",vgg.metrics_names[1], vgg_scores[1]*100))

  resnet_scores = resnet.evaluate_generator(test_generator)


resnet accuracy: 99.90%


  vgg_scores = vgg.evaluate_generator(test_generator)


vgg accuracy: 99.67%


Predict test dataset

In [11]:
resnet_predict = resnet.predict_generator(test_generator,steps=steps_per_epoch_test,verbose=1)
vgg_predict = vgg.predict_generator(test_generator,steps=steps_per_epoch_test,verbose=1)

  resnet_predict = resnet.predict_generator(test_generator,steps=steps_per_epoch_test,verbose=1)




  vgg_predict = vgg.predict_generator(test_generator,steps=steps_per_epoch_test,verbose=1)




Print 10 random dataset prediction result

In [12]:
result_vgg = vgg_predict.argmax(axis=-1)
for i in range(10):
    if result_vgg[random.randint(0,4000)] == 0:
        print("Negative") 
    else:
        print("Positive")


Negative
Positive
Negative
Positive
Positive
Negative
Negative
Positive
Negative
Positive


In [13]:
result_resnet = resnet_predict.argmax(axis=-1)
for i in range(10):
    if result_resnet[random.randint(0,4000)] == 0:
        print("Negative") 
    else:
        print("Positive")


Negative
Negative
Negative
Positive
Negative
Negative
Negative
Negative
Negative
Negative


<hr>

Copyright &copy; 2020 [IBM Developer Skills Network](https://cognitiveclass.ai/?utm_source=bducopyrightlink&utm_medium=dswb&utm_campaign=bdu). This notebook and its source code are released under the terms of the [MIT License](https://bigdatauniversity.com/mit-license/).