<h1> Analysis of pretrained models ResNet50 and VGG16 by building classifiers for identifying faults in buildings.

In [0]:
#connect to your google drive
#from google.colab import drive 
#drive.mount('/content/gdrive')

In [0]:
#get dataset and unzip dataset
!wget https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0321EN/data/concrete_data_week4.zip
!unzip concrete_data_week4.zip

In [0]:
#import modules
import keras
from keras.models import Sequential
from keras.layers import Dense

from keras.preprocessing.image import ImageDataGenerator

from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input as preprocess_resnet

from keras.applications import ResNet50
from keras.applications.resnet50 import preprocess_input as preprocess_vgg16

from keras.models import load_model

Using TensorFlow backend.


In [0]:
#define constants
num_classes = 2
image_resize = 224
batch_size_training = 100
batch_size_validation = 100

<h2> Modifying ResNet Pretrained Model</h2>

In [0]:
#create imagedatagenerator
data_generator_resnet = ImageDataGenerator(preprocessing_function = preprocess_resnet)

In [0]:
#create generator for train and validation dataset
train_generator = data_generator_resnet.flow_from_directory(
    'concrete_data_week4/train',
    target_size=(image_resize, image_resize),
    batch_size=batch_size_training,
    class_mode='categorical')

validation_generator = data_generator_resnet.flow_from_directory(
    'concrete_data_week4/valid',
    target_size = (image_resize, image_resize),
    batch_size = 100,
    class_mode = 'categorical'  
)

Found 30001 images belonging to 2 classes.
Found 9501 images belonging to 2 classes.


In [0]:
#modify the resnet50 pretrained model
model = Sequential()

model.add(ResNet50(
    include_top=False,
    pooling='avg',
    weights='imagenet',
    ))
model.add(Dense(num_classes, activation='softmax'))

model.layers[0].trainable = False

In [0]:
#view model summary
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 4098      
Total params: 23,591,810
Trainable params: 4,098
Non-trainable params: 23,587,712
_________________________________________________________________


In [0]:
#compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [0]:
#parameters for fitting generator
steps_per_epoch_training = len(train_generator)
steps_per_epoch_validation = len(validation_generator)
num_epochs = 2

In [0]:
#fit the generator to model
fit_history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch_training,
    epochs=num_epochs,
    validation_data=validation_generator,
    validation_steps=steps_per_epoch_validation,
    verbose=1,
)

Epoch 1/2
Epoch 2/2


In [0]:
#ave the model
model.save('classifier_resnet_model.h5')
#save model to google drive
#model.save('/content/gdrive/My Drive/keras_models/classifier_resnet_model.h5')

<h2> Modifying VGG16 model<h2>

In [0]:
#create imagedatagenerator
data_generator_vgg16 = ImageDataGenerator(preprocessing_function = preprocess_vgg16)

In [0]:
#create a training data generator
train_generator = data_generator_vgg16.flow_from_directory(
    'concrete_data_week4/train',
    target_size = (image_resize, image_resize),
    batch_size = batch_size_training,
    class_mode = 'categorical'
)

#create a validation data generator
validation_generator = data_generator_vgg16.flow_from_directory(
    'concrete_data_week4/valid',
    target_size = (image_resize, image_resize),
    batch_size = batch_size_validation,
    class_mode = 'categorical'
)

Found 30001 images belonging to 2 classes.
Found 9501 images belonging to 2 classes.


In [0]:
#modify pretrained model
model2 = Sequential()

model2.add(VGG16(
    include_top = False,
    pooling = 'avg',
    weights = 'imagenet'
))

model2.add(Dense(num_classes, activation = 'softmax'))

model2.layers[0].trainable = False

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [0]:
model2.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 512)               14714688  
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 1026      
Total params: 14,715,714
Trainable params: 1,026
Non-trainable params: 14,714,688
_________________________________________________________________


In [0]:
#compile the model
model2.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

In [0]:
#initialize epochs and steps
steps_per_epoch_training = len(train_generator)
steps_per_epoch_validation = len(validation_generator)
num_epochs = 2

In [0]:
#fit the generator to model
fit_history2 = model2.fit_generator(
    train_generator,
    steps_per_epoch = steps_per_epoch_training,
    epochs = num_epochs,
    validation_data = validation_generator,
    validation_steps = steps_per_epoch_validation
)

Epoch 1/2
Epoch 2/2


In [0]:
#save model locally
model2.save('classifier_vgg16_model.h5')
#save model to drive
#model2.save('/content/gdrive/My Drive/keras_models/classifier_vgg16_model.h5')

In [0]:
#to copy from drive
#!cp /content/gdrive/My\ Drive/keras_models/classifier_resnet_model.h5 ./classifier_resnet_model.h5

<h2>Testing resnet50 model</h2>

In [0]:
#create a test data generator
data_generator = ImageDataGenerator()

test_generator = data_generator.flow_from_directory(
    'concrete_data_week4/test',
    target_size = (image_resize, image_resize),
    shuffle = False
)

Found 500 images belonging to 2 classes.


In [0]:
model_resnet = load_model('classifier_resnet_model.h5')



In [0]:
#evaluate resnet model
eval_resnet = model_resnet.evaluate_generator(
    test_generator, 
    steps=None, 
    callbacks=None,
    max_queue_size=10,
    workers=1,
    use_multiprocessing=False,
    verbose=1)



In [0]:
print('Resnet50 model performance:')
print('loss for test dataset is : {}'.format(eval_resnet[0]))
print('accuracy for test dataset is : {}'.format(eval_resnet[1]))

Resnet50 model performance:
loss for test dataset is : 2.5136446952819824
accuracy for test dataset is : 0.5899999737739563


In [0]:
#predict using resnet model
predict_resnet = model_resnet.predict_generator(
    test_generator, 
    steps=None, 
    callbacks=None, 
    max_queue_size=10, 
    workers=1, 
    use_multiprocessing=False, 
    verbose=1)



In [0]:
resnet_predict_arr = []

for i in predict_resnet:
  if int(round(i[0])) == 1:
    resnet_predict_arr.append('Positive')
  else:
    resnet_predict_arr.append('Negative')

print('PRDICTION FOR RESNET')

for i in resnet_predict_arr[0:5]:
  print(i)

print()
print('Total positives: {}'.format(resnet_predict_arr.count('Positive')))
print('Total negatives: {}'.format(resnet_predict_arr.count('Negative')))

PRDICTION FOR RESNET
Positive
Positive
Positive
Positive
Positive

Total positives: 455
Total negatives: 45


<h2>Testing VGG16 model</h2>

In [0]:
model_vgg16 = load_model('classifier_vgg16_model.h5')



In [0]:
#evaluate vgg16 model
eval_vgg16 = model_vgg16.evaluate_generator(
    test_generator, 
    steps=None, 
    callbacks=None,
    max_queue_size=10,
    workers=1,
    use_multiprocessing=False,
    verbose=1)



In [0]:
print('VGG16 model performance:')
print('loss for test dataset is : {}'.format(eval_vgg16[0]))
print('accuracy for test dataset is : {}'.format(eval_vgg16[1]))

VGG16 model performance:
loss for test dataset is : 0.2777916193008423
accuracy for test dataset is : 0.9279999732971191


In [0]:
#predict using vgg16 model

predict_vgg16 = model_vgg16.predict_generator(
    test_generator, 
    steps=None, 
    callbacks=None, 
    max_queue_size=10, 
    workers=1, 
    use_multiprocessing=False, 
    verbose=1)



In [0]:
vgg16_predict_arr = []

for i in predict_vgg16:
  if int(round(i[0])) == 1:
    vgg16_predict_arr.append('Positive')
  else:
    vgg16_predict_arr.append('Negative')

print('PRDICTION FOR VGG16')

for i in vgg16_predict_arr[0:5]:
  print(i)

print()
print('Total positives: {}'.format(vgg16_predict_arr.count('Positive')))
print('Total negatives: {}'.format(vgg16_predict_arr.count('Negative')))

PRDICTION FOR VGG16
Positive
Positive
Positive
Positive
Positive

Total positives: 286
Total negatives: 214


<h2>In this specific case of classification, the VGG16 model performs far better than ResNet50 with test accuracy of 93% and 59% respectively.</h2>