In [1]:
import pandas as pd
import numpy as np 
import itertools
import keras
from keras.applications.resnet50 import ResNet50
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img 
from keras.models import Sequential 
from keras import optimizers
from keras.preprocessing import image
from keras.layers import Dropout, Flatten, Dense, LeakyReLU 
from keras import applications  
from keras.utils.np_utils import to_categorical
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
from keras import backend as K

import matplotlib.pyplot as plt 
import matplotlib.image as mpimg
%matplotlib inline
import math  
import datetime
import time

Using TensorFlow backend.


In [2]:
#Default dimensions we found online
img_width, img_height = 224, 224  
   
#Create a bottleneck file
top_model_weights_path = 'bottleneck_fc_model-resnet50.h5' 

# loading up our datasets
train_data_dir = 'storage/train'  
validation_data_dir = 'storage/valid'
test_data_dir = 'storage/test'
   
# number of epochs to train top model  
epochs = 15 #this has been changed after multiple model run  
# batch size used by flow_from_directory and predict_generator  
batch_size = 50

In [3]:
resnet50 = ResNet50(include_top=False, weights='imagenet')



In [4]:
datagen = ImageDataGenerator(rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)  

In [None]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

start = datetime.datetime.now()

generator = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

nb_train_samples = len(generator.filenames)
num_classes = len(generator.class_indices)

predict_size_train = int(math.ceil(nb_train_samples / batch_size))

bottleneck_features_train = resnet50.predict_generator(generator, predict_size_train)

np.save('bottleneck_features_train-resnet50.npy', bottleneck_features_train)

end= datetime.datetime.now() 
elapsed= end-start 
print ('Time: ', elapsed)

Found 3867 images belonging to 6 classes.




Time:  0:08:13.529673


In [None]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

start = datetime.datetime.now() 

datagen = ImageDataGenerator(rescale=1. / 255)

generator = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

nb_validation_samples = len(generator.filenames)

predict_size_validation = int(math.ceil(nb_validation_samples / batch_size))

bottleneck_features_validation = resnet50.predict_generator(
generator, predict_size_validation)

np.save('bottleneck_features_validation-resnet50.npy', bottleneck_features_validation)

end= datetime.datetime.now() 
elapsed= end-start 
print ('Time: ', elapsed)

Found 1030 images belonging to 6 classes.
Time:  0:02:24.318270


In [None]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

start = datetime.datetime.now()

datagen = ImageDataGenerator(rescale=1. / 255)

generator = datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

nb_test_samples = len(generator.filenames)

predict_size_test = int(math.ceil(nb_test_samples / batch_size))

bottleneck_features_test = resnet50.predict_generator(
generator, predict_size_test)

np.save('bottleneck_features_test-resnet50.npy', bottleneck_features_test) 

end= datetime.datetime.now() 
elapsed= end-start 
print ('Time: ', elapsed)

Found 1018 images belonging to 6 classes.


  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))


Time:  0:02:22.560553


In [3]:
datagen_top = ImageDataGenerator(rescale=1./255)

In [4]:
#training data
generator_top = datagen_top.flow_from_directory(  
         train_data_dir,  
         target_size=(img_width, img_height),  
         batch_size=batch_size,  
         class_mode='categorical',  
         shuffle=False)  
   
nb_train_samples = len(generator_top.filenames)  
num_classes = len(generator_top.class_indices)  
   
# load the bottleneck features saved earlier  
train_data = np.load('bottleneck_features_train-resnet50.npy')  
   
# get the class lebels for the training data, in the original order  
train_labels = generator_top.classes  
   
# convert the training labels to categorical vectors  
train_labels = to_categorical(train_labels, num_classes=num_classes)

Found 3867 images belonging to 6 classes.


In [5]:
#validation data
generator_top = datagen_top.flow_from_directory(  
         validation_data_dir,  
         target_size=(img_width, img_height),  
         batch_size=batch_size,  
         class_mode=None,  
         shuffle=False)  
   
nb_validation_samples = len(generator_top.filenames)  
   
validation_data = np.load('bottleneck_features_validation-resnet50.npy')  
   

validation_labels = generator_top.classes  
validation_labels = to_categorical(validation_labels, num_classes=num_classes)

Found 1030 images belonging to 6 classes.


In [6]:
#testing data
generator_top = datagen_top.flow_from_directory(  
         test_data_dir,  
         target_size=(img_width, img_height),  
         batch_size=batch_size,  
         class_mode=None,  
         shuffle=False)  
   
nb_test_samples = len(generator_top.filenames)  
   
test_data = np.load('bottleneck_features_test-resnet50.npy')  
   

test_labels = generator_top.classes  
test_labels = to_categorical(test_labels, num_classes=num_classes)

Found 1018 images belonging to 6 classes.


In [7]:
K.set_session(K.tf.Session(config=K.tf.ConfigProto(intra_op_parallelism_threads=60, 
                                                   inter_op_parallelism_threads=60)))

In [8]:
NAME = 'ResNet50 CNN-{}'.format(int(time.time()))

#This is the best model we found. For additional models, check out I_notebook.ipynb
start = datetime.datetime.now()
model = Sequential()

model.add(Flatten(input_shape=train_data.shape[1:]))

model.add(Dense(256, activation='linear'))
model.add(LeakyReLU(alpha=0.3))
model.add(Dropout(0.5))
model.add(Dense(128, activation='linear'))
model.add(LeakyReLU(alpha=0.3))
model.add(Dropout(0.5))  
model.add(Dense(64, activation='linear'))
model.add(LeakyReLU(alpha=0.3))
model.add(Dropout(0.3)) 
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])  

tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=5)
checkpoint = ModelCheckpoint(filepath='xception.h5', monitor='val_acc', save_best_only=True)
early_stop = EarlyStopping(monitor='acc', patience=2)

history = model.fit(train_data, train_labels,
                    epochs=70,
                    batch_size=batch_size,
                    validation_data=(validation_data, validation_labels),
                    validation_split=0.2,
                    callbacks=[tensorboard, reduce_lr, checkpoint, early_stop]) 

model.save_weights(top_model_weights_path)  

(eval_loss, eval_accuracy) = model.evaluate(  
 validation_data, validation_labels, batch_size=batch_size, verbose=1)

print("[INFO] accuracy: {:.2f}%".format(eval_accuracy * 100))  
print("[INFO] Loss: {}".format(eval_loss))  
end= datetime.datetime.now()
elapsed= end-start
print ('Time: ', elapsed)

Train on 3867 samples, validate on 1030 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
[INFO] accuracy: 39.03%
[INFO] Loss: 1.5455994675460372
Time:  0:03:45.768863


In [None]:
model.summary()

In [None]:

#Graphing our training and validation
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('ResNet 50 Training and validation accuracy')
plt.ylabel('accuracy')  
plt.xlabel('epoch')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('ResNet50 Training and validation loss')
plt.ylabel('loss')  
plt.xlabel('epoch')
plt.legend()
plt.show()