# Import data

In [38]:
%matplotlib inline

import glob
import os
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras import optimizers
from keras import applications
from keras.utils.np_utils import to_categorical 
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default styling to the matplotlib figures drawn below.
sns.set()

In [2]:
import warnings

# Silence FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter('ignore', FutureWarning)

In [3]:
# Size (in pixels) that every image is resized to when it is loaded.
img_width = 240
img_height = 240

# Active configuration: the small sample directories (46 classes).
n_classes = 46
train_data_dir = 'data/train_sample'
valid_data_dir = 'data/valid_sample'

# Alternative configuration (presumably the full dataset); swap it in for
# the three assignments above:
# n_classes = 6451
# train_data_dir = 'data/train'
# valid_data_dir = 'data/valid'

In [4]:
# Training hyper-parameters: mini-batch size and the upper bound on epochs.
batch_size = 32
epochs = 100

# Number of images in the sample train / validation directories.
train_samples, valid_samples = 4911, 1690

# Counts for the alternative (presumably full) dataset:
# train_samples = 859486
# valid_samples = 294058

In [5]:
# Training images are only rescaled from [0, 255] to [0, 1]; no random
# augmentation is applied in this configuration.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Augmenting variant, kept for reference:
# train_datagen = ImageDataGenerator(
#         rescale=1. / 255,      # Normalize pixel values to [0, 1]
#         shear_range=0.2,       # Randomly apply a shearing transformation
#         zoom_range=0.2,        # Randomly zoom the image in or out
#         horizontal_flip=True)  # Randomly flip the image horizontally

In [6]:
# Label-yielding variant, kept for reference (for training a network directly
# on images rather than on precomputed features):
# train_generator = train_datagen.flow_from_directory(
#         train_data_dir,
#         target_size=(img_height, img_width),
#         batch_size=batch_size)

# Stream the training images for feature extraction:
#   * class_mode=None -> batches contain images only, no labels;
#   * shuffle=False   -> images come out in the same order as `.classes`,
#     so labels can be re-attached to the predicted features afterwards.
# Keras expects target_size as (height, width), hence the argument order.
train_generator_bottleneck = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

Found 4911 images belonging to 46 classes.


In [7]:
# Validation images get the same [0, 1] pixel rescaling and nothing else.
datagen = ImageDataGenerator(rescale=1.0 / 255)

In [8]:
# Label-yielding variant, kept for reference (for validating a network that
# is trained directly on images):
# valid_generator = datagen.flow_from_directory(
#         valid_data_dir,
#         target_size=(img_height, img_width),
#         batch_size=batch_size)

# Stream the validation images for feature extraction:
#   * class_mode=None -> batches contain images only, no labels;
#   * shuffle=False   -> images come out in the same order as `.classes`,
#     so labels can be re-attached to the predicted features afterwards.
# Keras expects target_size as (height, width), hence the argument order.
valid_generator_bottleneck = datagen.flow_from_directory(
        valid_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

Found 1690 images belonging to 46 classes.


# Loading the VGG16 model

In [9]:
# VGG16 with ImageNet weights. include_top=False leaves out the
# fully-connected classifier, so the network outputs convolutional feature maps.
model_vgg = applications.VGG16(weights='imagenet', include_top=False)

In [62]:
# Run every training image through the VGG16 base once and cache the
# resulting feature maps on disk.
bottleneck_features_train = model_vgg.predict_generator(train_generator_bottleneck)

# Give np.save the path rather than an open handle, so the file is closed as
# soon as the array has been written.
np.save('models/bottleneck_features_train.npy', bottleneck_features_train)

In [63]:
# Run every validation image through the VGG16 base once and cache the
# resulting feature maps on disk.
bottleneck_features_validation = model_vgg.predict_generator(valid_generator_bottleneck)

# Give np.save the path rather than an open handle, so the file is closed as
# soon as the array has been written.
np.save('models/bottleneck_features_validation.npy', bottleneck_features_validation)

In [64]:
# Number of classes as discovered by the training generator.
num_classes = len(train_generator_bottleneck.class_indices)

# Load the cached features by path so NumPy opens and closes the files itself.
train_data = np.load('models/bottleneck_features_train.npy')
validation_data = np.load('models/bottleneck_features_validation.npy')

# The generators were built with shuffle=False, so `.classes` is in the same
# order as the feature rows. Slice by the number of rows actually loaded
# instead of by the hand-maintained sample counts, then one-hot encode.
train_labels = to_categorical(
        train_generator_bottleneck.classes[:len(train_data)],
        num_classes=num_classes)
validation_labels = to_categorical(
        valid_generator_bottleneck.classes[:len(validation_data)],
        num_classes=num_classes)

# Fail here, not inside fit(), if features and labels are out of step
# (for example a stale .npy cache from a different dataset).
assert len(train_labels) == len(train_data), "train features/labels misaligned"
assert len(validation_labels) == len(validation_data), "validation features/labels misaligned"

In [65]:
# Sanity check: feature and label arrays must agree on the number of samples,
# and the label width must equal the number of classes.
# The parenthesised single-argument form prints identically on Python 2 and 3.
print(train_data.shape)
print(train_labels.shape)
print(validation_data.shape)
print(validation_labels.shape)
print(len(train_generator_bottleneck.class_indices))

(4911, 7, 7, 512)
(4911, 46)
(1690, 7, 7, 512)
(1690, 46)
46


In [66]:
# Small classifier trained on top of the cached VGG16 feature maps.
model_top = Sequential([
    # Flatten each feature map (all dimensions after the batch axis).
    Flatten(input_shape=train_data.shape[1:]),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # The output width must equal the width of the one-hot labels, so it uses
    # num_classes (taken from the generator) rather than the hand-set n_classes.
    Dense(num_classes, activation='softmax'),
])

model_top.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

# Build Model

In [29]:
# model = Sequential()
# model.add(Convolution2D(32, (3, 3), input_shape=(img_width, img_height, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Convolution2D(32, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Convolution2D(64, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Flatten())
# model.add(Dense(512))
# model.add(Activation('relu'))
# model.add(Dropout(0.5))
# model.add(Dense(n_classes))
# model.add(Activation('softmax'))

In [30]:
# model.compile(loss='categorical_crossentropy',
#               optimizer='rmsprop',
#               metrics=['accuracy'])

# Training

In [67]:
# Checkpoint: write the model to disk whenever validation accuracy improves.
# Variant used for the image-generator model:
# checkpointer = ModelCheckpoint(filepath='models/data_augmentation.h5', monitor='val_acc', verbose=1, save_best_only=True)
checkpointer = ModelCheckpoint(
        filepath='models/bottleneck_features.h5',
        monitor='val_acc',
        save_best_only=True,
        verbose=1)

# Stop training once validation accuracy has gone 5 epochs without improving.
early_stopping = EarlyStopping(monitor='val_acc', patience=5, verbose=1)

# Generator-based alternative (for a model trained directly on images):
# history = model.fit_generator(
#         train_generator,
#         steps_per_epoch=train_samples // batch_size,
#         epochs=epochs,
#         callbacks=[checkpointer, early_stopping],
#         verbose=2,
#         validation_data=valid_generator,
#         validation_steps=valid_samples // batch_size)

# Fit the classifier on the cached bottleneck features.
history = model_top.fit(
        x=train_data,
        y=train_labels,
        batch_size=batch_size,
        epochs=epochs,
        verbose=2,
        validation_data=(validation_data, validation_labels),
        callbacks=[checkpointer, early_stopping])

Train on 4911 samples, validate on 1690 samples
Epoch 1/100
 - 6s - loss: 3.5072 - acc: 0.5266 - val_loss: 1.4123 - val_acc: 0.6686

Epoch 00001: val_acc improved from -inf to 0.66864, saving model to models/bottleneck_features.h5
Epoch 2/100
 - 5s - loss: 1.2063 - acc: 0.7076 - val_loss: 0.9131 - val_acc: 0.7734

Epoch 00002: val_acc improved from 0.66864 to 0.77337, saving model to models/bottleneck_features.h5
Epoch 3/100
 - 5s - loss: 0.8789 - acc: 0.7772 - val_loss: 0.7612 - val_acc: 0.8065

Epoch 00003: val_acc improved from 0.77337 to 0.80651, saving model to models/bottleneck_features.h5
Epoch 4/100
 - 5s - loss: 0.7037 - acc: 0.8169 - val_loss: 0.9024 - val_acc: 0.8006

Epoch 00004: val_acc did not improve from 0.80651
Epoch 5/100
 - 5s - loss: 0.5623 - acc: 0.8454 - val_loss: 0.7099 - val_acc: 0.8491

Epoch 00005: val_acc improved from 0.80651 to 0.84911, saving model to models/bottleneck_features.h5
Epoch 6/100
 - 5s - loss: 0.4854 - acc: 0.8733 - val_loss: 0.5850 - val_acc:

# Evaluation

In [72]:
# Score the classifier on the cached validation features; the result is
# [loss, accuracy], matching the metrics the model was compiled with.
# Generator-based equivalent for an image-level model:
# model.evaluate_generator(valid_generator, valid_samples)
model_top.evaluate(x=validation_data, y=validation_labels)



[0.7823642365647788, 0.8934911239782029]

In [None]:
# Accuracy per epoch on the training and validation sets.
# Draw through the explicit Axes object instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.plot(history.history['acc'], label='Train')
# This curve is computed on the validation data, so it is labelled as such.
ax.plot(history.history['val_acc'], label='Validation')
ax.set_title('Model accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
plt.show()
#fig.savefig('../images/data_augmentation.svg', format='svg', dpi=1200)