In [1]:
# extract features with pretrained models
# then use the extracted features as input to train a simple network
import os
import numpy as np
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import Xception, VGG19, ResNet50, InceptionResNetV2

# --- Notebook configuration -------------------------------------------------
# Target image dimensions.
# NOTE(review): the rest of this notebook passes these as (img_w, img_h) to
# `input_shape` and `target_size`. Keras documents both as (height, width), so
# images are presumably resized to 300 rows x 225 columns -- confirm that this
# is the intended orientation.
img_w, img_h = 300, 225

# Number of images per batch yielded by the data generators.
batch_size = 20
# Number of label columns (one per class directory).
num_classes = 120

# Seed handed to the data generators so the shuffling is repeatable.
datagen_seed = 1987
# Root folder holding one sub-directory of images per class.
data_directory = './data_gen/train'

# Backbone used for feature extraction: 'resnet50', 'xception' or
# 'inception_resnet_v2'; any other value selects VGG19.
which_model = 'inception_resnet_v2'

# Resolve the configured backbone name to two objects:
#   model -- the keras.applications submodule (provides preprocess_input)
#   MODEL -- the constructor of the network itself
# Each entry is (submodule attribute, class attribute) on keras.applications.
_backbone_table = {
    'resnet50': ('resnet50', 'ResNet50'),
    'xception': ('xception', 'Xception'),
    'inception_resnet_v2': ('inception_resnet_v2', 'InceptionResNetV2'),
}
# Any name that is not in the table falls back to VGG19.
_module_attr, _class_attr = _backbone_table.get(which_model, ('vgg19', 'VGG19'))

# Only the selected attributes are looked up, exactly one module and one class.
model = getattr(keras.applications, _module_attr)
MODEL = getattr(keras.applications, _class_attr)


# Instantiate the selected backbone without its classification head so that
# it outputs convolutional feature maps, using the ImageNet weights.
conv_base = MODEL(
    include_top=False,
    weights='imagenet',
    input_shape=(img_w, img_h, 3),
)

conv_base.summary()

# Axes 1..3 of the output tensor shape (axis 0 is the batch axis); these size
# the feature arrays allocated later in the notebook.
output_w, output_h, output_f = (
    conv_base.output.shape[axis] for axis in (1, 2, 3)
)


Using TensorFlow backend.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 300, 225, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 149, 112, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 149, 112, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 149, 112, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [2]:
# Build the training and validation iterators. Both read the same directory;
# ImageDataGenerator holds out 30% of the images as the validation subset.
data_generator = ImageDataGenerator(validation_split=0.30)

# Settings shared by both subsets, so the two iterators differ only in `subset`.
_flow_options = dict(
    target_size=(img_w, img_h),
    shuffle=True,
    seed=datagen_seed,
    class_mode='categorical',
    batch_size=batch_size,
)

train_generator = data_generator.flow_from_directory(
    data_directory, subset="training", **_flow_options)

validation_generator = data_generator.flow_from_directory(
    data_directory, subset="validation", **_flow_options)

Found 7213 images belonging to 120 classes.
Found 3009 images belonging to 120 classes.


In [3]:
# Output folder for the extracted features, one sub-folder per backbone name.
features_dir = ''.join(['./features/', which_model, '/'])

def mkdirIfNotExist(directory):
    """Create `directory` if it does not exist yet and return its path.

    Missing parent directories are created as well, so a nested path such as
    './features/<model>/' works on a fresh checkout where './features' itself
    is absent. Calling this for a directory that already exists is a no-op.

    Args:
        directory: path of the directory to create.

    Returns:
        The `directory` argument, unchanged.

    Raises:
        OSError: if the path cannot be created (for example, it already
            exists but is not a directory).
    """
    # exist_ok=True removes the race between checking for the directory and
    # creating it.
    os.makedirs(directory, exist_ok=True)
    return directory

# Make sure the output folder exists before anything is written into it.
mkdirIfNotExist(features_dir)

# Number of samples to extract from each subset. Both are whole multiples of
# batch_size (20). NOTE(review): the recorded generator output reports 7213
# training and 3009 validation images, so the images beyond these counts are
# not extracted -- confirm that this is intended.
num_train_examples, num_validation_examples = 7000, 3000


def extrac_features(directory, sample_count, generator):
    """Run batches from `generator` through `conv_base` and collect the results.

    Each batch is preprocessed with `model.preprocess_input`, passed to
    `conv_base.predict`, and the predictions and labels are written into
    pre-allocated arrays until `sample_count` samples have been collected.

    Rows are placed with a running offset rather than `i * batch_size`, so a
    short batch (such as the last batch of an epoch) or a `sample_count` that
    is not a multiple of the batch size no longer misplaces rows or fails
    with a broadcasting error. The final batch is truncated so that exactly
    `sample_count` rows are written.

    Reads the module-level names `model`, `conv_base`, `output_w`, `output_h`,
    `output_f` and `num_classes`.

    Args:
        directory: unused; kept so existing calls keep working.
        sample_count: number of samples to collect.
        generator: iterable yielding `(inputs_batch, labels_batch)` pairs.

    Returns:
        `(features, labels)` with shapes
        `(n, output_w, output_h, output_f)` and `(n, num_classes)`.
        `n` equals `sample_count`, unless `generator` ends early, in which
        case the arrays are trimmed to the rows actually filled instead of
        being padded with zeros.
    """
    # NOTE(review): np.zeros defaults to float64; if conv_base.predict returns
    # float32 (presumably it does), storing float32 would halve memory and
    # file size -- left unchanged here to keep the saved dtype stable.
    features = np.zeros(shape=(sample_count, output_w, output_h, output_f))
    labels = np.zeros(shape=(sample_count, num_classes))
    if sample_count == 0:
        # Nothing to collect; avoid pulling (and predicting on) a batch.
        return features, labels

    filled = 0  # number of rows written so far
    for inputs_batch, labels_batch in generator:
        # Never take more rows than are still free in the output arrays.
        take = min(len(inputs_batch), sample_count - filled)
        print(filled + take, '/', sample_count)
        inputs_batch = model.preprocess_input(inputs_batch[:take])
        features_batch = conv_base.predict(inputs_batch)
        features[filled:filled + take] = features_batch
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        if filled >= sample_count:
            break

    if filled < sample_count:
        # The generator ran out early: drop the rows that were never written.
        features, labels = features[:filled], labels[:filled]

    return features, labels
    
    
# Extract features and labels for both subsets.
train_features, train_labels = extrac_features(
    data_directory, num_train_examples, train_generator)
validation_features, validation_labels = extrac_features(
    data_directory, num_validation_examples, validation_generator)

print('train features: ', train_features.shape)
print('validation features: ', validation_features.shape)

# Persist every array under features_dir so later steps can reload them.
_arrays_to_save = (
    ('train_features.npy', train_features),
    ('validation_features.npy', validation_features),
    ('train_labels.npy', train_labels),
    ('validation_labels.npy', validation_labels),
)
for _file_name, _array in _arrays_to_save:
    np.save(features_dir+_file_name, _array)

print('completed...')

20 / 7000
40 / 7000
60 / 7000
80 / 7000
100 / 7000
120 / 7000
140 / 7000
160 / 7000
180 / 7000
200 / 7000
220 / 7000
240 / 7000
260 / 7000
280 / 7000
300 / 7000
320 / 7000
340 / 7000
360 / 7000
380 / 7000
400 / 7000
420 / 7000
440 / 7000
460 / 7000
480 / 7000
500 / 7000
520 / 7000
540 / 7000
560 / 7000
580 / 7000
600 / 7000
620 / 7000
640 / 7000
660 / 7000
680 / 7000
700 / 7000
720 / 7000
740 / 7000
760 / 7000
780 / 7000
800 / 7000
820 / 7000
840 / 7000
860 / 7000
880 / 7000
900 / 7000
920 / 7000
940 / 7000
960 / 7000
980 / 7000
1000 / 7000
1020 / 7000
1040 / 7000
1060 / 7000
1080 / 7000
1100 / 7000
1120 / 7000
1140 / 7000
1160 / 7000
1180 / 7000
1200 / 7000
1220 / 7000
1240 / 7000
1260 / 7000
1280 / 7000
1300 / 7000
1320 / 7000
1340 / 7000
1360 / 7000
1380 / 7000
1400 / 7000
1420 / 7000
1440 / 7000
1460 / 7000
1480 / 7000
1500 / 7000
1520 / 7000
1540 / 7000
1560 / 7000
1580 / 7000
1600 / 7000
1620 / 7000
1640 / 7000
1660 / 7000
1680 / 7000
1700 / 7000
1720 / 7000
1740 / 7000
1760 / 70