In [1]:
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from pyimagesearch.io import HDF5DatasetWriter
from imutils import paths
import numpy as np
import progressbar
import random
import os

Using TensorFlow backend.


In [2]:
# Collect the path of every image found under the dataset folder.
imagePaths = [path for path in paths.list_images("datasets/animals")]
# Number of images sent through the network per forward pass.
bs = 32

In [3]:
# Shuffle the image paths reproducibly. Rows are later written to disk in
# this order, so an unseeded shuffle would give a different feature file on
# every run. Sorting first makes the cell idempotent: re-running it, or a
# different directory enumeration order, yields the same permutation.
# A dedicated Random instance keeps the global RNG state untouched.
imagePaths.sort()
random.Random(42).shuffle(imagePaths)
# The class label is the name of the directory directly containing the image.
labels = [p.split(os.path.sep)[-2] for p in imagePaths]

In [4]:
# Fit the encoder on the directory-name labels, then replace them with their
# encoded form. `le` is kept so its `classes_` can be stored with the features.
le = LabelEncoder().fit(labels)
labels = le.transform(labels)

In [5]:
# VGG16 with ImageNet weights and without its top (classifier) layers; it is
# used below purely as a fixed feature extractor via predict().
model = VGG16(include_top=False, weights="imagenet")

In [6]:
# Destination of the extracted features.
outputPath = 'datasets/animals/hdf5/features.hdf5'
# Make sure the target folder exists: nothing else in this notebook creates
# it, and creating a file inside a missing directory fails.
# NOTE(review): presumably HDF5DatasetWriter opens the file for writing in
# its constructor — confirm against pyimagesearch.io.
os.makedirs(os.path.dirname(outputPath), exist_ok=True)
# One row per image, each row a flattened vector of 512 * 7 * 7 values
# (the width the extraction loop reshapes the network output to).
dataset = HDF5DatasetWriter((len(imagePaths), 512 * 7 * 7),
                            outputPath,
                            dataKey='features',
                            bufSize=1000)
# Persist the class names alongside the features.
dataset.storeClassLabels(le.classes_)

In [7]:
# Progress display layout: caption, percentage, bar and ETA.
widgets = [
    "Extracting Features: ",
    progressbar.Percentage(),
    " ",
    progressbar.Bar(),
    " ",
    progressbar.ETA(),
]
# The bar counts images; start() prints the initial 0% line.
pbar = progressbar.ProgressBar(
    widgets=widgets,
    maxval=len(imagePaths),
).start()

Extracting Features:   0% |                                    | ETA:  --:--:--

In [8]:
# Push the images through the network in batches of `bs` and stream the
# flattened features, with their labels, into the HDF5 writer.
# The writer is closed in a `finally` block so it is released even when an
# image fails to load or the network raises part-way through.
try:
    for i in range(0, len(imagePaths), bs):
        # Paths and labels were built in the same order, so the same slice
        # keeps them aligned.
        batchPaths = imagePaths[i:i + bs]
        batchLabels = labels[i:i + bs]
        batchImages = []
        for imagePath in batchPaths:
            # Load the image resized to 224x224 and convert it to an array.
            image = load_img(imagePath, target_size=(224, 224))
            image = img_to_array(image)

            # Add a leading batch axis so the per-image arrays can be
            # stacked, then apply the ImageNet preprocessing.
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)
            batchImages.append(image)

        # Stack the single-image arrays into one batch for prediction.
        batchImages = np.vstack(batchImages)
        features = model.predict(batchImages, batch_size=bs)
        # Flatten each image's output to one row of 512 * 7 * 7 values,
        # matching the width the dataset was created with.
        features = features.reshape((features.shape[0], 512 * 7 * 7))
        dataset.add(features, batchLabels)
        pbar.update(i)
finally:
    dataset.close()

# Only mark the bar as complete when the whole run succeeded.
pbar.finish()

Extracting Features: 100% |#####################################| Time: 0:00:39
