In [1]:
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from imutils import paths
import numpy as np
import progressbar
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
IMG_HELP_PATH = '/Users/shankar/dev/code/ds/studies/data_science/dlcv'
import os
import sys
sys.path.append(os.path.abspath(IMG_HELP_PATH))
from common.io import HDF5DatasetWriter

In [3]:
DATASET = '../datasets/animals/images'
OUTPUT = '../datasets/animals/hdf5/features.hdf5'
BATCH_SIZE = 32
BUFFER_SIZE = 1000

In [4]:
# Grab the list of images that we'll be describing then randomly shuffle them to allow
# for easy training and testing splits via array slicing during training time
print("[INFO] loading images")
imagePaths = list(paths.list_images(DATASET))
random.shuffle(imagePaths)

[INFO] loading images


In [5]:
# Extract the class labels from the image paths then encode the labels
labels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

In [6]:
print("[INFO] Loading Network")
model = VGG16(weights="imagenet", include_top=False)

[INFO] Loading Network


In [7]:
# Initialize the HDF5 dataset writer, then store the class label names in the dataset
dataset = HDF5DatasetWriter((len(imagePaths), 512*7*7), OUTPUT, dataKey='features', bufSize=BUFFER_SIZE)
dataset.storeClassLabels(le.classes_)

In [9]:
[key for key in dataset.keys()]

AttributeError: 'HDF5DatasetWriter' object has no attribute 'keys'

In [10]:
# Initialize the progress bar
widgets = ["Extracting Features: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start()

Extracting Features: N/A% |                                    | ETA:  --:--:--

In [11]:
# Loop over the images in patches
for i in np.arange(0, len(imagePaths), BATCH_SIZE):
    # Extract the batch of images and labels, then initialize the list of actual images
    # that will be passed through the network for feature extraction
    batchPaths = imagePaths[i:i + BATCH_SIZE]
    batchLabels = labels[i:i + BATCH_SIZE]
    batchImages = []
    
    # Loop over the images and labels in the current batch
    for(j, imagePath) in enumerate(batchPaths):
        # load the input image using the Keras helper utility
        # while enusring the image is resized to 224x224 pixels
        image = load_img(imagePath, target_size=(224, 224))
        image = img_to_array(image)
                         
        # Preprocess the image by (1) expanding the dimensions and (2) subtracting
        # the mean RGB pixel intensity from the ImageNet dataset
        image = np.expand_dims(image, axis=0)
        image = imagenet_utils.preprocess_input(image)
                         
        # Add the image to the batch
        batchImages.append(image)
                         
    # Pass the images through the network and use the outputs as our actual features
    batchImages = np.vstack(batchImages)
    features = model.predict(batchImages, batch_size=BATCH_SIZE)
                         
    # Reshape the features so that each image is represented by a flattened features
    # vector of the MaxPooling2D outputs
    features = features.reshape((features.shape[0], 512*7*7))
                         
    # Add the features and labels to our HDF5 dataset
    dataset.add(features, batchLabels)
    pbar.update(i)
                         


Extracting Features:  99% |################################### | ETA:   0:00:13

In [None]:
dataset.close()
pbar.finish()