## Image Vectorization using Pretrained Networks

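In this notebook, we compute image vectors for the images in the Holidays dataset using pretrained networks available from the [Keras model zoo](https://keras.io/applications/). Each image is resized to the network's input size, passed through the network, and the activations of a late layer are written out as that image's vector.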


In [1]:
from __future__ import division, print_function
from scipy.misc import imresize
from keras.applications import vgg16, vgg19, inception_v3, resnet50, xception
from keras.models import Model
import matplotlib.pyplot as plt
import numpy as np
import os
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Locate the image folder, then report how many directory entries it holds
# before and after dropping hidden dot-files (e.g. ".DS_Store").
DATA_DIR = "/"
IMAGE_DIR = os.path.join(DATA_DIR, "pigtest_a")
filelist = os.listdir(IMAGE_DIR)
print(len(filelist))
image_names = [fname for fname in filelist if fname[:1] != "."]
print(len(image_names))

3000
3000


In [3]:
def image_batch_generator(image_names, batch_size):
    """Yield consecutive slices of ``image_names`` holding up to ``batch_size`` items.

    Every batch is full except possibly the last one, which carries the
    remainder. No empty batch is ever produced: an empty ``image_names``
    yields nothing, and a length that is an exact multiple of ``batch_size``
    ends on the last full batch.

    Args:
        image_names: sliceable sequence (e.g. a list of file names).
        batch_size: maximum number of items per batch; must be >= 1.

    Yields:
        Slices of ``image_names`` in their original order.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    # Stepping the start offset avoids any dependence on a loop variable after
    # the loop, which was unbound whenever fewer than batch_size names existed.
    for start in range(0, len(image_names), batch_size):
        yield image_names[start:start + batch_size]
    
def vectorize_images(image_dir, image_size, preprocessor,
                     model, vector_file, batch_size=32):
    """Run every image in ``image_dir`` through ``model`` and save the outputs.

    One line is written to ``vector_file`` per image, in the form
    ``<file name>\\t<v1>,<v2>,...``, with each component formatted as
    ``{:.5e}``. Progress is printed every 100 vectors and once at the end.

    Args:
        image_dir: directory to read images from; entries whose names start
            with "." are skipped.
        image_size: each image is resized to ``(image_size, image_size)``.
        preprocessor: callable applied to the float32 batch array before
            prediction (e.g. a Keras ``preprocess_input`` function).
        model: object exposing ``predict(X)`` that returns one vector per row.
        vector_file: path of the output file; overwritten if it exists.
        batch_size: number of images sent to ``model.predict`` at a time.
    """
    filelist = os.listdir(image_dir)
    image_names = [x for x in filelist if not x.startswith('.')]
    num_vecs = 0
    # Text mode, because the rows are built as str (a binary handle rejects
    # str on Python 3). The with-block closes the file even if reading or
    # predicting raises part-way through.
    with open(vector_file, "w") as fvec:
        for image_batch in image_batch_generator(image_names, batch_size):
            if not image_batch:
                # Nothing to predict on; an empty array would only trip up
                # the preprocessor/model.
                continue
            batched_images = []
            for image_name in image_batch:
                image = plt.imread(os.path.join(image_dir, image_name))
                image = imresize(image, (image_size, image_size))
                batched_images.append(image)
            # NOTE(review): stacking assumes every image decodes to the same
            # channel layout (e.g. all RGB) -- confirm for the dataset in use.
            X = preprocessor(np.array(batched_images, dtype="float32"))
            vectors = model.predict(X)
            for i in range(vectors.shape[0]):
                if num_vecs % 100 == 0:
                    print("{:d} vectors generated".format(num_vecs))
                image_vector = ",".join("{:.5e}".format(v)
                                        for v in vectors[i].tolist())
                fvec.write("{:s}\t{:s}\n".format(image_batch[i], image_vector))
                num_vecs += 1
    print("{:d} vectors generated".format(num_vecs))
    

## Generate Vectors using ResNet-50

In [6]:
# Side length, in pixels, that images are resized to; matches the
# (None, 224, 224, 3) input shape reported by the model summary.
IMAGE_SIZE = 224
# Output file that receives one "<image name>\t<vector>" row per image.
VECTOR_FILE = os.path.join("/output/", "resnet-vectors-test-a.tsv")
#resnet_model = load_model('resnet50_weights_tf_dim_ordering_tf_kernels.h5')

# ImageNet-pretrained ResNet-50 including its top (classification) layers;
# the weights are downloaded on first use. summary() prints the layer list.
resnet_model = resnet50.ResNet50(weights="imagenet", include_top=True)
resnet_model.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Layer (type)                     Output Shape          Param #     Connected to                     
input_2 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 112, 112, 64)  9472        input_2[0][0]                    
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 112, 112, 64)  256         conv1[0][0]                      
____________________________________________________________________________________________________
activation_95 (Activation)       (None, 112, 112, 64)  0           bn_conv1[0][0]                   
___________________________________________________

In [7]:
# Turn the classifier into a feature extractor by cutting it off just before
# the final classification layer. The penultimate layer is selected by
# position rather than by its auto-generated name ("flatten_1"), because that
# name's numeric suffix changes whenever a Flatten layer has already been
# created in the session (e.g. after re-running the model cell).
# `inputs=`/`outputs=` are the Keras 2 keyword names; `input=`/`output=` are
# the deprecated Keras 1 spellings.
model = Model(inputs=resnet_model.input,
              outputs=resnet_model.layers[-2].output)
preprocessor = resnet50.preprocess_input

# Write one vector per image in IMAGE_DIR to VECTOR_FILE.
vectorize_images(IMAGE_DIR, IMAGE_SIZE, preprocessor, model, VECTOR_FILE)

  


0 vectors generated
100 vectors generated
200 vectors generated
300 vectors generated
400 vectors generated
500 vectors generated
600 vectors generated
700 vectors generated
800 vectors generated
900 vectors generated
1000 vectors generated
1100 vectors generated
1200 vectors generated
1300 vectors generated
1400 vectors generated
1500 vectors generated
1600 vectors generated
1700 vectors generated
1800 vectors generated
1900 vectors generated
2000 vectors generated
2100 vectors generated
2200 vectors generated
2300 vectors generated
2400 vectors generated
2500 vectors generated
2600 vectors generated
2700 vectors generated
2800 vectors generated
2900 vectors generated
3000 vectors generated
