# MobileNetV2

In [1]:
import os
import time

from keras.applications import MobileNetV2 as CNN
from keras.applications.mobilenetv2 import preprocess_input
from keras.layers import Flatten
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

# Start a wall-clock timer. Note that t1 is reassigned in a later cell before
# it is ever read, so the elapsed times printed further down do not include
# the model construction below.
t1 = time.time()

# Instantiate MobileNetV2 (imported as CNN) with its top layers included and
# print the layer-by-layer summary.
model = CNN(include_top=True)
model.summary()

Using TensorFlow backend.


Downloading data from https://github.com/JonathanCMitchell/mobilenet_v2_keras/releases/download/v1.1/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 112, 112,

In [2]:
# Flatten the output of the third-from-last layer into one feature vector per image.
# NOTE(review): the hard-coded index -3 depends on the layer ordering of the
# installed Keras version; presumably it picks the last activation before the
# pooling/classifier head — confirm against model.summary().
x = Flatten()(model.layers[-3].output)
# Build a model that shares the original input but emits the flattened features.
model_sliced = Model(inputs=model.input, outputs=[x])
model_sliced.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 112, 112, 32) 0           bn_Conv1[0][0]                   
__________________________________________________________________________________________________
expanded_c

In [3]:
# Generator that applies the MobileNetV2 preprocessing function to every image.
img_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
TARGET_SIZE = (224, 224)
BATCH_SIZE = 64

# Dataset root. Defaults to the original location so existing runs are
# unchanged; set the DEDUP_DATA_DIR environment variable to run elsewhere.
DATA_DIR = os.environ.get(
    'DEDUP_DATA_DIR',
    '/Users/tanuj.jain/Documents/dedup-data/Transformed_dataset_copy'
)
IMG_DIR_QUERY = os.path.join(DATA_DIR, 'Query_copy')
IMG_DIR_RETRIEVALS = os.path.join(DATA_DIR, 'Retrieval_copy')


def _flow_images(directory):
    """Return an unshuffled RGB batch iterator over the images under `directory`.

    shuffle=False keeps the iteration order fixed, so row k of the predicted
    features corresponds to entry k of the iterator's `filenames` list (the
    index -> filename mappings built later rely on this).
    """
    return img_gen.flow_from_directory(
        directory=directory,
        target_size=TARGET_SIZE,
        batch_size=BATCH_SIZE,
        color_mode='rgb',
        shuffle=False
    )


# Restart the timer so the elapsed times printed from here on measure
# feature extraction and everything after it.
t1 = time.time()

img_batches_rets = _flow_images(IMG_DIR_RETRIEVALS)
img_batches_query = _flow_images(IMG_DIR_QUERY)

# len(iterator) is the number of batches, i.e. one full pass over each directory.
# NOTE(review): newer Keras releases deprecate predict_generator in favour of
# predict — confirm against the installed version before upgrading.
feat_vecs_rets = model_sliced.predict_generator(img_batches_rets, len(img_batches_rets), verbose=1)
feat_vecs_query = model_sliced.predict_generator(img_batches_query, len(img_batches_query), verbose=1)

print(time.time() - t1)

Found 12750 images belonging to 1 classes.
Found 2550 images belonging to 1 classes.
1784.0689358711243


In [4]:
# Map each row index of the feature matrices back to the image file it came from.
file_mapping_rets = dict(enumerate(img_batches_rets.filenames))
file_mapping_query = dict(enumerate(img_batches_query.filenames))

In [5]:
# For each query image, compute its cosine similarity to every retrieval image
# (dot product of L2-normalized feature vectors; higher means more similar).

In [6]:
import numpy as np
from numpy.linalg import norm

In [7]:
def get_normalized_matrix(x):
    """Scale every row of a 2-D matrix to unit Euclidean (L2) length.

    Parameters
    ----------
    x : array-like, shape (n_rows, n_cols)
        Matrix whose rows are the vectors to normalize. The input is not
        modified.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` in which every non-zero row has
        L2 norm 1. All-zero rows are returned as zeros instead of NaN, so
        they contribute a dot product of 0 with any other row.
    """
    x = np.asarray(x)
    # keepdims=True gives shape (n_rows, 1), which broadcasts against x
    # directly; no need to tile the norms across the columns.
    x_norm_per_row = norm(x, axis=1, keepdims=True)
    # Avoid 0/0 for all-zero rows: dividing zeros by 1 leaves them as zeros.
    x_norm_per_row[x_norm_per_row == 0] = 1
    return x / x_norm_per_row

In [8]:
# L2-normalize both feature matrices row by row so that a plain dot product
# between a query row and a retrieval row equals their cosine similarity.
feat_vecs_rets_norm = get_normalized_matrix(feat_vecs_rets)
feat_vecs_query_norm = get_normalized_matrix(feat_vecs_query)

# Seconds elapsed since t1 was last set.
print(time.time() - t1)

1804.8574228286743


In [9]:
# Pairwise similarity matrix: entry (q, r) is the dot product of normalized
# query row q with normalized retrieval row r. Despite the name "dist_vec",
# larger values mean the two images are MORE alike.
dist_vec = feat_vecs_query_norm.dot(feat_vecs_rets_norm.T)
print(time.time() - t1)

1821.7132318019867


In [10]:
# Sanity check: one row per query image, one column per retrieval image.
dist_vec.shape

(2550, 12750)

In [11]:
def get_matches_above_threshold(row, thresh):
    """Pick out the entries of `row` that are greater than or equal to `thresh`.

    Returns a pair ``(indices, values)``: the positions where the comparison
    holds and the entries of `row` at those positions, in ascending index order.
    """
    passes = row >= thresh
    positions = np.nonzero(passes)[0]
    return positions, row[positions]

In [12]:
# query filename -> {retrieval filename: similarity score} for every retrieval
# image whose score reaches the cutoff.
dict_ret = {}

# Walk the similarity matrix one query (row) at a time.
for i in range(len(dist_vec)):
    # 0.83 is the hard-coded similarity cutoff used for this experiment.
    valid_inds, valid_vals = get_matches_above_threshold(dist_vec[i], 0.83)
    query_name = file_mapping_query[i]
    retrieved_files = [file_mapping_rets[j] for j in valid_inds]
    dict_ret[query_name] = {
        fname: score for fname, score in zip(retrieved_files, valid_vals)
    }

print(time.time() - t1)

1821.919733762741


In [13]:
# Spot check: retrieval files matched to one query image, with their scores.
dict_ret['Query/ukbench00147.jpg']

{'Retrieval/ukbench00147_resize.jpg': 0.8359516}