# Results 2
In order to get better result queries are croped on their center with a (500,500) window.   
It gives a **51%** 10-accuracy result.

In [1]:
import os
import re

import tensorflow as tf
import tensorflow.python.platform
from tensorflow.python.platform import gfile
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import io

to_ignore = [53,89,110,120,127,131,144,167,159,190]

## Load Dataset
### Catalogue Images

In [2]:
cat_dir = 'db/robes/cat/'
list_cat = [cat_dir+f for f in os.listdir(cat_dir) if re.search('jpg|JPG', f)]
list_cat = list(filter(lambda x: "_0" in x, list_cat))
list_cat.sort(key=lambda x: int(x.split("/")[-1].split("_")[0]))
print(str(len(list_cat))+" items in the catalogue")

210 items in the catalogue


In [4]:
### Query Images
query_dir = 'db/robes/mod/'
list_query = [query_dir+f for f in os.listdir(query_dir) if re.search('jpg|JPG', f)]
list_query.sort(key=lambda x: int(x.split("/")[-1].split(".")[0]))
print(str(len(list_cat))+" queries to perform")

210 queries to perform


## Load Feature Extractor

In [6]:
model_dir = 'models'
def create_graph():
    with gfile.FastGFile(os.path.join(model_dir, 'classify_image_graph_def.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
create_graph()
sess = tf.Session()

## Feature Extraction Routine

In [8]:
#Returns the feature vectors corresponding to a list of image
#This function does the centered crop
def extract_features(list_images,p=False):
    nb_features = 2048
    features = np.zeros((len(list_images),nb_features))

    labels = []

    next_to_last_tensor = sess.graph.get_tensor_by_name('pool_3:0')

    for ind, image in enumerate(list_images):
        if ind in to_ignore:
            if p:
                print('Ignore item %d'%(ind))
                print('Feature vector:', features[ind,:])
            continue
        if (ind%1 == 0) and p:
            print('Processing %s...' % (image))
        if not gfile.Exists(image):
            tf.logging.fatal('File does not exist %s', image)

        image_data = gfile.FastGFile(image, 'rb').read()
        image_data_tensor = tf.image.decode_jpeg(image_data)
        #Croping operation
        image_data_croped = tf.image.resize_image_with_crop_or_pad(image_data_tensor,  500, 500)
        image_data_modified = tf.image.encode_jpeg(image_data_croped)
        predictions = sess.run(next_to_last_tensor,{'DecodeJpeg/contents:0': sess.run(image_data_modified)})
        features[ind,:] = np.squeeze(predictions)
        if p:
            print('Feature vector:', features[ind,:])
        labels.append(io.BytesIO(image_data))
    print("Done")
    return features

## Computing Features

In [9]:
cat_features = extract_features(list_cat)

Done


In [10]:
query_features = extract_features(list_query)

Done


In [11]:
#Safety check
len(cat_features),len(query_features)

(210, 210)

## Computing Results
For each queries, the best matching items in the catalogue, according to cosine similarity, are found.

In [12]:
#Cosine Similarity
def sim(vecA,vecB):
    return vecA.dot(vecB)
#Perform the best matching retrieval
#Accuracy: what it means to match, by default it is to be in the top 10 closest
#p: print debug
def query(i_query,accuracy=10,p=False):
    sim_vec = []
    for i in range(len(cat_features)):
        sim_vec.append(sim(query_features[i_query],cat_features[i]))
    sim_vec = np.array(sim_vec)
    arg_s = sim_vec.argsort()[:-accuracy:-1]
    if p:
        print(i_query,arg_s,[sim_vec[i] for i in arg_s], sim_vec[i_query])
    return i_query in arg_s

In [13]:
#Normalization before querying
for i in range(len(cat_features)):
    if not i in to_ignore:
        cat_features[i] /= np.linalg.norm(cat_features[i])
        query_features[i] /= np.linalg.norm(query_features[i])

In [14]:
#Getting result while getting rid of ignored val
matching_frac = 0.0
ignored = 0
for i in range(len(cat_features)):
    if not i in to_ignore:
        matching_frac += query(i)
    else:
        ignored += 1
matching_frac /= (len(cat_features)-ignored)

# Result

In [16]:
print("This method gives a "+str(matching_frac*100)+" 10-accuracy success")

This method gives a 51.0 10-accuracy success


It is better but it may not look were it should either, maybe the relevant information is not in the center of the query. Also the catalogue item should be croped for better results.