# Results 1
This experiment implements the simpliest way to use a feature extractor.   
We just compare features from the catalogue to features from the query models.   
It gives a **58%** 10-accuracy result.

In [1]:
import os
import re

import tensorflow as tf
import tensorflow.python.platform
from tensorflow.python.platform import gfile
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import io

to_ignore = [53,89,110,120,127,131,144,167,159,190]

## Load Dataset
### Catalogue Images

In [2]:
cat_dir = 'db/robes/cat/'
list_cat = [cat_dir+f for f in os.listdir(cat_dir) if re.search('jpg|JPG', f)]
list_cat = list(filter(lambda x: "_0" in x, list_cat))
list_cat.sort(key=lambda x: int(x.split("/")[-1].split("_")[0]))
print(str(len(list_cat))+" items in the catalogue")

210 items in the catalogue


### Query Images

In [3]:
query_dir = 'db/robes/mod/'
list_query = [query_dir+f for f in os.listdir(query_dir) if re.search('jpg|JPG', f)]
list_query.sort(key=lambda x: int(x.split("/")[-1].split(".")[0]))
print(str(len(list_cat))+" queries to perform")

210 queries to perform


## Load Feature Extractor

In [4]:
model_dir = 'models'
def create_graph():
    with gfile.FastGFile(os.path.join(model_dir, 'classify_image_graph_def.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
create_graph()
sess = tf.Session()

## Feature Extraction Routine

In [5]:
#Returns the feature vectors corresponding to a list of image
def extract_features(list_images):
    nb_features = 2048
    features = np.zeros((len(list_images),nb_features))

    labels = []

    next_to_last_tensor = sess.graph.get_tensor_by_name('pool_3:0')

    for ind, image in enumerate(list_images):
        if ind in to_ignore:
            print('Ignore item %d'%(ind))
            print('Feature vector:', features[ind,:])
            continue
        if (ind%1 == 0):
            print('Processing %s...' % (image))
        if not gfile.Exists(image):
            tf.logging.fatal('File does not exist %s', image)

        image_data = gfile.FastGFile(image, 'rb').read()
        predictions = sess.run(next_to_last_tensor,{'DecodeJpeg/contents:0': image_data})
        features[ind,:] = np.squeeze(predictions)
        print('Feature vector:', features[ind,:])
        labels.append(io.BytesIO(image_data))

    return features

## Computing Features

In [None]:
cat_features = extract_features(list_cat)

Processing db/robes/cat/0_0.jpg...
Feature vector: [ 0.40074775  0.07182888  0.44221911 ...,  0.21143033  0.06499732
  0.02956949]
Processing db/robes/cat/1_0.jpg...
Feature vector: [ 0.35814914  0.12026802  0.25814605 ...,  1.28692031  0.08103456
  0.19878572]
Processing db/robes/cat/2_0.jpg...
Feature vector: [ 0.27762058  0.30555469  0.42113474 ...,  0.65370804  0.12258876
  0.28485855]
Processing db/robes/cat/3_0.jpg...
Feature vector: [ 0.77828979  0.33109796  0.44746646 ...,  0.17283376  0.02107467  0.        ]
Processing db/robes/cat/4_0.jpg...
Feature vector: [ 0.18785585  0.59689295  0.31479952 ...,  0.17849568  0.44833383  0.        ]
Processing db/robes/cat/5_0.jpg...
Feature vector: [ 0.06242148  0.06589238  0.08463506 ...,  0.63702101  0.15365218
  0.68643653]
Processing db/robes/cat/6_0.jpg...
Feature vector: [ 0.13530213  0.05160119  0.18522428 ...,  1.12796044  0.17820148
  0.5191716 ]
Processing db/robes/cat/7_0.jpg...
Feature vector: [ 0.07847023  0.07439131  0.497088

In [None]:
query_features = extract_features(list_query)

Processing db/robes/mod/0.jpg...
Feature vector: [ 0.32774755  0.00841935  0.58692706 ...,  0.00159362  0.05576468
  0.25206083]
Processing db/robes/mod/1.jpg...
Feature vector: [ 0.37596905  0.14460804  0.0717203  ...,  0.03579425  0.13924043
  0.38153413]
Processing db/robes/mod/2.jpg...
Feature vector: [ 0.61632144  0.27303317  0.49188358 ...,  0.1368213   0.15143654
  0.51621926]
Processing db/robes/mod/3.jpg...
Feature vector: [ 0.27229306  0.03606221  0.07427736 ...,  0.05879694  0.0418688
  0.02784685]
Processing db/robes/mod/4.jpg...
Feature vector: [ 0.27302325  0.18618497  0.20663063 ...,  0.0379703   0.18074931
  0.14269257]
Processing db/robes/mod/5.jpg...
Feature vector: [ 0.12017444  0.042256    0.37772381 ...,  0.03973707  0.13106039
  0.14159293]
Processing db/robes/mod/6.jpg...
Feature vector: [ 0.11276442  0.05175328  0.06608908 ...,  0.04222723  0.12604098
  0.07030558]
Processing db/robes/mod/7.jpg...
Feature vector: [ 0.44902319  0.16569586  0.44643599 ...,  0.0170

In [None]:
#Safety output
len(cat_features),len(query_features)

## Computing Results
We simply, for each queries, get the best matching item in the catalogue according to cosine similarity.

In [24]:
#Cosine Similarity
def sim(vecA,vecB):
    return vecA.dot(vecB)
#Perform the best matching retrieval
#Accuracy: what it means to match, by default it is to be in the top 10 closest
#p: print debug
def query(i_query,accuracy=10,p=False):
    sim_vec = []
    for i in range(len(cat_features)):
        sim_vec.append(sim(query_features[i_query],cat_features[i]))
    sim_vec = np.array(sim_vec)
    arg_s = sim_vec.argsort()[:-accuracy:-1]
    if p:
        print(i_query,arg_s,[sim_vec[i] for i in arg_s], sim_vec[i_query])
    return i_query in arg_s

In [10]:
#Normalization before querying
for i in range(len(cat_features)):
    if not i in to_ignore:
        cat_features[i] /= np.linalg.norm(cat_features[i])
        query_features[i] /= np.linalg.norm(query_features[i])

In [27]:
#Getting result while getting rid of ignored val
matching_frac = 0
ignored = 0
for i in range(len(cat_features)):
    if not i in to_ignore:
        matching_frac += query(i)
    else:
        ignored += 1
matching_frac /= (len(cat_features)-ignored)

# Result

In [28]:
print("This method gives a "+str(matching_frac)+" 10-accuracy success")

This method gives a 0.585 10-accuracy success
