<a href="https://colab.research.google.com/github/dminhq98/extract_features_image/blob/master/faiss_indexing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install annoy

Collecting annoy
[?25l  Downloading https://files.pythonhosted.org/packages/00/15/5a9db225ebda93a235aebd5e42bbf83ab7035e7e4783c6cb528c635c9afb/annoy-1.16.3.tar.gz (644kB)
[K     |▌                               | 10kB 27.1MB/s eta 0:00:01[K     |█                               | 20kB 32.3MB/s eta 0:00:01[K     |█▌                              | 30kB 35.5MB/s eta 0:00:01[K     |██                              | 40kB 38.3MB/s eta 0:00:01[K     |██▌                             | 51kB 40.1MB/s eta 0:00:01[K     |███                             | 61kB 40.6MB/s eta 0:00:01[K     |███▋                            | 71kB 41.5MB/s eta 0:00:01[K     |████                            | 81kB 41.7MB/s eta 0:00:01[K     |████▋                           | 92kB 42.2MB/s eta 0:00:01[K     |█████                           | 102kB 42.6MB/s eta 0:00:01[K     |█████▋                          | 112kB 42.6MB/s eta 0:00:01[K     |██████                          | 122kB 42.6MB/s eta 0:00

In [2]:
pip install faiss-gpu

Collecting faiss-gpu
[?25l  Downloading https://files.pythonhosted.org/packages/0a/8d/d630c7ec7ad93feed005994c9849843d33bed08cf621ffb74fe9f81a45e2/faiss_gpu-1.6.1-cp36-cp36m-manylinux2010_x86_64.whl (41.0MB)
[K     |████████████████████████████████| 41.0MB 74kB/s 
Installing collected packages: faiss-gpu
Successfully installed faiss-gpu-1.6.1


# Dataset

In [3]:
import urllib.request
urllib.request.urlretrieve('ftp://ftp.inrialpes.fr/pub/lear/douze/data/jpg1.tar.gz', 'jpg1.tar.gz')
urllib.request.urlretrieve('ftp://ftp.inrialpes.fr/pub/lear/douze/data/jpg2.tar.gz', 'jpg2.tar.gz')

('jpg2.tar.gz', <email.message.Message at 0x7fa371490780>)

In [0]:
import tarfile
my_tar = tarfile.open('jpg1.tar.gz')
my_tar.extractall() 
my_tar = tarfile.open('jpg2.tar.gz')
my_tar.extractall()
my_tar.close()

In [5]:
import urllib.request
urllib.request.urlretrieve('https://lear.inrialpes.fr/~jegou/code/eval_holidays.tgz', 'eval_holidays.tgz')

('eval_holidays.tgz', <http.client.HTTPMessage at 0x7fa36f3e1e10>)

In [0]:
my_tar = tarfile.open('eval_holidays.tgz')
my_tar.extractall()
my_tar.close()

In [0]:
PATH = '/content/jpg'
PATH_EVAL = '/content/eval_holidays'

# Extract features

In [0]:
from tqdm import tqdm
from torchvision.datasets.folder import default_loader
from torch import nn
import torchvision.models as models
import torchvision.transforms as transforms
import torch
import numpy as np
import h5py
import sys
import logging
import json
from PIL import Image
import shutil
import os
from datetime import datetime
FJoin = os.path.join

In [0]:
def load_model(name, weight=None):
    model = models.__dict__[name](pretrained=True)
    model = nn.Sequential(*list(model.children())[:-1])
    model.eval()
    return model


class ListDataset(torch.utils.data.Dataset):

    def __init__(self,
                 images_list,
                 transform=None,
                 loader=default_loader):
        self.images_list = images_list
        self.loader = loader
        self.transform = transform

    def __getitem__(self, index):
        image_path = self.images_list[index]
        image = self.loader(image_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, image_path

    def __len__(self):
        return len(self.images_list)


class FeatureExtraction:
    """
    Extract features from images.
    """

    def __init__(self,model):
        self.model = model

    def _set_logging(self, logging_filepath):
        """Setup logger to log to file and stdout."""
        log_format = '%(asctime)s.%(msecs).03d: %(message)s'
        date_format = '%H:%M:%S'

        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)

        file_handler = logging.FileHandler(logging_filepath)
        file_handler.setFormatter(
            logging.Formatter(log_format, datefmt=date_format))
        root_logger.addHandler(file_handler)

        console_handler = logging.StreamHandler()
        console_handler.setFormatter(
            logging.Formatter(log_format, datefmt=date_format))
        root_logger.addHandler(console_handler)

        logging.info('Writing log file to %s', logging_filepath)

    def extract_features_to_disk(
            self,
            image_paths,
            output_hdf5,
            batch_size=10,
            workers=4,
            output_log=None):
        """
        Extract a specific list of images and save as HDF5 file

        Parameters
        ----------
        image_paths : list
            List image path.
        output_hdf5 : str
            Output features as HDF5 to this location.
        batch_size : int
            the number of samples that will be propagated through the network (default: 10).
        workers : int
            number of data loading workers (default: 4).
        output_log : str
            Output log file. Default: output_hdf5 + ".log" (default: None).
        """

        if output_log is None:
            output_log = output_hdf5 + '.log'
        self._set_logging(output_log)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = self.model.to(device)
        # Data loading code
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        dataset = ListDataset(image_paths,
                              transforms.Compose([
                                  transforms.Resize(256),
                                  transforms.CenterCrop(224),
                                  transforms.ToTensor(),
                                  normalize,
                              ]))
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=workers,
            pin_memory=True)
        features = {}
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        with torch.no_grad():
            for i, (input_data, paths) in enumerate(tqdm(loader)):
                input_var = torch.autograd.Variable(input_data).to(device)
                current_features = model(input_var).data.cpu().numpy()
                for j, image_path in enumerate(paths):
                    features[image_path] = current_features[j].reshape(-1, )

        feature_shape = features[list(features.keys())[0]].shape
        logging.info('Feature shape: %s' % (feature_shape,))
        logging.info('Outputting features')

        if sys.version_info >= (3, 0):
            string_type = h5py.special_dtype(vlen=str)
        else:
            string_type = h5py.special_dtype(vlen=unicode)  # noqa
        paths = features.keys()
        logging.info('Stacking features')
        features_stacked = np.vstack([features[path] for path in paths])
        logging.info('Output feature size: %s' % (features_stacked.shape,))
        with h5py.File(output_hdf5, 'a') as f:
            f.create_dataset('features', data=features_stacked)
            f.create_dataset(
                'path_images',
                (len(paths),),
                dtype=string_type)
            # For some reason, assigning the list directly causes an error, so we
            # assign it in a loop.
            for i, image_path in enumerate(paths):
                # f['image_names'][i] = image_path_to_name(image_path)
                f['path_images'][i] = image_path

    def extract_image(self, img):
        """
        Extract vector features of an image.
        """
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])
        # print(path)
        # img = Image.open(path).convert('RGB')
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = self.model.to(device)
        image = transform(img).unsqueeze(0).to(device)
        feature = model(image).data.cpu().numpy().reshape(-1, )
        return feature

In [0]:
class ImageList:
    """
    Get the list of image files and check them in the directory directory
    """

    def check_list_image(self, list_images):
        """
        Check a list of image files, if the image fails then move it to the error_image directory.
        """
        if not os.path.exists('error_image'):
            os.makedirs('error_image')
        for p in list_images:
            try:
                Image.open(p)
            except IOError:
                path_err = 'error_image'
                shutil.move(p, path_err)
                print("file {} error.".format(p))

    def get_list_image(self, path):
        """
        Get a list of image files in a directory.
        """
        file_list = []
        for dir, subdirs, files in os.walk(path):
            file_list.extend([FJoin(dir, f) for f in files])
        file_list = filter(lambda x: not os.path.islink(x), file_list)
        self.check_list_image(file_list)
        file_list, dir_list = [], []
        for dir, subdirs, files in os.walk(path):
            file_list.extend([FJoin(dir, f) for f in files])
            dir_list.extend([FJoin(dir, d) for d in subdirs])
        file_list = filter(lambda x: not os.path.islink(x), file_list)
        dir_list = filter(lambda x: not os.path.islink(x), dir_list)
        return list(file_list), list(dir_list)

def load_features(feature_name):
        """
        Read features, path_images from HDF5 file.
        """
        f = h5py.File(feature_name, 'r')
        features = f['features'][:]
        # print(features.shape)
        path_images = f['path_images']
        path_images = list(path_images)
        # print(len(path_images))
        return features, path_images

In [11]:
data = ImageList()
images,_ = data.get_list_image('/content/jpg')
print(images)
model = load_model('resnet50')
extract = FeatureExtraction(model)
extract.extract_features_to_disk(images,'feature.h5')

['/content/jpg/138902.jpg', '/content/jpg/141001.jpg', '/content/jpg/127500.jpg', '/content/jpg/142201.jpg', '/content/jpg/135502.jpg', '/content/jpg/128000.jpg', '/content/jpg/126200.jpg', '/content/jpg/142101.jpg', '/content/jpg/136300.jpg', '/content/jpg/125800.jpg', '/content/jpg/134603.jpg', '/content/jpg/128400.jpg', '/content/jpg/129300.jpg', '/content/jpg/126102.jpg', '/content/jpg/109300.jpg', '/content/jpg/106602.jpg', '/content/jpg/141703.jpg', '/content/jpg/104600.jpg', '/content/jpg/122500.jpg', '/content/jpg/144903.jpg', '/content/jpg/111001.jpg', '/content/jpg/149701.jpg', '/content/jpg/139300.jpg', '/content/jpg/106900.jpg', '/content/jpg/127403.jpg', '/content/jpg/109101.jpg', '/content/jpg/147400.jpg', '/content/jpg/134001.jpg', '/content/jpg/136005.jpg', '/content/jpg/141303.jpg', '/content/jpg/126805.jpg', '/content/jpg/136004.jpg', '/content/jpg/103304.jpg', '/content/jpg/132510.jpg', '/content/jpg/110701.jpg', '/content/jpg/100503.jpg', '/content/jpg/136007.jpg', 

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/checkpoints/resnet50-19c8e357.pth


HBox(children=(IntProgress(value=0, max=102502400), HTML(value='')))




02:03:24.493: Writing log file to feature.h5.log
100%|██████████| 150/150 [02:10<00:00,  1.15it/s]
02:05:45.204: Feature shape: (2048,)
02:05:45.205: Outputting features
02:05:45.206: Stacking features
02:05:45.216: Output feature size: (1491, 2048)


# Annoy indexing

In [0]:
from annoy import AnnoyIndex


class AnnoyFeaturesIndexing:
    """
    Index features by AnnoyIndex.

    Parameters
        ----------
        feature_name : str
            Features as HDF5 to this location.
        index_name : str
            Index features as Ann to this location.
    """

    def __init__(self, feature_name, index_name):
        self.feature_name = feature_name
        self.index_name = index_name

    def load_features(self):
        """
        Read features, path_images from HDF5 file.
        """
        f = h5py.File(self.feature_name, 'r')
        features = f['features'][:]
        # print(features.shape)
        path_images = f['path_images']
        path_images = list(path_images)
        # print(len(path_images))
        return features, path_images

    def make_index(self):
        """
        Indexing for feature vectors.
        """
        features, path_images = self.load_features()
        f = len(features[0])
        # Length of item vector that will be indexed
        t = AnnoyIndex(f, 'angular')
        for i in range(len(features)):
            v = features[i]
            t.add_item(i, v)

        t.build(10)  # 10 trees
        t.save(self.index_name)
    def load_index(self):
        """
        Loading  index form index file (.ann).
        """
        features, path_images = self.load_features()
        f = len(features[0])
        u = AnnoyIndex(f, 'angular')
        u.load(self.index_name)
        return u



In [13]:
import time
annoy_index = AnnoyFeaturesIndexing('feature.h5','feature_annoy.ann')
t0 = time.time()
annoy_index.make_index()
print('Time indexing features: {}'.format(time.time()-t0))

Time indexing features: 0.48802781105041504


# Faiss Indexing

In [0]:
import faiss
class FaissFeaturesIndexing:
    """
    Index features by Faiss.

    Parameters
        ----------
        feature_name : str
            Features as HDF5 to this location.
        index_name : str
            Index features as index to this location.
    """

    def __init__(self, feature_name, index_name,use_gpu=False):
        self.feature_name = feature_name
        self.index_name = index_name
        self.use_gpu = use_gpu

    def load_features(self):
        """
        Read features, path_images from HDF5 file.
        """
        f = h5py.File(self.feature_name, 'r')
        features = f['features'][:]
        # print(features.shape)
        path_images = f['path_images']
        path_images = list(path_images)
        # print(len(path_images))
        return features, path_images

    def make_index(self, index_key="Flat",metric='METRIC_L2'):
        """
        Indexing for feature vectors.

        Parameters
        ----------
          metric : faiss::metric::MetricType
          METRIC_L2 : Euclidean L2-distance
          METRIC_INNER_PRODUCT : Inner product, also called cosine distance
        """
        features, path_images = self.load_features()
        d = len(features[0])
        # if metric :
        #   print(metric)
        #   index = faiss.index_factory(d, index_key, metric)
        # else:
        #   index = faiss.index_factory(d, index_key)
        print(metric)
        index = faiss.index_factory(d, index_key, faiss.__dict__[metric])
        if self.use_gpu:
          print('GPU')
          assert faiss.StandardGpuResources, \
              "FAISS was not compiled with GPU support, or loading _swigfaiss_gpu.so failed"
          res = faiss.StandardGpuResources()
          dev_no = 0
          # transfer to GPU (may be partial)
          index = faiss.index_cpu_to_gpu(res, dev_no, index)
          params = faiss.GpuParameterSpace()
        else:
          print('CPU')
          params = faiss.ParameterSpace()
        
        params.initialize(index)
        index.train(features)
        index.add(features)
        if self.use_gpu:
            index = faiss.index_gpu_to_cpu(index)
        
        faiss.write_index(index,self.index_name)
        
    def load_index(self):
        """
        Loading  index form index file (.index).
        """
        index = faiss.read_index(self.index_name)
        if self.use_gpu:
            assert faiss.StandardGpuResources, \
                "FAISS was not compiled with GPU support, or loading _swigfaiss_gpu.so failed"
            res = faiss.StandardGpuResources()
            dev_no = 0
            # transfer to GPU (may be partial)
            index = faiss.index_cpu_to_gpu(res, dev_no, index)

        return index

In [52]:
index_faiss_flat_cosine = FaissFeaturesIndexing('feature.h5','feature_flat_cosine.index')
t0 = time.time()
index_faiss_flat_cosine.make_index('Flat','METRIC_INNER_PRODUCT')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_INNER_PRODUCT
CPU
Time indexing features: 0.14237236976623535


In [58]:
index_faiss_flat = FaissFeaturesIndexing('feature.h5','feature_flat.index')
t0 = time.time()
index_faiss_flat.make_index('Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
CPU
Time indexing features: 0.14594388008117676


In [59]:
index_faiss_flat_gpu = FaissFeaturesIndexing('feature.h5','feature_flat_gpu.index',True)
t0 = time.time()
index_faiss_flat_gpu.make_index('Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
GPU
Time indexing features: 0.29351091384887695


In [60]:
index_faiss_ivf = FaissFeaturesIndexing('feature.h5','feature_ivf.index')
t0 = time.time()
index_faiss_ivf.make_index('IVF155,Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
CPU
Time indexing features: 0.5220115184783936


In [61]:
index_faiss_ivf_gpu = FaissFeaturesIndexing('feature.h5','feature_ivf_gpu.index',True)
t0 = time.time()
index_faiss_ivf_gpu.make_index('IVF155,Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
GPU
Time indexing features: 0.4934120178222656


In [62]:
index_faiss_pca = FaissFeaturesIndexing('feature.h5','feature_pca.index')
t0 = time.time()
index_faiss_pca.make_index('PCA64,Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
CPU
Time indexing features: 1.8109667301177979


In [63]:
index_faiss_pca_gpu = FaissFeaturesIndexing('feature.h5','feature_pca_gpu.index',True)
t0 = time.time()
index_faiss_pca_gpu.make_index('PCA64,Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
GPU
Time indexing features: 1.986217975616455


In [64]:
index_faiss_pca_ivf = FaissFeaturesIndexing('feature.h5','feature_pca_ivf.index')
t0 = time.time()
index_faiss_pca_ivf.make_index('PCA64,IVF160,Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
CPU
Time indexing features: 1.9771959781646729


In [65]:
index_faiss_pca_ivf_gpu = FaissFeaturesIndexing('feature.h5','feature_pca_ivf_gpu.index',True)
t0 = time.time()
index_faiss_pca_ivf_gpu.make_index('PCA64,IVF160,Flat')
print('Time indexing features: {}'.format(time.time()-t0))

METRIC_L2
GPU
Time indexing features: 1.9358184337615967


# Evaluation

In [29]:
file_name = 'perfect_result.dat'
fnam = os.path.join(PATH_EVAL,file_name)
query_list = []
with open(fnam) as f:
    for line in f:
      line = line.split(' ')[0]
      path = os.path.join(PATH,line)
      query_list.append(path)
print(query_list[:10])
print(len(query_list))

['/content/jpg/107100.jpg', '/content/jpg/147800.jpg', '/content/jpg/143000.jpg', '/content/jpg/116400.jpg', '/content/jpg/107900.jpg', '/content/jpg/124300.jpg', '/content/jpg/126000.jpg', '/content/jpg/105100.jpg', '/content/jpg/129700.jpg', '/content/jpg/115500.jpg']
500


In [30]:
extract.extract_features_to_disk(query_list,'query_list.h5')

02:12:14.298: Writing log file to query_list.h5.log
02:12:14.298: Writing log file to query_list.h5.log
100%|██████████| 50/50 [00:41<00:00,  1.58it/s]
02:12:55.816: Feature shape: (2048,)
02:12:55.816: Feature shape: (2048,)
02:12:55.817: Outputting features
02:12:55.817: Outputting features
02:12:55.820: Stacking features
02:12:55.820: Stacking features
02:12:55.825: Output feature size: (500, 2048)
02:12:55.825: Output feature size: (500, 2048)


In [0]:
class SimilaritySearch:
    """
    Search and rank image.
    """

    def __init__(self,feature_name):
        self.features, self.path_images = load_features(feature_name)
    def search_topk(self, feature,index, k=10,type='annoy'):
        """
        Retrieve the nearest k images.

        Parameters
        ----------
        img : vector
            Vector image search.
        k : int
            The nearest number of images will be returned (default: 10).
        """
        top = {}
        try:
            
            if type=='annoy':
              # knn = AnnoyFeaturesIndexing(self.feature_name, self.index_name)
              # index = knn.load_index()
              res = index.get_nns_by_vector(feature, k, include_distances=True)
              D = res[1]
              I = res[0]
            else:
              # index_faiss = FaissFeaturesIndexing(self.feature_name,self.index_name,use_gpu)
              # index = index_faiss.load_index()
              # if use_gpu:
              #   assert faiss.StandardGpuResources, \
              #     "FAISS was not compiled with GPU support, or loading _swigfaiss_gpu.so failed"
              #   res = faiss.StandardGpuResources()
              #   dev_no = 0
              #   # transfer to GPU (may be partial)
              #   index = faiss.index_cpu_to_gpu(res, dev_no, index)
              D, I = index.search(feature.reshape(1,-1), k)
              D = D[0]
              I = I[0]
        except:
            return top
        for i, idx in enumerate(I):
            kq = []
            kq.append(self.path_images[idx])
            kq.append(D[i])
            top[str(i)] = kq

        return top

In [0]:
features, path_images = load_features('feature.h5')
def query_result(index,type='faiss',input='query_list.h5',output='result_test.dat'):
  query, query_list = load_features(input)
  file_name = output
  file_name = os.path.join(PATH_EVAL,file_name)
  with open(file_name, 'w') as f:
    for i,que in enumerate(query):
      k = 10
      if type == 'faiss':
        D, I = index.search(que.reshape(1,-1), k)
      else:
        res = index.get_nns_by_vector(que, k, include_distances=True)
        I = res
      name = query_list[i]
      name = name.split('/')[-1]
      f.write(name)
      f.write(' ')
      for j,im in enumerate(I[0]):
        f.write(str(j))
        f.write(' ')
        name = path_images[im]
        name = name.split('/')[-1]
        # print(name)
        f.write(name)
        f.write(' ')
      f.write('\n')
  print("The results of 500 query  is saved in "+file_name)

In [0]:
search = SimilaritySearch('feature.h5')
query, query_list = load_features('query_list.h5')
feature_test = query[0]

In [34]:
index = annoy_index.load_index()
query_result(index,'annoy','query_list.h5','result_annoy.dat')
t0 = time.time()
search.search_topk(feature_test,index,10,'annoy')
print('Time query cpu: {}'.format(time.time()-t0))

The results of 500 query  is saved in /content/eval_holidays/result_annoy.dat
Time query cpu: 0.0010190010070800781


In [53]:
index = index_faiss_flat_cosine.load_index()
query_result(index,'faiss','query_list.h5','result_faiss_cosine.dat')
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query cpu: {}'.format(time.time()-t0))
index = index_faiss_flat_gpu.load_index()
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query gpu: {}'.format(time.time()-t0))

The results of 500 query  is saved in /content/eval_holidays/result_faiss_cosine.dat
Time query cpu: 0.004729032516479492
Time query gpu: 0.0006008148193359375


In [66]:
index = index_faiss_flat.load_index()
query_result(index,'faiss','query_list.h5','result_faiss.dat')
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query cpu: {}'.format(time.time()-t0))
index = index_faiss_flat_gpu.load_index()
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query gpu: {}'.format(time.time()-t0))

The results of 500 query  is saved in /content/eval_holidays/result_faiss.dat
Time query cpu: 0.0036628246307373047
Time query gpu: 0.0004284381866455078


In [67]:
index = index_faiss_ivf.load_index()
query_result(index,'faiss','query_list.h5','feature_ivf.dat')
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query cpu: {}'.format(time.time()-t0))
index = index_faiss_ivf_gpu.load_index()
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query gpu: {}'.format(time.time()-t0))

The results of 500 query  is saved in /content/eval_holidays/feature_ivf.dat
Time query cpu: 0.0006401538848876953
Time query gpu: 0.0005524158477783203


In [80]:
index = index_faiss_pca.load_index()
query_result(index,'faiss','query_list.h5','feature_pca.dat')
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query cpu: {}'.format(time.time()-t0))
index = index_faiss_pca_gpu.load_index()
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query gpu: {}'.format(time.time()-t0))

The results of 500 query  is saved in /content/eval_holidays/feature_pca.dat
Time query cpu: 0.0004715919494628906
Time query gpu: 0.0005762577056884766


In [69]:
index = index_faiss_pca_ivf.load_index()
query_result(index,'faiss','query_list.h5','feature_pca_ivf.dat')
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query cpu: {}'.format(time.time()-t0))
index = index_faiss_pca_ivf_gpu.load_index()
t0 = time.time()
search.search_topk(feature_test,index,10,'faiss')
print('Time query gpu: {}'.format(time.time()-t0))

The results of 500 query  is saved in /content/eval_holidays/feature_pca_ivf.dat
Time query cpu: 0.0006968975067138672
Time query gpu: 0.000537872314453125


In [70]:
%cd /content/eval_holidays

/content/eval_holidays


In [71]:
!python2 holidays_map.py result_annoy.dat

mAP for result_annoy.dat: 0.77607


In [72]:
!python2 holidays_map.py result_faiss_cosine.dat

mAP for result_faiss_cosine.dat: 0.34740


In [73]:
!python2 holidays_map.py result_faiss.dat

mAP for result_faiss.dat: 0.73511


In [74]:
!python2 holidays_map.py feature_ivf.dat

mAP for feature_ivf.dat: 0.64045


In [75]:
!python2 holidays_map.py feature_pca.dat

mAP for feature_pca.dat: 0.72290


In [76]:
!python2 holidays_map.py feature_pca_ivf.dat

mAP for feature_pca_ivf.dat: 0.61085


In [77]:
%cd /content

/content
