In [5]:
import import_hack
from core import steps
from core.data_store.sqlite_table_datastore import SQLiteTableDataStore
from core.data_store.sqlite_table_one_to_many_datastore import SQLiteTableOneToManyDataStore
from core.data_store.file_system_directory_datastore import FileSystemDirectoryDataStore
from core.data_store.numpy_datastore import NumpyDataStore
from core.data_store.stream_ndarray_adapter_datastore import StreamNdarrayAdapterDataStore
from core.quantization.pq_quantizer import PQQuantizer, restore_from_clusters
from core.transformer.bytes_to_ndarray import BytesToNdarray
from core.transformer.ndarray_to_opencvmatrix import NdarrayToOpencvMatrix
from core.transformer.opencvmatrix_to_siftset import OpencvMatrixToSiftsSet
from core.search.exhaustive_searcher import ExhaustiveSearcher
from core.search.inverted_multi_index_searcher import InvertedMultiIndexSearcher
from core.common.ds_utils import print_ds_items_info
from core.evaluation.retrieval_perfomance import PrecisionRecallAveragePrecisionEvaluator
from core.evaluation.ground_truth import BrodatzGroundTruth
import numpy as np
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Compute descriptors(siftsets)

In [6]:
img_dir_path = r'C:\data\images\oxford\oxbuild_images'
siftsets_path='ds_data\siftid_sift_imgid'
siftsets_table_name='siftid_sift_imgid'

We can interpret table as One-to-One(id, itemid) or as One-to-Many(id, itemid, foreignid).
If we want to use it like One-to-One, use SQLiteTableDataStore. Here item = one sift.
If we want to use it like One-to-Many, use SQLiteTableOneToManyDataStore. In this case items would be grouped by foreignid and aggregated into ndarray. Here item = ndarray of sifts, e.g. ndarray with shape (*number of sifts in image*, 128)

In [7]:
images_ds = FileSystemDirectoryDataStore(dir_path=img_dir_path)
siftsets_ds = SQLiteTableOneToManyDataStore(siftsets_path, siftsets_table_name)
print("images count in '{0}': ".format(img_dir_path), images_ds.get_count())

images count in 'C:\data\images\oxford\oxbuild_images':  5063


* get items from **images_ds**
* apply sequence of transformations for each item: bytes->ndarray->opencvMatrix->siftSet
* write result in **siftsets_ds**

In [9]:
transformers_=[BytesToNdarray(), NdarrayToOpencvMatrix(), OpencvMatrixToSiftsSet()]
steps.transform_step(images_ds, transformers_, siftsets_ds)

OperationalError: unable to open database file

In [4]:
print_ds_items_info(siftsets_ds)

count of items in ds:  5063
shape of item[0]:  (5263, 128)
shape of item[1]:  (6774, 128)
shape of item[2]:  (2212, 128)


# Compute sample

To build bag-of-visual-words we need visual words -> we need clusters -> we need to quantize sifts.

We will quantize not all descriptors but sample from them

In [5]:
sifts_ds = SQLiteTableDataStore(siftsets_path, siftsets_table_name)

Here we treat table as One-to-One, so only (id,item) are sampled, foreignid is not considered.

In [6]:
print_ds_items_info(sifts_ds)

count of items in ds:  17814955
shape of item[0]:  (128,)
shape of item[1]:  (128,)
shape of item[2]:  (128,)


In [69]:
with sifts_ds:
    sample_part=(5000000/sifts_ds.get_count())
sample_path='ds_data\siftid_sift_sample{0}'.format(sample_part)
sample_table_name='siftid_sift'

In [70]:
sample_ds = SQLiteTableDataStore(sample_path, sample_table_name)

In [71]:
steps.sampling_step(sifts_ds, sample_part, sample_ds)

In [72]:
print_ds_items_info(sample_ds)

count of items in ds:  5000000
shape of item[0]:  (128,)
shape of item[1]:  (128,)
shape of item[2]:  (128,)


# Quantize local descriptors

In [73]:
n_clusters=50
sift_clusters_path='ds_data\siftid_sift_sample{0}-clusters{1}'.format(sample_part, n_clusters)
sift_clusters_table_name='clusterid_cluster'
sift_clusters_ds = SQLiteTableDataStore(sift_clusters_path, sift_clusters_table_name)
quantizer = PQQuantizer(n_clusters=n_clusters, n_quantizers=2)

In [74]:
steps.quantize_step(sample_ds, quantizer, sift_clusters_ds)

In [75]:
print_ds_items_info(sift_clusters_ds)

count of items in ds:  2
shape of item[0]:  (50, 64)
shape of item[1]:  (50, 64)


# Compute bows(sifts clusters bincounts)

restore pq_quantizer from cluster centers

In [44]:
ds_clusters_ndarray_adapter = StreamNdarrayAdapterDataStore(sift_clusters_ds, detect_final_shape_by_first_elem=True)
clusters = ds_clusters_ndarray_adapter.get_items_sorted_by_ids()
pq_quantizer = restore_from_clusters(clusters)

In [45]:
siftbincount_path='ds_data\imgid_siftbincount'
siftbincount_table_name='imgid_siftbincount'

In [46]:
siftbincount_ds=SQLiteTableDataStore(siftbincount_path, siftbincount_table_name)

In [47]:
transformers=[trs.ArraysToBinCount(pq_quantizer)]
steps.transform_step(siftsets_ds, transformers, siftbincount_ds)

In [59]:
print_ds_items_info(siftbincount_ds)
print_ds_items_info(siftbincount_ds, print_shape=False)

count of items in ds:  5063
shape of item[0]:  (10000,)
shape of item[1]:  (10000,)
shape of item[2]:  (10000,)
count of items in ds:  5063
item[0]:  [0 2 0 ..., 0 0 0]
item[1]:  [1 4 0 ..., 0 0 0]
item[2]:  [0 2 0 ..., 0 0 1]


In [60]:
with siftbincount_ds:
    sift1=next(iter(siftbincount_ds.get_items_sorted_by_ids()))
    print(len(np.nonzero(sift1)[0]))
    print(np.nonzero(sift1))
    print(sift1[np.nonzero(sift1)])

2521
(array([   1,    9,   11, ..., 9986, 9993, 9995], dtype=int64),)
[2 1 2 ..., 6 7 2]


# Compute descriptors(siftsets) for queries

In [61]:
queries_img_dir_path = r'C:\data\images\oxford\queries'
queries_siftsets_path='ds_data\queries\siftid_sift_imgid'
queries_siftsets_table_name='siftid_sift_imgid'
queries_images_ds = FileSystemDirectoryDataStore(dir_path=queries_img_dir_path)
queries_siftsets_ds = SQLiteTableOneToManyDataStore(queries_siftsets_path, queries_siftsets_table_name)
print("images count in '{0}': ".format(queries_img_dir_path), queries_images_ds.get_count())

images count in 'C:\data\images\oxford\queries':  55


In [51]:
transformers_=[trs.BytesToNdarray(), trs.NdarrayToOpencvMatrix(), trs.OpencvMatrixToSiftsSet()]
steps.transform_step(queries_images_ds, transformers_, queries_siftsets_ds)

In [62]:
print_ds_items_info(queries_siftsets_ds)

count of items in ds:  55
shape of item[0]:  (2182, 128)
shape of item[1]:  (3003, 128)
shape of item[2]:  (718, 128)


# Compute bows(sifts clusters bincounts) for queries

In [53]:
ds_clusters_ndarray_adapter = StreamNdarrayAdapterDataStore(sift_clusters_ds, detect_final_shape_by_first_elem=True)
clusters = ds_clusters_ndarray_adapter.get_items_sorted_by_ids()
pq_quantizer = restore_from_clusters(clusters)

queries_siftbincount_path='ds_data\queries\imgid_siftbincount'
queries_siftbincount_table_name='imgid_siftbincount'
queries_siftbincount_ds=SQLiteTableDataStore(queries_siftbincount_path, queries_siftbincount_table_name)

In [54]:
transformers=[trs.ArraysToBinCount(pq_quantizer)]
steps.transform_step(siftsets_ds, transformers, siftbincount_ds)

In [55]:
print_ds_items_info(queries_siftbincount_ds)
print_ds_items_info(queries_siftbincount_ds, print_shape=False)

count of items in ds:  55
shape of item[0]:  (250000,)
shape of item[1]:  (250000,)
shape of item[2]:  (250000,)
count of items in ds:  55
item[0]:  [0 0 0 ..., 0 0 0]
item[1]:  [0 0 0 ..., 0 0 0]
item[2]:  [0 0 0 ..., 0 0 0]


# Exhaustive search by bows

In [56]:
np_neighbor_ids_ds=NumpyDataStore()
siftbincount_ndarray_ds = StreamNdarrayAdapterDataStore(siftbincount_ds, detect_final_shape_by_first_elem=True)

In [57]:
exs_searcher=ExhaustiveSearcher(siftbincount_ndarray_ds.get_items_sorted_by_ids(), siftbincount_ndarray_ds.get_ids_sorted())

In [58]:
n_neighbors=10
steps.search_step(queries_siftbincount_ds, exs_searcher, n_neighbors , np_neighbor_ids_ds)

ValueError: Incompatible dimension for X and Y matrices: X.shape[1] == 250000 while Y.shape[1] == 10000

In [None]:
print_ds_items_info(np_neighbor_ids_ds, first_items_to_print=15, print_shape=False)

In [None]:
img_ids=siftbincount_ndarray_ds.get_ids_sorted()
source_ids=images_ds.get_ids_sorted()
neighbor_source_ids_ds=NumpyDataStore()
steps.transform_step(np_neighbor_ids_ds, [trs.TranslateByKeysTransformer(img_ids, source_ids)], neighbor_source_ids_ds)

for query_source_id, neighbor_source_ids in zip(queries_images_ds.get_ids_sorted(),neighbor_source_ids_ds.get_items_sorted_by_ids()):
    print(query_source_id, neighbor_source_ids)

# Evaluate retrieval perfomance

In [None]:
perfomance_evaluator=PrecisionRecallAveragePrecisionEvaluator(BrodatzGroundTruth())

In [None]:
retrieval_perfomance_ds=NumpyDataStore()

In [None]:
steps.evaluation_step(np_neighbor_ids_ds, perfomance_evaluator, retrieval_perfomance_ds)

In [None]:
print_ds_items_info(retrieval_perfomance_ds, first_items_to_print=3, print_shape=False)

# Approximate search with inverted multi-index

### Quantize global descriptors

In [156]:
productsiftbincountclusters_path='ds_data\imgid_productsiftbincount_clusters'
productsiftbincountclusters_table_name='clusterid_cluster'

In [157]:
productsiftbincountclusters_ds=SQLiteTableDataStore(productsiftbincountclusters_path, productsiftbincountclusters_table_name)

In [158]:
quantizer = PQQuantizer(n_clusters=256, n_quantizers=2)

In [159]:
steps.quantize_step(productsiftbincount_ds, quantizer, productsiftbincountclusters_ds)

In [160]:
print_ds_items_info(productsiftbincountclusters_ds)

count of items in ds:  2
shape of item[0]:  (256, 1500)
shape of item[1]:  (256, 1500)


### Build inverted multi-index

In [161]:
productsiftbincountclusters_ndarray_ds=StreamNdarrayAdapterDataStore(productsiftbincountclusters_ds, detect_final_shape_by_first_elem=True)
cluster_centers=productsiftbincountclusters_ndarray_ds.get_items_sorted_by_ids()
print("cluster_centers: ", cluster_centers.shape)

cluster_centers:  (2, 256, 1500)


In [162]:
productsiftbincount_ndarray_ds = StreamNdarrayAdapterDataStore(productsiftbincount_ds, detect_final_shape_by_first_elem=True)
X = productsiftbincount_ndarray_ds.get_items_sorted_by_ids()
X_ids=productsiftbincount_ndarray_ds.get_ids_sorted()
print("X: ", X.shape)
print("X_ids: ", X_ids.shape)

X:  (999, 3000)
X_ids:  (999,)


In [None]:
imi_searcher=InvertedMultiIndexSearcher(X, X_ids, cluster_centers)

### Search approximate neighbors for all bovws(productsiftbincount_ds)

In [None]:
approximateneighborsids_ds=NumpyDataStore()
steps.search_step(productsiftbincount_ds, imi_searcher, 16 , approximateneighborsids_ds)

In [None]:
print_ds_items_info(approximateneighborsids_ds, print_shape=False, first_items_to_print=50)