In [40]:
import import_hack
import steps
import data_store
from data_store.sqlite_table_datastore import SQLiteTableDataStore
from data_store.sqlite_table_one_to_many_datastore import SQLiteTableOneToManyDataStore
from data_store.file_system_directory_datastore import FileSystemDirectoryDataStore
from data_store.numpy_datastore import NumpyDataStore
from data_store.stream_ndarray_adapter_datastore import StreamNdarrayAdapterDataStore
from quantization.pq_quantizer import PQQuantizer, restore_from_clusters
from transformer import transformers as trs
from search.exhaustive_searcher import ExhaustiveSearcher
from search.inverted_multi_index_searcher import InvertedMultiIndexSearcher
from common.ds_utils import print_ds_items_info
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Compute descriptors (siftsets)

In [6]:
# Directory with the input images.
img_dir_path = r'C:\data\images\brodatz\data.brodatz\size_213x213'
# Raw string: '\s' is not a valid escape sequence, so a plain literal triggers an
# invalid-escape warning; the resulting value is unchanged.
siftsets_path = r'ds_data\siftid_sift_imgid'
# Table holding the sifts inside the datastore at siftsets_path.
siftsets_table_name = 'siftid_sift_imgid'

We can interpret the table as One-to-One (id, itemid) or as One-to-Many (id, itemid, foreignid).
To use it as One-to-One, use SQLiteTableDataStore. Here an item is a single sift.
To use it as One-to-Many, use SQLiteTableOneToManyDataStore. In this case items are grouped by foreignid and aggregated into an ndarray. Here an item is an ndarray of sifts, e.g. an ndarray with shape (*number of sifts in image*, 128).

In [7]:
# Directory-backed datastore over img_dir_path (the source of the image items).
images_ds = FileSystemDirectoryDataStore(dir_path=img_dir_path)
# The sift table opened through the one-to-many datastore class (destination of the sift sets).
siftsets_ds = SQLiteTableOneToManyDataStore(siftsets_path, siftsets_table_name)
# Report how many items the image datastore exposes.
print(
    "images count in '{0}': ".format(img_dir_path),
    images_ds.get_count(),
)

images count in 'C:\data\images\brodatz\data.brodatz\size_213x213':  999


* get items from **images_ds**
* apply a sequence of transformations to each item: bytes->ndarray->opencvMatrix->siftSet
* write the result to **siftsets_ds**

In [15]:
# Conversion chain applied to each image item, in order:
# bytes -> ndarray -> OpenCV matrix -> set of sifts.
transformers_ = [
    trs.BytesToNdarray(),
    trs.NdarrayToOpencvMatrix(),
    trs.OpencvMatrixToSiftsSet(),
]
# Read from images_ds, run the chain, write the results into siftsets_ds.
steps.transform_step(images_ds, transformers_, siftsets_ds)

In [8]:
# Print the item count and the shapes of the first items of siftsets_ds.
print_ds_items_info(siftsets_ds)

count of items in ds:  999
shape of item[0]:  (379, 128)
shape of item[1]:  (407, 128)
shape of item[2]:  (395, 128)


# Compute sample

To build a bag of visual words we need visual words -> we need clusters -> we need to quantize sifts.

We will not quantize all descriptors, only a sample of them.

In [9]:
# Open the same table (siftsets_path / siftsets_table_name) through the
# one-to-one datastore class, so that each item is a single sift.
sifts_ds = SQLiteTableDataStore(siftsets_path, siftsets_table_name)

Here we treat the table as One-to-One, so only (id, item) pairs are sampled; foreignid is not considered.

In [10]:
# Print the item count and the shapes of the first items of sifts_ds.
print_ds_items_info(sifts_ds)

count of items in ds:  826845
shape of item[0]:  (128,)
shape of item[1]:  (128,)
shape of item[2]:  (128,)


In [138]:
# Sampling parameter passed to steps.sampling_step; it is also embedded in the file name.
sample_part = 0.3
# Raw string: '\s' is not a valid escape sequence, so a plain literal triggers an
# invalid-escape warning; the resulting value is unchanged.
sample_path = r'ds_data\siftid_sift_sample{0}'.format(sample_part)
# Table holding the sampled sifts.
sample_table_name = 'siftid_sift'

In [139]:
# Datastore that will receive the sampled sifts.
sample_ds = SQLiteTableDataStore(sample_path, sample_table_name)

In [140]:
# Sample from sifts_ds according to sample_part and store the result in sample_ds.
steps.sampling_step(sifts_ds, sample_part, sample_ds)

In [141]:
# Print the item count and the shapes of the first items of sample_ds.
print_ds_items_info(sample_ds)

count of items in ds:  248053
shape of item[0]:  (128,)
shape of item[1]:  (128,)
shape of item[2]:  (128,)


# Quantize local descriptors

In [142]:
# Number of clusters requested from the quantizer.
n_clusters = 1500
# Derive the cluster-store path from sample_path so the file name always names the
# sample that is actually quantized (a hard-coded 'sample0.1' would mislabel the
# clusters when sample_part is 0.3). Building it from sample_path also avoids a
# backslash literal with an invalid escape sequence.
sift_clusters_path = '{0}-clusters{1}'.format(sample_path, n_clusters)
# Table holding the clusters.
sift_clusters_table_name = 'clusterid_cluster'

In [143]:
# Datastore that will receive the output of the quantization step.
sift_clusters_ds = SQLiteTableDataStore(sift_clusters_path, sift_clusters_table_name)

In [144]:
# Product quantizer configured with n_clusters clusters and 2 quantizers.
quantizer = PQQuantizer(n_clusters=n_clusters, n_quantizers=2)

In [145]:
# Run the quantization step on sample_ds; the result is written to sift_clusters_ds
# (presumably the learned cluster centers, one item per quantizer — confirm in steps).
steps.quantize_step(sample_ds, quantizer, sift_clusters_ds)

In [146]:
# Print the item count and the shapes of the first items of sift_clusters_ds.
print_ds_items_info(sift_clusters_ds)

count of items in ds:  2
shape of item[0]:  (1500, 64)
shape of item[1]:  (1500, 64)


# Compute bows (sift cluster bincounts)

Restore pq_quantizer from the cluster centers.

In [147]:
# Wrap the cluster datastore in the ndarray adapter; the final array shape is
# detected from the first element, as requested by the flag.
ds_clusters_ndarray_adapter = StreamNdarrayAdapterDataStore(
    sift_clusters_ds,
    detect_final_shape_by_first_elem=True,
)
# Fetch the stored items ordered by id.
clusters = ds_clusters_ndarray_adapter.get_items_sorted_by_ids()
# Rebuild the product quantizer from the stored clusters.
pq_quantizer = restore_from_clusters(clusters)

In [148]:
# Raw string: '\i' is not a valid escape sequence, so a plain literal triggers an
# invalid-escape warning; the resulting value is unchanged.
productsiftbincount_path = r'ds_data\imgid_productsiftbincount'
# Table holding the per-image product bincounts.
productsiftbincount_table_name = 'imgid_productsiftbincount'

In [149]:
# Datastore that will receive the output of the ArraysToProductBinCount transform.
productsiftbincount_ds=SQLiteTableDataStore(productsiftbincount_path, productsiftbincount_table_name)

In [150]:
# Single-stage chain: turn each sift set into a product bincount using pq_quantizer.
transformers = [
    trs.ArraysToProductBinCount(pq_quantizer),
]
# Read sift sets from siftsets_ds and write the bincounts into productsiftbincount_ds.
steps.transform_step(siftsets_ds, transformers, productsiftbincount_ds)

In [1]:
# Print the item count and the shapes of the first items of productsiftbincount_ds.
# NOTE(review): the recorded output of this cell is a NameError and its execution
# count is 1, so it was apparently last run on a fresh kernel before the import
# cell; run the import cell first and re-execute to refresh the output.
print_ds_items_info(productsiftbincount_ds)

NameError: name 'print_ds_items_info' is not defined

# Exhaustive search by bows

In [152]:
# Datastore that will receive the neighbor ids found by the exhaustive search.
np_neighbor_ids_ds=NumpyDataStore()
# ndarray adapter over the bincount datastore; the final shape is detected from the first element.
productsiftbincount_ndarray_ds = StreamNdarrayAdapterDataStore(productsiftbincount_ds, detect_final_shape_by_first_elem=True)

In [153]:
# Exhaustive searcher built from all bincounts and their ids, both ordered by id.
exs_searcher = ExhaustiveSearcher(
    productsiftbincount_ndarray_ds.get_items_sorted_by_ids(),
    productsiftbincount_ndarray_ds.get_ids_sorted(),
)

In [154]:
# Number of neighbors requested per query item.
n_neighbors = 15
# Query the searcher with every item of productsiftbincount_ds and store the
# neighbor ids in np_neighbor_ids_ds.
steps.search_step(
    productsiftbincount_ds,
    exs_searcher,
    n_neighbors,
    np_neighbor_ids_ds,
)

In [155]:
# Print the first 10 items themselves rather than their shapes (print_shape=False).
print_ds_items_info(np_neighbor_ids_ds, first_items_to_print=10, print_shape=False)

count of items in ds:  999
shape of item[0]:  [  1   4   2   3   5   8 378   7 382   6 385 384 373 370 381]
shape of item[1]:  [  2   3   5   1   8   6   9   4 378   7 382 381 385 421 373]
shape of item[2]:  [  3   2   5   6   1   8   4   9   7 378 381 382 385 370 373]
shape of item[3]:  [  4   7   5   8   1 385 378 381 382 391 373 383 423 370 421]
shape of item[4]:  [  5   8   3   4   2   6   1   9   7 385 381 378 382 373 384]
shape of item[5]:  [  6   9   3   5   2   8   1 385 378 373 370 421 384 381 376]
shape of item[6]:  [  7   4   8 385 387 378   5 381 382 391 395 371 373 855 421]
shape of item[7]:  [  8   5   7   4   2   3   1 385 381   9 378 387 382 373 421]
shape of item[8]:  [  9   6   5   3   2   8   1 385 378 381 370 373 421 384 419]
shape of item[9]:  [ 10 416 415 419 420 421 418 383 376 423 417 373 422 805  57]


# Approximate search with inverted multi-index

### Quantize global descriptors

In [156]:
# Raw string: '\i' is not a valid escape sequence, so a plain literal triggers an
# invalid-escape warning; the resulting value is unchanged.
productsiftbincountclusters_path = r'ds_data\imgid_productsiftbincount_clusters'
# Table holding the clusters of the global descriptors.
productsiftbincountclusters_table_name = 'clusterid_cluster'

In [157]:
# Datastore that will receive the output of quantizing the global descriptors.
productsiftbincountclusters_ds=SQLiteTableDataStore(productsiftbincountclusters_path, productsiftbincountclusters_table_name)

In [158]:
# Product quantizer for the global descriptors: 256 clusters, 2 quantizers.
# Note: this rebinds the notebook-level name `quantizer`, which earlier held the
# quantizer used for the local sift descriptors.
quantizer = PQQuantizer(n_clusters=256, n_quantizers=2)

In [159]:
# Run the quantization step on the bincounts; the result is written to
# productsiftbincountclusters_ds.
steps.quantize_step(productsiftbincount_ds, quantizer, productsiftbincountclusters_ds)

In [160]:
# Print the item count and the shapes of the first items of productsiftbincountclusters_ds.
print_ds_items_info(productsiftbincountclusters_ds)

count of items in ds:  2
shape of item[0]:  (256, 1500)
shape of item[1]:  (256, 1500)


### Build inverted multi-index

In [161]:
# ndarray adapter over the global-descriptor cluster datastore; the final shape
# is detected from the first element.
productsiftbincountclusters_ndarray_ds = StreamNdarrayAdapterDataStore(
    productsiftbincountclusters_ds,
    detect_final_shape_by_first_elem=True,
)
# Stored clusters as a single array, ordered by id.
cluster_centers = productsiftbincountclusters_ndarray_ds.get_items_sorted_by_ids()
print("cluster_centers: ", cluster_centers.shape)

cluster_centers:  (2, 256, 1500)


In [162]:
# ndarray adapter over the bincount datastore; the final shape is detected from
# the first element.
productsiftbincount_ndarray_ds = StreamNdarrayAdapterDataStore(
    productsiftbincount_ds,
    detect_final_shape_by_first_elem=True,
)
# Global descriptors and their ids, both ordered by id.
X = productsiftbincount_ndarray_ds.get_items_sorted_by_ids()
X_ids = productsiftbincount_ndarray_ds.get_ids_sorted()
print("X: ", X.shape)
print("X_ids: ", X_ids.shape)

X:  (999, 3000)
X_ids:  (999,)


In [None]:
# Build the inverted multi-index searcher from the global descriptors (X),
# their ids (X_ids) and the cluster centers of the global-descriptor quantizer.
imi_searcher=InvertedMultiIndexSearcher(X, X_ids, cluster_centers)

### Search approximate neighbors for all bows (productsiftbincount_ds)

In [None]:
# Datastore that will receive the neighbor ids found by the approximate search.
approximateneighborsids_ds = NumpyDataStore()
# Query the inverted multi-index searcher with every item of productsiftbincount_ds.
# NOTE(review): 16 neighbors are requested here while the exhaustive search uses
# n_neighbors = 15 — confirm the difference is intended.
steps.search_step(
    productsiftbincount_ds,
    imi_searcher,
    16,
    approximateneighborsids_ds,
)

In [None]:
# Print the first 50 items themselves rather than their shapes (print_shape=False).
print_ds_items_info(approximateneighborsids_ds, print_shape=False, first_items_to_print=50)