In [2]:
import import_hack
import core.steps
# `import core.steps` only binds the name `core`; the cells below call `steps.<fn>` directly.
from core import steps
import core.data_store
from core.data_store.sqlite_table_datastore import SQLiteTableDataStore
from core.data_store.sqlite_table_one_to_many_datastore import SQLiteTableOneToManyDataStore
from core.data_store.file_system_directory_datastore import FileSystemDirectoryDataStore
from core.data_store.numpy_datastore import NumpyDataStore
from core.data_store.stream_ndarray_adapter_datastore import StreamNdarrayAdapterDataStore
from core.quantization.pq_quantizer import PQQuantizer, restore_from_clusters
from core.transformer import transformers as trs
from core.search.exhaustive_searcher import ExhaustiveSearcher
from core.search.inverted_multi_index_searcher import InvertedMultiIndexSearcher
from core.common.ds_utils import print_ds_items_info
from core.evaluation.retrieval_perfomance import PrecisionRecallAveragePrecisionEvaluator
from core.evaluation.ground_truth import BrodatzGroundTruth
import numpy as np
%load_ext autoreload
%autoreload 2

ImportError: cannot import name 'transformers'

# Compute descriptors (siftsets)

In [3]:
# Directory with the Brodatz images, and the path/table name used for the SIFT datastores below.
img_dir_path = r'C:\data\images\brodatz\data.brodatz\size_213x213'
# Raw string, like img_dir_path: '\s' is not a valid escape sequence, so the plain literal
# triggers an invalid-escape warning on current Pythons. The resulting value is unchanged.
siftsets_path = r'ds_data\siftid_sift_imgid'
siftsets_table_name = 'siftid_sift_imgid'

The table can be interpreted either as One-to-One (id, itemid) or as One-to-Many (id, itemid, foreignid).
To use it as One-to-One, use SQLiteTableDataStore. In this case an item is a single sift.
To use it as One-to-Many, use SQLiteTableOneToManyDataStore. In this case items are grouped by foreignid and aggregated into an ndarray, so an item is an ndarray of sifts, e.g. an ndarray with shape (*number of sifts in image*, 128).

In [4]:
# Source: the image directory. Destination for SIFT sets: the SQLite table in its
# one-to-many view (items grouped by foreignid, per the note above).
images_ds = FileSystemDirectoryDataStore(dir_path=img_dir_path)
siftsets_ds = SQLiteTableOneToManyDataStore(siftsets_path, siftsets_table_name)

# Report how many items the image datastore exposes.
images_count_label = "images count in '{0}': ".format(img_dir_path)
print(images_count_label, images_ds.get_count())

images count in 'C:\data\images\brodatz\data.brodatz\size_213x213':  999


* get items from **images_ds**
* apply a sequence of transformations to each item: bytes -> ndarray -> opencvMatrix -> siftSet
* write the results to **siftsets_ds**

In [15]:
# Per-item pipeline: bytes -> ndarray -> OpenCV matrix -> SIFT set.
transformers_ = [
    trs.BytesToNdarray(),
    trs.NdarrayToOpencvMatrix(),
    trs.OpencvMatrixToSiftsSet(),
]
# Read from images_ds, apply the pipeline, write into siftsets_ds.
steps.transform_step(images_ds, transformers_, siftsets_ds)

In [5]:
# Print the item count and the shapes of the first items in siftsets_ds.
print_ds_items_info(siftsets_ds)

count of items in ds:  999
shape of item[0]:  (379, 128)
shape of item[1]:  (407, 128)
shape of item[2]:  (395, 128)


# Compute sample

To build a bag of visual words we need visual words, which means we need clusters, which means we need to quantize the sifts.

We will not quantize all descriptors, only a sample of them.

In [6]:
# Same path and table as siftsets_ds, opened through SQLiteTableDataStore
# instead of the one-to-many datastore class.
sifts_ds = SQLiteTableDataStore(siftsets_path, siftsets_table_name)

Here we treat the table as One-to-One, so only (id, item) pairs are sampled; foreignid is not considered.

In [7]:
# Print the item count and the shapes of the first items in sifts_ds.
print_ds_items_info(sifts_ds)

count of items in ds:  826845
shape of item[0]:  (128,)
shape of item[1]:  (128,)
shape of item[2]:  (128,)


In [8]:
# Sampling fraction handed to steps.sampling_step; it is also embedded in the sample's path.
sample_part = 0.5
# Raw string: '\s' is not a valid escape sequence, so the plain literal triggers an
# invalid-escape warning on current Pythons. The resulting value is unchanged.
sample_path = r'ds_data\siftid_sift_sample{0}'.format(sample_part)
sample_table_name = 'siftid_sift'

In [9]:
# Datastore handle for the sample table; passed as the last argument to steps.sampling_step below.
sample_ds = SQLiteTableDataStore(sample_path, sample_table_name)

In [9]:
# Run the sampling step over sifts_ds with fraction sample_part. sample_ds is passed last,
# presumably as the destination -- confirm in core.steps.
steps.sampling_step(sifts_ds, sample_part, sample_ds)

In [10]:
# Print the item count and the shapes of the first items in sample_ds.
print_ds_items_info(sample_ds)

count of items in ds:  413422
shape of item[0]:  (128,)
shape of item[1]:  (128,)
shape of item[2]:  (128,)


# Quantize local descriptors

In [11]:
# Configuration for quantizing the sampled descriptors.
n_clusters = 300
# Raw string: '\s' is not a valid escape sequence, so the plain literal triggers an
# invalid-escape warning on current Pythons. The resulting value is unchanged.
sift_clusters_path = r'ds_data\siftid_sift_sample{0}-clusters{1}'.format(sample_part, n_clusters)
sift_clusters_table_name = 'clusterid_cluster'
# Datastore handle for the cluster table; passed as the last argument to steps.quantize_step below.
sift_clusters_ds = SQLiteTableDataStore(sift_clusters_path, sift_clusters_table_name)
# Product quantizer with two sub-quantizers of n_clusters centers each.
quantizer = PQQuantizer(n_clusters=n_clusters, n_quantizers=2)

In [25]:
# Run the quantize step on sample_ds with the PQ quantizer. sift_clusters_ds is passed last,
# presumably as the destination for the cluster centers -- confirm in core.steps.
steps.quantize_step(sample_ds, quantizer, sift_clusters_ds)

In [12]:
# Print the item count and the shapes of the items in sift_clusters_ds.
print_ds_items_info(sift_clusters_ds)

count of items in ds:  2
shape of item[0]:  (300, 64)
shape of item[1]:  (300, 64)


# Compute bows (sift cluster bincounts)

Restore pq_quantizer from the cluster centers.

In [13]:
# Read the stored cluster centers through the ndarray adapter, sorted by id,
# and rebuild a quantizer from them.
ds_clusters_ndarray_adapter = StreamNdarrayAdapterDataStore(
    sift_clusters_ds,
    detect_final_shape_by_first_elem=True,
)
clusters = ds_clusters_ndarray_adapter.get_items_sorted_by_ids()
pq_quantizer = restore_from_clusters(clusters)

In [14]:
# Path and table name of the datastore that holds the sift bincounts.
# Raw string: '\i' is not a valid escape sequence, so the plain literal triggers an
# invalid-escape warning on current Pythons. The resulting value is unchanged.
siftbincount_path = r'ds_data\imgid_siftbincount'
siftbincount_table_name = 'imgid_siftbincount'

In [15]:
# Datastore handle for the bincount table; passed as the last argument to steps.transform_step below.
siftbincount_ds=SQLiteTableDataStore(siftbincount_path, siftbincount_table_name)

In [110]:
# Single-stage pipeline: turn each item of siftsets_ds into a bincount using the restored quantizer.
transformers = [
    trs.ArraysToBinCount(pq_quantizer),
]
# Read from siftsets_ds, apply the pipeline, write into siftbincount_ds.
steps.transform_step(siftsets_ds, transformers, siftbincount_ds)

In [16]:
# First call prints the item count and item shapes.
print_ds_items_info(siftbincount_ds)
# Second call (print_shape=False) prints the item values instead of their shapes.
print_ds_items_info(siftbincount_ds, print_shape=False)

count of items in ds:  999
shape of item[0]:  (900,)
shape of item[1]:  (900,)
shape of item[2]:  (900,)
count of items in ds:  999
item[0]:  [  0   0   0   0   0   0   1   0   1   0   0   0   0   0   0   0   0   0
   0   1   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   2   0   0   0   0   0   0   0   1   0   0   1   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   3   0   0   0   0   0   0   0   0   0   0   0   0
   2   0   0   1   0   0   0   0   0   0   1   1   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
   0   0   1   0   0   0   0   0   0   0   0   0  20   

In [17]:
# Inspect the first bincount (by id order): how many bins are non-zero, which ones, and their counts.
with siftbincount_ds:
    sift1 = next(iter(siftbincount_ds.get_items_sorted_by_ids()))
    nonzero_idx = np.nonzero(sift1)
    print(len(nonzero_idx[0]))
    print(nonzero_idx)
    print(sift1[nonzero_idx])

92
(array([  6,   8,  19,  20,  40,  48,  51,  95, 108, 111, 118, 119, 142,
       186, 200, 210, 218, 224, 226, 228, 230, 233, 238, 242, 245, 252,
       289, 303, 316, 318, 321, 325, 328, 329, 335, 353, 395, 408, 413,
       432, 442, 445, 448, 450, 457, 466, 469, 474, 481, 488, 499, 510,
       516, 523, 582, 592, 598, 605, 616, 624, 628, 646, 652, 658, 659,
       661, 665, 667, 668, 679, 680, 687, 691, 704, 709, 730, 732, 741,
       742, 748, 800, 804, 810, 811, 830, 845, 858, 862, 865, 868, 869, 894], dtype=int64),)
[  1   1   1   1   2   1   1   3   2   1   1   1   1   1   1  20   1   2
   1   1   9   1   1   1   2   1   2   1   2   3   1   1   2   2   3   1
   7   1   1   1   1   1   1  19   1   1   5   1   1   1  10   1   6   1
   1   2   2  16   1   1   1   4   1   5   7  10   1   1   4  24   2   1
   1   1   9   1   1   2   1   1   2   3   3   1   1 118   2   2   5   2
   1   1]


# Exhaustive search by bows

In [18]:
# NumpyDataStore that is passed as the last argument to steps.search_step below.
np_neighbor_ids_ds=NumpyDataStore()
# ndarray adapter over siftbincount_ds; its sorted items and ids feed the exhaustive searcher.
# detect_final_shape_by_first_elem presumably infers the array shape from the first item -- TODO confirm.
siftbincount_ndarray_ds = StreamNdarrayAdapterDataStore(siftbincount_ds, detect_final_shape_by_first_elem=True)

In [19]:
# Build the exhaustive searcher from all bincounts and their ids, both in id order.
exs_searcher = ExhaustiveSearcher(
    siftbincount_ndarray_ds.get_items_sorted_by_ids(),
    siftbincount_ndarray_ds.get_ids_sorted(),
)

In [34]:
# Number of neighbors requested per query item.
n_neighbors = 25
# Query the searcher with every item of siftbincount_ds; np_neighbor_ids_ds is passed last.
steps.search_step(siftbincount_ds, exs_searcher, n_neighbors, np_neighbor_ids_ds)

In [35]:
# Print the item count and the values (not shapes) of up to 15 neighbor-id items.
print_ds_items_info(np_neighbor_ids_ds, first_items_to_print=15, print_shape=False)

count of items in ds:  999
item[0]:  [  1   8   4   2   3   5   7   6 395 855 379 378 377 391 380 385 381   9
 382 853 390 386 848 373 416]
item[1]:  [  2   3   5   8   6   9   1   4 395   7 855 385 379 391 377 378 381 848
 380 853 210 387 392 371 382]
item[2]:  [  3   2   5   6   8   9   1   4 395   7 855 385 391 379 378 377 381 853
 848 380 387 392 854 382 371]
item[3]:  [  4   7   8   5 855 395   1   2 385 848 378 379 391 377   3 387 853 371
 854 381 849 847 382 380 392]
item[4]:  [  5   3   2   8   4   6   9   1 395   7 385 391 855 379 377 378 381 387
 380 853 382 371 848 392 850]
item[5]:  [  6   3   9   2   5   8   1   4 395 385 391 379 378 853 377 380 381 210
 382 387 855 371 390 386 375]
item[6]:  [  7   4   8 855 395 371 849 387   5 848   1 385 847 378 379 391 377 853
 380 381 382   3 372 392 850]
item[7]:  [  8   4   5   3   7   2   1   9 855   6 395 371 849 387 848 847 385 391
 379 377 378 853 381 854 380]
item[8]:  [  9   6   3   2   5   8   4 395   1 387 371 385 391 855 37

# Evaluate retrieval performance

In [36]:
# Evaluator built on the Brodatz ground truth. Judging by its class name it yields precision,
# recall and average precision -- confirm in core.evaluation.
perfomance_evaluator=PrecisionRecallAveragePrecisionEvaluator(BrodatzGroundTruth())

In [37]:
# NumpyDataStore that is passed as the last argument to steps.evaluation_step below.
retrieval_perfomance_ds=NumpyDataStore()

In [38]:
# Run the evaluation step over the neighbor-id items with perfomance_evaluator.
# retrieval_perfomance_ds is passed last, presumably as the destination -- confirm in core.steps.
steps.evaluation_step(np_neighbor_ids_ds, perfomance_evaluator, retrieval_perfomance_ds)

In [39]:
# Print the item count and the values (not shapes) of up to 3 evaluation items.
print_ds_items_info(retrieval_perfomance_ds, first_items_to_print=3, print_shape=False)

count of items in ds:  999
item[0]:  [[  1.           2.           3.           4.           5.           6.
    7.           8.           9.          10.          11.          12.
   13.          14.          15.          16.          17.          18.
   19.          20.          21.          22.          23.          24.
   25.        ]
 [  1.           1.           1.           1.           1.           1.
    1.           1.           0.88888889   0.8          0.72727273
    0.66666667   0.61538462   0.57142857   0.53333333   0.5          0.47058824
    0.5          0.47368421   0.45         0.42857143   0.40909091
    0.39130435   0.375        0.36      ]
 [  0.1          0.2          0.3          0.4          0.5          0.6
    0.7          0.8          0.8          0.8          0.8          0.8
    0.8          0.8          0.8          0.8          0.8          0.9
    0.9          0.9          0.9          0.9          0.9          0.9
    0.9       ]
 [  0.1          0.2   

# Evaluate average retrieval performance

In [31]:
# NumpyDataStore that is passed as the last argument to steps.averaging_step below.
average_retrieval_perfomance_ds=NumpyDataStore()

In [53]:
# Run the averaging step over retrieval_perfomance_ds. average_retrieval_perfomance_ds is passed
# last, presumably as the destination -- confirm in core.steps.
steps.averaging_step(retrieval_perfomance_ds, average_retrieval_perfomance_ds)

In [81]:
# First call prints the item count and item shapes.
print_ds_items_info(average_retrieval_perfomance_ds)
# Second call (print_shape=False) prints the item values instead of their shapes.
print_ds_items_info(average_retrieval_perfomance_ds, first_items_to_print=3, print_shape=False)

count of items in ds:  1
shape of item[0]:  (4, 25)
count of items in ds:  1
item[0]:  [[  1.           2.           3.           4.           5.           6.
    7.           8.           9.          10.          11.          12.
   13.          14.          15.          16.          17.          18.
   19.          20.          21.          22.          23.          24.
   25.        ]
 [  0.998999     0.9004004    0.84918252   0.80980981   0.77777778
    0.75041708   0.72186472   0.69732232   0.66722278   0.61671672
    0.57257257   0.53495162   0.5016555    0.47211497   0.44704705
    0.42505005   0.40428664   0.38638639   0.36979084   0.3548048
    0.34086467   0.32787333   0.31583758   0.30501335   0.29449449]
 [  0.0998999    0.18008008   0.25475475   0.32392392   0.38888889
    0.45025025   0.50530531   0.55785786   0.6005005    0.61671672
    0.62982983   0.64194194   0.65215215   0.66096096   0.67057057
    0.68008008   0.68728729   0.6954955    0.7026026    0.70960961
    0.

In [118]:
# Three adapters over the averaged result, each selecting a different index via slice_get.
# Judging by the printed output, index 0 is the x axis (1..25); indices 1 and 2 are used
# as precision and recall.
all_rows = slice(None)
x_ds = StreamNdarrayAdapterDataStore(average_retrieval_perfomance_ds, slice_get=(all_rows, 0))
precision_ds = StreamNdarrayAdapterDataStore(average_retrieval_perfomance_ds, slice_get=(all_rows, 1))
recall_ds = StreamNdarrayAdapterDataStore(average_retrieval_perfomance_ds, slice_get=(all_rows, 2))
print_ds_items_info(x_ds, print_shape=False)

count of items in ds:  1
item[0]:  [  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.  14.  15.
  16.  17.  18.  19.  20.  21.  22.  23.  24.  25.]


In [120]:
# NOTE(review): this import sits mid-notebook; consider moving it to the top import cell.
import matplotlib.pyplot as plt
# Hand the x values, the pyplot module and the precision/recall series to the plotting step,
# then render the figure.
steps.plotting_step(x_ds, plt, [precision_ds, recall_ds])
plt.show()

[  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.  14.  15.
  16.  17.  18.  19.  20.  21.  22.  23.  24.  25.]
[ 0.998999    0.9004004   0.84918252  0.80980981  0.77777778  0.75041708
  0.72186472  0.69732232  0.66722278  0.61671672  0.57257257  0.53495162
  0.5016555   0.47211497  0.44704705  0.42505005  0.40428664  0.38638639
  0.36979084  0.3548048   0.34086467  0.32787333  0.31583758  0.30501335
  0.29449449]
[ 0.0998999   0.18008008  0.25475475  0.32392392  0.38888889  0.45025025
  0.50530531  0.55785786  0.6005005   0.61671672  0.62982983  0.64194194
  0.65215215  0.66096096  0.67057057  0.68008008  0.68728729  0.6954955
  0.7026026   0.70960961  0.71581582  0.72132132  0.72642643  0.73203203
  0.73623624]


# Approximate search with inverted multi-index

### Quantize global descriptors

In [156]:
# Path and table name of the datastore that holds the cluster centers of the global descriptors.
# Raw string: '\i' is not a valid escape sequence, so the plain literal triggers an
# invalid-escape warning on current Pythons. The resulting value is unchanged.
productsiftbincountclusters_path = r'ds_data\imgid_productsiftbincount_clusters'
productsiftbincountclusters_table_name = 'clusterid_cluster'

In [157]:
# Datastore handle for the cluster table; passed as the last argument to steps.quantize_step below.
productsiftbincountclusters_ds=SQLiteTableDataStore(productsiftbincountclusters_path, productsiftbincountclusters_table_name)

In [158]:
# NOTE: rebinds `quantizer`, which earlier held the 300-cluster quantizer used for the sifts.
# This one has two sub-quantizers with 256 clusters each.
quantizer = PQQuantizer(n_clusters=256, n_quantizers=2)

In [159]:
# NOTE(review): productsiftbincount_ds is not defined in any visible cell of this notebook, so this
# cell fails on a fresh kernel -- TODO add (or restore) the cell that builds it.
# productsiftbincountclusters_ds is passed last, presumably as the destination -- confirm in core.steps.
steps.quantize_step(productsiftbincount_ds, quantizer, productsiftbincountclusters_ds)

In [160]:
# Print the item count and the shapes of the items in productsiftbincountclusters_ds.
print_ds_items_info(productsiftbincountclusters_ds)

count of items in ds:  2
shape of item[0]:  (256, 1500)
shape of item[1]:  (256, 1500)


### Build inverted multi-index

In [161]:
# Read the stored cluster centers through the ndarray adapter, sorted by id, and report the shape.
productsiftbincountclusters_ndarray_ds = StreamNdarrayAdapterDataStore(
    productsiftbincountclusters_ds,
    detect_final_shape_by_first_elem=True,
)
cluster_centers = productsiftbincountclusters_ndarray_ds.get_items_sorted_by_ids()
print("cluster_centers: ", cluster_centers.shape)

cluster_centers:  (2, 256, 1500)


In [162]:
# Read the global descriptors and their ids through the ndarray adapter, both in id order,
# and report their shapes.
productsiftbincount_ndarray_ds = StreamNdarrayAdapterDataStore(
    productsiftbincount_ds,
    detect_final_shape_by_first_elem=True,
)
X = productsiftbincount_ndarray_ds.get_items_sorted_by_ids()
X_ids = productsiftbincount_ndarray_ds.get_ids_sorted()
print("X: ", X.shape)
print("X_ids: ", X_ids.shape)

X:  (999, 3000)
X_ids:  (999,)


In [None]:
# Build the inverted multi-index searcher from the descriptors X, their ids and the cluster centers.
imi_searcher=InvertedMultiIndexSearcher(X, X_ids, cluster_centers)

### Search approximate neighbors for all bovws (productsiftbincount_ds)

In [None]:
# NumpyDataStore that is passed as the last argument to the search step.
approximateneighborsids_ds=NumpyDataStore()
# Query the inverted multi-index searcher with every item of productsiftbincount_ds.
# The literal 16 sits in the position where the exhaustive search passed n_neighbors.
steps.search_step(productsiftbincount_ds, imi_searcher, 16 , approximateneighborsids_ds)

In [None]:
# Print the item count and the values (not shapes) of up to 50 approximate-neighbor items.
print_ds_items_info(approximateneighborsids_ds, print_shape=False, first_items_to_print=50)