In [10]:
import import_hack
import numpy as np
import core.steps as steps
from core import data_store as ds
from core import transformer as tr
from core.common.ds_utils import print_ds_items_info
from os.path import join as pjoin
from core.search.inverted_multi_index_searcher import InvertedMultiIndexSearcher
from core.quantization.pq_quantizer import PQQuantizer, restore_from_clusters, build_pq_params_str
from core.metric.symmetric_distance_computer import SymmetricDistanceComputer
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Root directory holding every artifact this notebook reads and writes.
# NOTE: machine-specific absolute Windows path; adjust before running elsewhere.
base_dir = r'C:\data\computation\brodatz\global_descriptors'

#### Setup global descriptors datastores
The descriptors have already been computed in `global_descriptor__compute_from_image`.

In [12]:
# One SQLite-backed data store per global descriptor type, all located under base_dir.
histograms_ds, lbphistograms_ds, glcms_ds = (
    ds.SQLiteTableDataStore(pjoin(base_dir, table_name))
    for table_name in ('histograms', 'lbphistograms', 'glcms')
)

# Inverted multi-index search

In [13]:
# Candidate numbers of clusters per sub-quantizer (K): 16, 32, 64, 128, 256.
K_arr = [2 ** exponent for exponent in range(4, 9)]
# Candidate numbers of sub-quantizers (m).
m_arr = [1, 2, 4, 8, 16]
# The inverted multi-index takes 4 * K ** m bytes of memory, so only the (K, m)
# combinations whose index fits into the byte budget below are kept.
bytes_free = 1 << 34
pq_params_arr = [
    {'n_clusters': K, 'n_quantizers': m}
    for K in K_arr
    for m in m_arr
    if 4 * K ** m < bytes_free
]

In [14]:
def imi_search(pq_params, descriptors_name, query_descriptors_ds, n_nearest=25):
    """Run an inverted multi-index search for one PQ configuration and store the result.

    Reads the centroids and PQ codes stored under ``base_dir`` for the given
    descriptor name and PQ parameters, builds an ``InvertedMultiIndexSearcher``
    from them and hands it to ``steps.search_step`` together with the query
    data store and a CSV data store for the output.

    Parameters
    ----------
    pq_params : dict
        Keyword arguments for ``PQQuantizer``; also used to build the
        parameter string embedded in every artifact file name.
    descriptors_name : str
        Descriptor name used as sub-directory and file-name prefix
        (the notebook uses 'histograms', 'lbphistograms' and 'glcms').
    query_descriptors_ds
        Data store forwarded to ``steps.search_step`` as the query source.
    n_nearest : int, optional
        Value forwarded to ``steps.search_step`` as the neighbor count.
        Defaults to 25, the value that used to be hard-coded here.
    """
    pq_params_str = build_pq_params_str(pq_params)
    # NOTE(review): the quantizer instance is never used below; the construction
    # is kept in case PQQuantizer validates pq_params. Confirm and remove.
    PQQuantizer(**pq_params)

    def _artifact_path(kind):
        # <base_dir>/<kind>/<descriptors_name>/<descriptors_name>_<pq_params_str>_<kind>
        return pjoin(base_dir, kind, descriptors_name, descriptors_name) + "_" + pq_params_str + "_" + kind

    centroids_ds = ds.SQLiteTableDataStore(_artifact_path('centroids'))
    centroids = ds.get_as_array(centroids_ds)

    pqcodes_ds = ds.SQLiteTableDataStore(_artifact_path('pqcodes'), ndarray_bytes_only=True)
    ids, pqcodes = ds.get_as_array(pqcodes_ds, return_ids=True)

    searcher = InvertedMultiIndexSearcher(ids, centroids, x_pqcodes=pqcodes)

    neighbors_ids_ds = ds.CSVDataStore(_artifact_path('imi-neighbors-ids'))

    steps.search_step(query_descriptors_ds, searcher, n_nearest, neighbors_ids_ds)

In [16]:
# Run the inverted multi-index search on the histogram descriptors for every PQ configuration.
for pq_params in pq_params_arr:
    imi_search(
        pq_params=pq_params,
        descriptors_name='histograms',
        query_descriptors_ds=histograms_ds,
    )

In [18]:
# Run the inverted multi-index search on the LBP histogram descriptors for every PQ configuration.
for pq_params in pq_params_arr:
    imi_search(
        pq_params=pq_params,
        descriptors_name='lbphistograms',
        query_descriptors_ds=lbphistograms_ds,
    )

In [19]:
# Run the inverted multi-index search on the GLCM descriptors for every PQ configuration.
for pq_params in pq_params_arr:
    imi_search(
        pq_params=pq_params,
        descriptors_name='glcms',
        query_descriptors_ds=glcms_ds,
    )