# Make Image Descriptor Dictionary

We want a data structure such that `dic['image_name']` returns a tuple of (keypoints, descriptors), where

* keypoints is a 2D numpy array whose rows represent (x, y, a, b, c)
* descriptors is a 2D numpy array of shape (num_descriptor, descriptor_dim)

# (Check this again; I doubt it now after testing.) Alternatively, we could build a new one from the provided feature info for Oxford 5k

The Oxford 5k dataset already provides SIFT descriptors and visual-word info. 
The file containing the SIFT descriptors does not record how many descriptors belong to which image. 
This missing information can be found in the visual-word info files. 
Once we have this assignment, we can build a dictionary with key: image name, value: list of descriptors.

## Requirements

* File containing the SIFT descriptors: `feat_oxc1_hesaff_sift.bin`
* Image filename order for the above file: `order.txt`
* Bag-of-words information directory: `word_oxc1_hesaff_sift_16M_1M`


In [None]:
# The Oxford 5k release ships the 128-d descriptors of all images in a single
# binary file; load them here so that later cells can try to split them back
# into per-image groups.
from utils.oxf5k_feature_reader import feature_reader

# Location of the descriptor binary, followed by the actual read.
feature_bin_path = "./data/feature/feat_oxc1_hesaff_sift.bin"
all_features = feature_reader(feature_bin_path)

# Report how many descriptors were loaded.
num_all_features = len(all_features)
print('num of 128d descriptors in oxf5k: ', num_all_features)


In [None]:
# Read the order of filenames related to above features. Refer README in http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/README2.txt
import os

feature_bin_filename_order_path = "./data/feature/order.txt"
with open(feature_bin_filename_order_path) as f:
    # Drop blank lines (e.g. a trailing newline) so they are not mistaken for
    # an image name, which would make the ".txt" lookup below fail.
    filenames = [line.strip() for line in f if line.strip()]

BOW_INFO_DIR = './data/word_oxc1_hesaff_sift_16M_1M'

# (image_name, num_descriptor) pairs, in the same order as `filenames`.
image_feature_count_info = []
for image_name in filenames:
    filename = image_name + ".txt"
    with open(os.path.join(BOW_INFO_DIR, filename)) as f:
        # Only the header is needed: skip its first line and take the
        # descriptor count from the second one, instead of loading the
        # whole file into memory just to slice off two lines.
        f.readline()
        num_descriptor = int(f.readline())
    image_feature_count_info.append((image_name, num_descriptor))

print('image_feature_count_info[:5]:', image_feature_count_info[:5])

# Check compatibility of name order
# NOTE: the names stored in image_feature_count_info were copied from
# `filenames` in the loop above, so this comparison cannot report a mismatch
# as written; it only becomes meaningful if the names are read from the
# visual-word files themselves.
for idx, (name_from_vw, _) in enumerate(image_feature_count_info):
    if name_from_vw != filenames[idx]:
        print(idx, name_from_vw, filenames[idx])
print("compatibility check done")

In [None]:
import numpy as np
import pickle

# key: image_name, value: 2d numpy array of shape (num_descriptor, dim_descriptor)
image_descriptor_dict = {}

# Walk through all_features consecutively, handing each image the next
# `num_descriptor` entries.
start_idx = 0
for image_name, num_descriptor in image_feature_count_info:
    end_idx = start_idx + num_descriptor
    image_descriptor_dict[image_name] = np.array(all_features[start_idx:end_idx], dtype=np.uint8)
    start_idx = end_idx

# The slicing above is only meaningful when the per-image counts add up to
# the number of descriptors that were loaded; report a mismatch instead of
# silently pickling misaligned data.
if start_idx != len(all_features):
    print('WARNING: per-image descriptor counts sum to', start_idx,
          'but', len(all_features), 'descriptors were loaded')

with open('image_descriptor_dict_oxc1_hesaff_sift_16M.pkl', 'wb') as f:
    pickle.dump(image_descriptor_dict, f)

# Read from hesaff.sift files

In [16]:
import os
import cv2
import numpy as np
import pickle
from tqdm import tqdm
from multiprocessing import Pool, TimeoutError

# Directory whose entries are listed with os.listdir and parsed one by one
# with parse_sift_output further down in this cell.
SIFT_DIR = "./data/oxford5k_hesaff_sift"
# Prefix of the pickle file written at the end of this cell; a "_<N>M.pkl"
# suffix is appended to it.
output_image_descriptor_name = 'image_descriptor_dict_oxc5k_extracted_hesaff_sift'


def sift_to_rootsift(descs):
    """Convert SIFT descriptor(s) to RootSIFT.

    Applies the Hellinger kernel mapping: every descriptor is L1-normalized
    and then square-rooted element-wise.

    Args:
        descs: numpy array holding a single descriptor (1-D) or a batch of
            descriptors laid out along the last axis (e.g. shape
            (num_descriptor, descriptor_dim)). Each descriptor is normalized
            on its own.

    Returns:
        A new array of the same shape. float64 input stays float64; every
        other dtype is converted to float32. The input array is never
        modified.
    """
    # `np.float` (an alias of the builtin float, i.e. float64) was removed in
    # NumPy 1.24, so compare against the explicit dtype instead.
    if descs.dtype != np.float64:
        descs = descs.astype(np.float32)
    # eps keeps an all-zero descriptor from dividing by zero.
    eps = 1e-10
    # Per-descriptor L1 norm; keepdims lets it broadcast against descs.
    l1_norm = np.linalg.norm(descs, ord=1, axis=-1, keepdims=True)
    # Out-of-place division so a float64 argument is not altered for the caller.
    return np.sqrt(descs / (l1_norm + eps))
    
def parse_sift_output(target_path):
    """Parse one Hessian-affine SIFT text file into keypoints and descriptors.

    The file is read as follows: the first line is ignored, the second line
    holds the number of descriptors, and each following line holds
    whitespace-separated values `x y a b c d0 d1 ...`, where the first five
    are floats and the remaining ones are the integer descriptor entries.

    Args:
        target_path: path of the text file to parse.

    Returns:
        kp: float32 numpy array of shape (num_descriptor, 5); each row is
            (x, y, a, b, c).
        des: numpy array of shape (num_descriptor, descriptor_dim) holding
            the result of sift_to_rootsift applied to each uint8 descriptor
            (not the raw uint8 values). For a file that declares zero
            descriptors the shape is (0, 128).

    Raises:
        ValueError: if the file holds fewer descriptor lines than its header
            declares, or a line has fewer than five leading values.
    """
    # Only used to give the empty result a 2-D shape (128-d descriptors).
    descriptor_dim = 128

    with open(target_path, "r") as f:
        lines = [line.strip() for line in f]

    num_descriptor = int(lines[1])
    rows = lines[2:2 + num_descriptor]
    if len(rows) < num_descriptor:
        raise ValueError(
            "{}: header declares {} descriptors but only {} lines follow".format(
                target_path, num_descriptor, len(rows)))

    kp = []
    des = []
    for row in rows:
        # split() without an argument tolerates repeated spaces and tabs.
        val = row.split()
        x, y, a, b, c = (float(v) for v in val[:5])
        # TODO: generate ellipse shaped key point (e.g. as cv2.KeyPoint)
        # Refer: https://math.stackexchange.com/questions/1447730/drawing-ellipse-from-eigenvalue-eigenvector
        # Refer: http://www.robots.ox.ac.uk/~vgg/research/affine/det_eval_files/display_features.m
        # Refer: http://www.robots.ox.ac.uk/~vgg/research/affine/detectors.html
        kp.append([x, y, a, b, c])
        sift_descriptor = np.array([int(v) for v in val[5:]], dtype=np.uint8)
        des.append(sift_to_rootsift(sift_descriptor))

    # reshape keeps the keypoints 2-D even when there are none.
    kp_array = np.array(kp, dtype=np.float32).reshape(-1, 5)
    if des:
        des_array = np.array(des)
    else:
        des_array = np.empty((0, descriptor_dim), dtype=np.float32)
    return kp_array, des_array



filelist = os.listdir(SIFT_DIR)

# key: file name with ".jpg.hesaff.sift" removed, value: (keypoints, descriptors)
image_descriptor_dict = {}

# Running total of descriptors over all parsed files.
num_descriptors = 0


def run(filename):
    """Parse one entry of SIFT_DIR; worker function for the process pool.

    Args:
        filename: name of a file inside SIFT_DIR.

    Returns:
        (filename, (kp, des)) with kp and des as returned by
        parse_sift_output, so the caller can tell which file a result
        belongs to.
    """
    filepath = os.path.join(SIFT_DIR, filename)
    kp, des = parse_sift_output(filepath)
    return (filename, (kp, des))


# The context manager shuts the 20 worker processes down once every result
# has been consumed, instead of leaving the pool open.
with Pool(processes=20) as pool:
    # imap_unordered yields results in arbitrary order, hence the file name
    # travels with each result.
    for filename, tup in tqdm(pool.imap_unordered(run, filelist), total=len(filelist)):
        image_descriptor_dict[filename.replace(".jpg.hesaff.sift", "")] = tup
        num_descriptors += tup[1].shape[0]

print("total num_descriptors:", num_descriptors)
if filelist:
    # Skip the average for an empty directory rather than dividing by zero.
    print("avg num_descriptors:", num_descriptors / len(filelist))

# The file name encodes the total computed above, in millions
# (num_descriptors, not a per-file count).
with open(output_image_descriptor_name + "_{}M.pkl".format(num_descriptors // 1000000), 'wb') as f:
    pickle.dump(image_descriptor_dict, f)
print("done")






















  0%|          | 0/5062 [00:00<?, ?it/s][A
  0%|          | 2/5062 [00:00<04:44, 17.78it/s][A
  0%|          | 6/5062 [00:00<03:19, 25.40it/s][A
  0%|          | 13/5062 [00:00<02:20, 35.99it/s][A
  0%|          | 21/5062 [00:00<01:52, 44.97it/s][A
  1%|          | 26/5062 [00:00<01:55, 43.52it/s][A
  1%|          | 34/5062 [00:00<01:50, 45.38it/s][A
  1%|          | 42/5062 [00:00<01:43, 48.73it/s][A
  1%|          | 48/5062 [00:00<01:40, 49.87it/s][A
  1%|          | 54/5062 [00:01<01:41, 49.18it/s][A
  1%|          | 60/5062 [00:01<01:46, 47.13it/s][A
  1%|▏         | 68/5062 [00:01<01:45, 47.52it/s][A
  2%|▏         | 77/5062 [00:01<01:40, 49.85it/s][A
  2%|▏         | 83/5062 [00:01<01:41, 49.17it/s][A
  2%|▏         | 90/5062 [00:01<01:40, 49.55it/s][A
  2%|▏         | 96/5062 [00:01<01:40, 49.36it/s][A
  2%|▏         | 103/5062 [00:02<01:38, 50.32it/s][A
  2%|▏         | 110/5062 [00:02<01:37, 50.74it/s][A
  2%|▏         | 116/5062 [00:02<0

total num_descriptors: 13516675
avg num_descriptors: 2670.2242196760176
done
