In [1]:
import glob
import os
import time

import cv2
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

def extract_sift_des(image):
    """Return the feature descriptors computed for ``image``.

    Note that, despite the function name, the detector created here is ORB
    (``cv2.ORB_create``), not SIFT. A new detector is built on every call,
    keypoints are discarded, and only the descriptor result of
    ``detectAndCompute`` (called with no mask) is returned.
    """
    # detectAndCompute yields (keypoints, descriptors); keep the second item.
    return cv2.ORB_create().detectAndCompute(image, None)[1]

def read_bounding_box(path):
    """Read the bounding box stored on the first line of a text file.

    Args:
        path: Path of the text file to read. Only its first line is used.

    Returns:
        A list with one ``int`` per whitespace-separated token on that line
        (the query loop in this notebook unpacks it as ``x, y, w, h``).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token on the first line is not an integer.
    """
    # The context manager closes the file even when int() raises below.
    with open(path, 'r') as boundary_file:
        first_line = boundary_file.readline()
    # split() without an argument tolerates repeated spaces, tabs and
    # trailing whitespace, which split(' ') turned into empty tokens.
    return [int(token) for token in first_line.split()]

In [2]:
download_path='C:/Users/IV/Desktop/CityU' # change to your own download path
path_query=download_path+'/query_4186'
path_query_txt=download_path+'/query_txt_4186'

# path_query_txt is the directory to the bounding box information of the instance(s) for the query images
path_gallery=download_path+'/gallery_4186'

name_query=glob.glob(path_query+'/*.jpg')
num_query=len(name_query)
name_gallery=glob.glob(path_gallery+'/*.jpg')
num_gallery=len(name_gallery)
record_all=np.zeros((num_query,len(name_gallery)))
query_imgs_no = [x.split('\\')[-1] for x in glob.glob(path_query+'/*.jpg')]
query_imgs_no = [x[:-4] for x in query_imgs_no]

gallery_imgs_no = [x.split('\\')[-1] for x in glob.glob(path_gallery+'/*.jpg')]
gallery_imgs_no = [x[:-4] for x in gallery_imgs_no]

# Descriptors of every gallery image, in the same order as gallery_imgs_no.
gallery_des = []

# Iterating the list directly (instead of enumerate(...)) lets tqdm know the
# total and show a real progress bar; the index was unused anyway.
for gallery_img_no in tqdm(gallery_imgs_no, desc="Extracting gallery features"):
    per_gallery_name = path_gallery+'/'+str(gallery_img_no)+'.jpg'
    per_gallery = cv2.imread(per_gallery_name)
    if per_gallery is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail here with the file name instead of inside GaussianBlur.
        raise FileNotFoundError('cannot read gallery image: ' + per_gallery_name)

    # Image pre-processing: light blur, then histogram equalisation of the
    # luma (Y) channel only, done in YUV space and converted back to BGR.
    per_gallery = cv2.GaussianBlur(per_gallery, (3, 3), 0)
    per_gallery = cv2.cvtColor(per_gallery, cv2.COLOR_BGR2YUV)
    per_gallery[:, :, 0] = cv2.equalizeHist(per_gallery[:, :, 0])
    per_gallery = cv2.cvtColor(per_gallery, cv2.COLOR_YUV2BGR)

    # feature extraction for per gallery
    # NOTE(review): the descriptor result may be None for an image with no
    # detected keypoints -- confirm downstream code tolerates that.
    gallery_des.append(extract_sift_des(per_gallery))

5000it [03:03, 27.19it/s]


In [3]:
# Build the id array once. The previous code rebuilt it from the Python list
# for every single ranked entry (len(gallery) times per query).
gallery_ids = np.array(gallery_imgs_no)

# NOTE(review): only the first 20 queries are ranked here, while record_all
# has one row per query; rows of queries beyond the slice stay all-zero.
for i, query_img_no in enumerate(tqdm(query_imgs_no[0:20], desc="Queries")):
    time_s = time.time()

    per_query_name = path_query+'/'+str(query_img_no)+'.jpg'
    per_query = cv2.imread(per_query_name)
    if per_query is None:
        # cv2.imread returns None instead of raising on an unreadable file.
        raise FileNotFoundError('cannot read query image: ' + per_query_name)

    # read boundary from text file and crop the image to the instance
    queryfilename = path_query_txt+'/'+str(query_img_no)+'.txt'
    x, y, w, h = read_bounding_box(queryfilename)
    query_boundary = per_query[y:y+h, x:x+w]
    if query_boundary.size == 0:
        raise ValueError('empty bounding-box crop for query {}'.format(query_img_no))

    # Image pre-processing (same steps as applied to the gallery images)
    query_boundary = cv2.GaussianBlur(query_boundary, (3, 3), 0)
    query_boundary = cv2.cvtColor(query_boundary, cv2.COLOR_BGR2YUV)
    query_boundary[:, :, 0] = cv2.equalizeHist(query_boundary[:, :, 0])
    query_boundary = cv2.cvtColor(query_boundary, cv2.COLOR_YUV2BGR)

    # feature extraction for per query
    per_query_des = extract_sift_des(query_boundary)
    if per_query_des is None:
        raise ValueError('no descriptors extracted for query {}'.format(query_img_no))

    # the iteration loop for gallery: keep only the best pairwise cosine
    # similarity per gallery image, since that is all the ranking uses.
    # Storing the full similarity matrices is unnecessary memory.
    dist_record = []
    for per_gallery_des in tqdm(gallery_des, desc=f"Processing query part {i}"):
        if per_gallery_des is None:
            # A gallery image without descriptors cannot match; rank it last.
            dist_record.append(-np.inf)
            continue
        dist_record.append(np.max(cosine_similarity(per_query_des, per_gallery_des)))

    # indexes in descending similarity order; the stable sort keeps gallery
    # order among ties, matching sorted(..., reverse=True).
    ranked_index = np.argsort(-np.asarray(dist_record, dtype=float), kind='stable')

    # update the results for one query
    gallery_imgs_no_desc = gallery_ids[ranked_index]
    record_all[i, :] = gallery_imgs_no_desc.astype(record_all.dtype)

    time_e = time.time()
    print('retrieval time for query {} is {}s'.format(query_img_no, time_e-time_s))
    # Slicing instead of indexing [0]..[9] avoids an IndexError when the
    # gallery holds fewer than ten images; the printed text is unchanged.
    top_ranked = ' '.join(str(g) for g in gallery_imgs_no_desc[:10])
    print(f'For query image No. {query_img_no}, the top 10 ranked similar image No. are {top_ranked}')

Processing query part 0: 5000it [00:11, 450.85it/s]
1it [00:19, 19.09s/it]

retrieval time for query 1258 is 19.088042974472046s
For query image No. 1258, the top 10 ranked similar image No. are 2403 3831 2726 3960 91 3612 2408 734 3679 4937


Processing query part 1: 5000it [00:10, 486.14it/s]
2it [00:37, 18.56s/it]

retrieval time for query 1656 is 18.18750834465027s
For query image No. 1656, the top 10 ranked similar image No. are 2003 339 3748 4042 4209 3034 1938 3945 772 1227


Processing query part 2: 5000it [00:09, 509.96it/s]
3it [00:55, 18.20s/it]

retrieval time for query 1709 is 17.772496938705444s
For query image No. 1709, the top 10 ranked similar image No. are 177 3162 634 1073 960 4500 2847 2726 3284 4411


Processing query part 3: 5000it [00:07, 643.04it/s]
4it [01:10, 17.14s/it]

retrieval time for query 2032 is 15.516194343566895s
For query image No. 2032, the top 10 ranked similar image No. are 770 3660 4838 4056 21 1318 4098 2522 3344 3692


Processing query part 4: 5000it [00:09, 512.87it/s]
5it [01:28, 17.29s/it]

retrieval time for query 2040 is 17.563448429107666s
For query image No. 2040, the top 10 ranked similar image No. are 2240 2118 1973 2900 4615 2170 2316 3705 977 2536


Processing query part 5: 5000it [00:08, 570.74it/s]
6it [01:45, 17.22s/it]

retrieval time for query 2176 is 17.081005096435547s
For query image No. 2176, the top 10 ranked similar image No. are 2215 632 2553 439 655 3225 1513 380 3548 4790


Processing query part 6: 5000it [00:10, 499.15it/s]
7it [02:03, 17.49s/it]

retrieval time for query 2461 is 18.03589367866516s
For query image No. 2461, the top 10 ranked similar image No. are 343 4098 3171 3361 4298 466 151 1724 2557 2893


Processing query part 7: 5000it [00:08, 557.35it/s]
8it [02:20, 17.43s/it]

retrieval time for query 27 is 17.31149959564209s
For query image No. 27, the top 10 ranked similar image No. are 3457 26 2656 558 4079 3336 3945 1302 4193 65


Processing query part 8: 5000it [00:08, 558.05it/s]
9it [02:37, 17.27s/it]

retrieval time for query 2714 is 16.90504288673401s
For query image No. 2714, the top 10 ranked similar image No. are 4256 2955 4340 2588 4557 3375 422 2575 1293 3612


Processing query part 9: 5000it [00:08, 568.12it/s]
10it [02:54, 17.15s/it]

retrieval time for query 316 is 16.88684368133545s
For query image No. 316, the top 10 ranked similar image No. are 3113 3302 3835 1384 3083 2631 3934 992 3594 985


Processing query part 10: 5000it [00:09, 505.79it/s]
11it [03:12, 17.60s/it]

retrieval time for query 35 is 18.623727560043335s
For query image No. 35, the top 10 ranked similar image No. are 86 2424 1238 203 4523 4175 1860 1559 4249 4754


Processing query part 11: 5000it [00:09, 530.41it/s]
12it [03:30, 17.57s/it]

retrieval time for query 3502 is 17.502586364746094s
For query image No. 3502, the top 10 ranked similar image No. are 3331 990 4713 1725 4473 3493 2715 1128 3038 4271


Processing query part 12: 5000it [00:10, 495.35it/s]
13it [03:48, 17.76s/it]

retrieval time for query 3557 is 18.202712059020996s
For query image No. 3557, the top 10 ranked similar image No. are 4325 932 3921 3758 4978 810 3730 4948 3228 187


Processing query part 13: 5000it [00:06, 815.13it/s]
14it [04:02, 16.60s/it]

retrieval time for query 3833 is 13.901559114456177s
For query image No. 3833, the top 10 ranked similar image No. are 1038 2656 4674 3619 571 4514 4223 2815 4979 88


Processing query part 14: 5000it [00:05, 934.09it/s]
15it [04:15, 15.42s/it]

retrieval time for query 3906 is 12.698879480361938s
For query image No. 3906, the top 10 ranked similar image No. are 456 3168 3866 204 765 2097 982 1845 4262 2134


Processing query part 15: 5000it [00:10, 483.47it/s]
16it [04:34, 16.43s/it]

retrieval time for query 4354 is 18.761903524398804s
For query image No. 4354, the top 10 ranked similar image No. are 2 67 1392 444 4047 183 4323 2998 1818 827


Processing query part 16: 5000it [00:10, 491.40it/s]
17it [04:53, 17.27s/it]

retrieval time for query 4445 is 19.222631216049194s
For query image No. 4445, the top 10 ranked similar image No. are 649 1710 4473 1900 2007 2722 2987 819 4792 1350


Processing query part 17: 5000it [00:10, 498.72it/s]
18it [05:11, 17.53s/it]

retrieval time for query 4716 is 18.149714469909668s
For query image No. 4716, the top 10 ranked similar image No. are 4286 4184 1884 2807 4332 1459 654 691 3345 465


Processing query part 18: 5000it [00:09, 505.57it/s]
19it [05:29, 17.66s/it]

retrieval time for query 4929 is 17.95368456840515s
For query image No. 4929, the top 10 ranked similar image No. are 4080 3661 2238 3945 123 2138 4964 655 3749 4358


Processing query part 19: 5000it [00:06, 815.22it/s]
20it [05:43, 17.16s/it]

retrieval time for query 776 is 13.771233320236206s
For query image No. 776, the top 10 ranked similar image No. are 110 1583 3219 504 2292 4456 1032 2427 647 3460





In [4]:
# write the output file following the example:
# one line per query, "Q<n>: " followed by every ranked gallery id, each id
# followed by a single space, then a newline.
# NOTE(review): rows of record_all that were never filled by the ranking
# loop are written as zeros -- confirm that every query has been processed.
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open(r'./rank_list_SIFT.txt','w') as f:
    for i in range(num_query):
        ranked_ids = ''.join(
            str(np.int32(record_all[i, j]))+' ' for j in range(len(name_gallery))
        )
        f.write('Q'+str(i+1)+': '+ranked_ids+'\n')