In [104]:
import glob
import os
import time

import cv2
import numpy as np
import pandas as pd
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

In [105]:
# Root folder of the downloaded dataset -- change this to your own download path.
download_path = 'C:/Users/IV/Desktop/CityU'

# Sub-folders of the dataset:
#   path_query     -> query images
#   path_query_txt -> bounding box information of the instance(s) in each query image
#   path_gallery   -> gallery images to be ranked
path_query = f'{download_path}/query_4186'
path_query_txt = f'{download_path}/query_txt_4186'
path_gallery = f'{download_path}/gallery_4186'

# List every JPEG in the two image folders and record how many were found.
name_query = glob.glob(f'{path_query}/*.jpg')
name_gallery = glob.glob(f'{path_gallery}/*.jpg')
num_query, num_gallery = len(name_query), len(name_gallery)

# Load EfficientNet-B2 with its pretrained weights.
model = models.efficientnet_b2(pretrained=True)

# Put the WHOLE network in evaluation mode, not only `model.features`:
# the cells below also call `model(...)`, which runs the classifier head.
# Leaving that head in training mode keeps its dropout active and makes the
# second descriptor differ from one forward pass to the next.
model.eval()

feat_extractor = model.features  # the convolutional feature extractor
layer1 = model.features[:-1]     # the feature extractor without its last stage

# record_all[q, r] receives the gallery image number ranked r-th for query q.
record_all = np.zeros((num_query, len(name_gallery)))

# Image numbers are the file names without directory and extension.
# os.path handles both '/' and '\\' separators, so this also works outside
# Windows; the lists follow the order of name_query / name_gallery.
query_imgs_no = [os.path.splitext(os.path.basename(p))[0] for p in name_query]
gallery_imgs_no = [os.path.splitext(os.path.basename(p))[0] for p in name_gallery]

# Per-gallery-image descriptors, filled by the gallery feature extraction loop.
gallery_feature_1 = []
gallery_feature_2 = []
gallery_feature_3 = []

# Extract two descriptors for every gallery image:
#   gallery_feature_1 <- flattened output of `layer1`
#   gallery_feature_2 <- flattened output of the full `model`

# The preprocessing is the same for every image, so build it once.
crop_size = 224
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# Wrapping the list itself (instead of the enumerate object) lets tqdm show
# the total count and an ETA.
for gallery_img_no in tqdm(gallery_imgs_no, desc="Extracting gallery features"):
    per_gallery_name = path_gallery+'/'+str(gallery_img_no)+'.jpg'
    per_gallery = cv2.imread(per_gallery_name)
    if per_gallery is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read gallery image: {per_gallery_name}")

    # OpenCV loads BGR; convert to RGB, resize to 256x256 and keep the
    # central 224x224 region.
    per_gallery = cv2.cvtColor(per_gallery, cv2.COLOR_BGR2RGB)
    per_gallery = cv2.resize(per_gallery, (256, 256), interpolation=cv2.INTER_LINEAR)
    h, w = per_gallery.shape[:2]
    h_start = (h - crop_size) // 2
    w_start = (w - crop_size) // 2
    per_gallery = per_gallery[h_start:h_start+crop_size, w_start:w_start+crop_size]

    # Normalize, convert to a tensor and add a batch dimension of 1.
    img_transform = torch.unsqueeze(transform(per_gallery), 0)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        feats_1 = layer1(img_transform)
        feats_2 = model(img_transform)

    # Store each descriptor as a (1, D) numpy row vector.
    gallery_feature_1.append(feats_1.cpu().numpy().reshape(1, -1))
    gallery_feature_2.append(feats_2.cpu().numpy().reshape(1, -1))


111it [00:26,  4.23it/s]


KeyboardInterrupt: 

In [None]:
# Rank the gallery images for each query by the sum of two cosine
# similarities (one per descriptor) and store the ranking in record_all.

# Number of queries ranked by this cell; rows of record_all for any remaining
# queries keep their initial zeros.
num_queries_to_rank = 20

# The gallery loop can be interrupted part-way; refuse to rank against a
# missing or partial set of gallery descriptors.
if not gallery_feature_1 or not (
        len(gallery_feature_1) == len(gallery_feature_2) == len(gallery_imgs_no)):
    raise RuntimeError(
        "Gallery features are missing or incomplete: "
        f"{len(gallery_feature_1)}/{len(gallery_feature_2)} descriptors for "
        f"{len(gallery_imgs_no)} gallery images. Re-run the gallery feature "
        "extraction cell to completion first.")

# The preprocessing is the same for every query, so build it once.
crop_size = 224
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# Stack the per-image (1, D) descriptors into (num_gallery, D) matrices so
# each query is scored with one cosine_similarity call per descriptor.
gallery_matrix_1 = np.vstack(gallery_feature_1)
gallery_matrix_2 = np.vstack(gallery_feature_2)
gallery_ids = np.array(gallery_imgs_no)

for i, query_img_no in enumerate(tqdm(query_imgs_no[:num_queries_to_rank], desc="Ranking queries")):
    time_s = time.time()
    per_query_name = path_query+'/'+str(query_img_no)+'.jpg'
    per_query = cv2.imread(per_query_name)
    if per_query is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read query image: {per_query_name}")

    # Read the bounding box "x y w h" from the first line of the text file.
    # split() without arguments tolerates repeated spaces and tabs.
    queryfilename = path_query_txt+'/'+str(query_img_no)+'.txt'
    with open(queryfilename, 'r') as boundary_file:
        boundary = [int(b) for b in boundary_file.readline().split()]

    # Crop the query instance out of the image.
    x, y, box_w, box_h = boundary
    query_boundary = per_query[y:y+box_h, x:x+box_w]

    # Same preprocessing as the gallery images: BGR -> RGB, resize to
    # 256x256, then keep the central 224x224 region.
    query_boundary = cv2.cvtColor(query_boundary, cv2.COLOR_BGR2RGB)
    query_boundary = cv2.resize(query_boundary, (256, 256), interpolation=cv2.INTER_LINEAR)
    h, w = query_boundary.shape[:2]
    h_start = (h - crop_size) // 2
    w_start = (w - crop_size) // 2
    query_boundary = query_boundary[h_start:h_start+crop_size, w_start:w_start+crop_size]

    # Normalize, convert to a tensor and add a batch dimension of 1.
    img_transform = torch.unsqueeze(transform(query_boundary), 0)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        feats_1 = layer1(img_transform)
        feats_2 = model(img_transform)
    query_feature_1 = feats_1.cpu().numpy().reshape(1, -1)
    query_feature_2 = feats_2.cpu().numpy().reshape(1, -1)

    # One similarity score per gallery image: sum of the two cosine similarities.
    sim_score = (cosine_similarity(query_feature_1, gallery_matrix_1)
                 + cosine_similarity(query_feature_2, gallery_matrix_2)).ravel()

    # Gallery indexes in descending similarity order; the stable sort keeps
    # the original gallery order among ties.
    ranked_index = np.argsort(-sim_score, kind='stable')
    gallery_imgs_no_desc = gallery_ids[ranked_index]

    # record_all is numeric, so the image numbers (file names) are converted
    # from strings here; this requires the file names to be numeric.
    record_all[i, :] = gallery_imgs_no_desc.astype(record_all.dtype)
    time_e = time.time()
    print('retrieval time for query {} is {}s'.format(query_img_no, time_e-time_s))
    print(f'For query image No. {query_img_no}, the top ranked similar image No. is {gallery_imgs_no_desc[0]}.')

In [None]:
# write the output file following the example
f=open(r'./rank_list_CNN.txt','w')
for i in range(num_query):
    f.write('Q'+str(i+1)+': ')
    for j in range(len(name_gallery)):
        f.write(str(np.int32(record_all[i,j]))+' ')
    f.write('\n')
f.close()