In [13]:
import numpy as np 
import cv2 
import pandas as pd
import glob
import time 
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import imutils

def pyramid(image, scale=1.5, minSize=(30, 30)):
	# yield the original image
	yield image
	# keep looping over the pyramid
	while True:
		# compute the new dimensions of the image and resize it
		w = int(image.shape[1] / scale)
		image = imutils.resize(image, width=w)
		# if the resized image does not meet the supplied minimum
		# size, then stop constructing the pyramid
		if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
			break
		# yield the next image in the pyramid
		yield image
		
def sliding_window(image, stepSize, windowSize):
	# slide a window across the image
	for y in range(0, image.shape[0], stepSize):
		for x in range(0, image.shape[1], stepSize):
			# yield the current window
			yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

def extract_sift_des(image):
    sift = cv2.ORB_create()
    kp, des = sift.detectAndCompute(image, None)
    return des

def read_bounding_box(path):
    boundary_file = open(path, 'r')
    boundary = boundary_file.readline().strip().split(' ')
    boundary = [int(b) for b in boundary]
    boundary_file.close()
    return boundary

def img_preprocess(img):
    img = cv2.cvtColor(img,cv2.COLOR_BGR2YUV)
    img[:,:,0] = cv2.equalizeHist(img[:,:,0])
    img = cv2.cvtColor(img,cv2.COLOR_YUV2BGR)
    return img

def initialize(download_path):
    path_query=download_path+'/query_4186'
    path_query_txt=download_path+'/query_txt_4186'

    path_gallery=download_path+'/gallery_4186'

    name_query=glob.glob(path_query+'/*.jpg')
    num_query=len(name_query)
    name_gallery=glob.glob(path_gallery+'/*.jpg')
    num_gallery=len(name_gallery)
    record_all=np.zeros((num_query,len(name_gallery)))
    query_imgs_no = [x.split('\\')[-1] for x in glob.glob(path_query+'/*.jpg')]
    query_imgs_no = [x[:-4] for x in query_imgs_no]

    gallery_imgs_no = [x.split('\\')[-1] for x in glob.glob(path_gallery+'/*.jpg')]
    gallery_imgs_no = [x[:-4] for x in gallery_imgs_no]
    return path_gallery, path_query, path_query_txt, gallery_imgs_no, query_imgs_no, record_all, num_query, num_gallery

transform = transforms.Compose([
    transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
    transforms.Resize(260),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

In [19]:
path='C:/Users/IV/Desktop/CityU' 
path_gallery, path_query, path_query_txt, gallery_imgs_no, query_imgs_no, record_all, num_query, num_gallery = initialize(path)

model = models.mobilenet_v3_small(pretrained=True)
feat_extractor = model.features #define the feature extractor
layer1 = model.features[:-1]
feat_extractor.eval()  #set the mode as evaluation

gallery_features = [[] for i in range(num_gallery)]
gallery_des = [[] for i in range(num_gallery)]

for i, gallery_img_no in tqdm(enumerate(gallery_imgs_no)):
    per_gallery_name = path_gallery+'/'+str(gallery_img_no)+'.jpg'
    per_gallery=cv2.imread(per_gallery_name)
    
    # Image pre-processing
    per_gallery = img_preprocess(per_gallery)
    # Define the window size
    winW, winH = (375, 375)

    # loop over the image pyramid
    for resized in pyramid(per_gallery, scale=1.5):
        # loop over the sliding window for each layer of the pyramid
        for (x, y, window) in sliding_window(resized, stepSize=300, windowSize=(winW, winH)):
            # print(window.shape[0], window.shape[1])
            # if the window does not meet our desired window size, ignore it
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            # THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
            # MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
            # WINDOW
            # feature extraction for per gallery
            # window = resized.copy()
            # print(x, x + winW, y, y + winH )
            # window = window[x:x + winW, y:y + winH]
            gallery_sift = extract_sift_des(window)
            window = cv2.GaussianBlur(window,(3,3),0)
            #  Display the window (for visualization purposes)
            # cv2.imshow('Window 1', window)
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
            window = Image.fromarray(window)

            # preprocess the input image
            img_transform = transform(window) #normalize the input image and transform it to tensor.
            img_transform = torch.unsqueeze(img_transform, 0)
            
            # feature extraction for per gallery
            with torch.no_grad():
                per_gallery_features = model(img_transform)

            gallery_features[i].append(per_gallery_features)
            # print(gallery_features[i])
            gallery_des[i].append(gallery_sift)

0it [00:00, ?it/s]


AttributeError: 'list' object has no attribute 'shape'

In [None]:
query_features = []
query_des = []
for i, query_img_no in tqdm(enumerate(query_imgs_no[0:20])):
    per_query_name=path_query+'/'+str(query_img_no)+'.jpg'
    per_query=cv2.imread(per_query_name)
    
    # read boundary from text file
    queryfilename = path_query_txt+'/'+str(query_img_no)+'.txt'
    
    # crop the image
    boundary = read_bounding_box(queryfilename)
    x ,y, w, h = boundary
    query_boundary = per_query[y:y+h, x:x+w]

    # Image pre-processing
    query_boundary= img_preprocess(query_boundary)

    query_des.append(extract_sift_des(query_boundary))
    query_boundary = cv2.GaussianBlur(query_boundary,(3,3),0)
    query_boundary = Image.fromarray(query_boundary)
    query_transformed = transform(query_boundary) #normalize the input image and transform it to tensor.
    query_transformed = torch.unsqueeze(query_transformed, 0) 

    with torch.no_grad():
        query_features.append(model(query_transformed))

20it [00:01, 17.95it/s]


In [None]:
for i, query_img_no in tqdm(enumerate(query_imgs_no[0:20])):
    time_s = time.time()
    dist_record=[]
    gallery_imgs_no_desc=[]
    sim_scores1 = []
    sim_scores2 = []
    
    # the iteration loop for gallery
    for j, gallery_img_no in tqdm(enumerate(gallery_imgs_no), desc=f"Processing query part {i}"):
              
        for k in range(len(gallery_features[j])):
            print(gallery_features[j][100])
            # print(query_features[i])
            windows_score = cosine_similarity(query_features[i], gallery_features[j][k]) 
            sim_scores1.append(windows_score)
            
        sim_score1 = np.max(sim_scores1)

        for l in range(len(gallery_des[i])):
            windows_score = cosine_similarity(query_des[i], gallery_des[j][k])
            sim_scores2.append(windows_score)
            
        sim_score2 = np.max(sim_scores2)
        #Weighted sum of two similarity scores
        sim_score = sim_score1*0.7 + sim_score2*0.3
        dist_record.append(sim_score)
        
        # find the indexes with descending similarity order
        
    descend_index=sorted(range(len(dist_record)), key=lambda k: np.max(dist_record[k]),reverse=True)
    # update the results for one query
    for k in range(len(descend_index)):
        gallery_imgs_no_desc.append(np.array(gallery_imgs_no)[descend_index[k]])
    record_all[i,:]= gallery_imgs_no_desc
    time_e = time.time()
    print('retrieval time for query {} is {}s'.format(query_img_no, time_e-time_s))
    query_idx = i
    print(f'For query image No. {query_imgs_no[query_idx]}, the top 10 ranked similar image No. are {gallery_imgs_no_desc[0]} {gallery_imgs_no_desc[1]} {gallery_imgs_no_desc[2]} {gallery_imgs_no_desc[3]} {gallery_imgs_no_desc[4]} {gallery_imgs_no_desc[5]} {gallery_imgs_no_desc[6]} {gallery_imgs_no_desc[7]} {gallery_imgs_no_desc[8]} {gallery_imgs_no_desc[9] }')
    
    filename=path_query+'/'+str(query_imgs_no[query_idx])+'.jpg'
    image = mpimg.imread(filename)
    plt.imshow(image)
    plt.show()
    for x in range(10):
        filename=path_gallery+'/'+str(gallery_imgs_no_desc[x])+'.jpg'
        image = mpimg.imread(filename)
        plt.imshow(image)
        plt.show()
    plt.close('all')

Processing query part 0: 0it [00:00, ?it/s]
0it [00:00, ?it/s]

tensor([[-4.1036e+00, -5.3776e+00, -3.2872e+00, -1.8441e+00, -3.0720e+00,
         -7.3566e-01, -3.8245e+00, -3.6626e+00, -3.9892e+00, -5.1785e+00,
         -4.3913e+00, -1.5664e+00, -2.7287e+00, -4.1634e+00, -3.4532e+00,
         -2.6477e+00, -3.4065e+00, -2.9687e+00, -6.7316e-01, -3.5910e+00,
         -3.1067e+00, -4.7350e+00, -5.1325e+00, -4.3137e+00, -3.8978e+00,
         -2.6216e+00, -1.9447e+00, -3.6073e-01, -1.3029e+00, -8.3783e-01,
         -3.6318e+00, -2.2050e+00, -2.3726e+00, -3.6374e+00, -2.0821e+00,
          1.5588e-02, -3.0351e-01, -1.6993e+00, -1.3737e+00, -3.1323e+00,
         -1.2708e+00, -2.1017e+00, -2.5669e+00, -4.6881e+00, -1.3916e+00,
         -2.4663e+00, -2.6169e+00, -3.1456e+00, -3.8715e+00, -3.7064e+00,
         -3.0110e+00, -3.8862e+00, -1.9774e+00, -6.5517e-02, -3.0462e+00,
         -4.0544e+00, -1.6601e+00, -3.8910e+00, -2.6336e+00, -2.9915e+00,
         -1.9354e+00, -2.2606e+00, -2.8811e+00, -2.0748e+00, -3.1747e+00,
         -5.6941e-01, -2.8700e+00, -1.




ValueError: Expected 2D array, got scalar array instead:
array=0.0.
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
# write the output file following the example
f=open(r'./rank_list_CNN&SIFT.txt','w')
for i in range(num_query):
    f.write('Q'+str(i+1)+': ')
    for j in range(num_gallery):
        f.write(str(np.int32(record_all[i,j]))+' ')
    f.write('\n')
f.close()
