In [4]:
#Sina Gholami
#MIRCV 2021
# Mount Google Drive at /content/gdrive; the notebook's BASE_DIR is located under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
#To support GPU
!pip install opencv-python==4.4.0.46
!pip install whoosh

In [5]:
import cv2
import numpy as np
import os
import operator

from whoosh import index
from whoosh.fields import *
from whoosh.analysis import *
from whoosh import qparser

# for showing images in the cell outputs (Jupyter Notebooks / Google Colab)
from IPython.display import display
from ipywidgets import Image

from tqdm.notebook import tqdm

# Seed NumPy's global RNG so that the random pivot selection is repeatable.
np.random.seed(42)

# Project root on the mounted drive; every path below is derived from it.
BASE_DIR = '/content/gdrive/My Drive/mircv2021'

# Inputs: Caffe network definition, trained weights and the image collection.
DEEP_PROTO = f'{BASE_DIR}/data/caffe/train_val.prototxt'
DEEP_MODEL = f'{BASE_DIR}/data/caffe/bvlc_reference_caffenet.caffemodel'
SRC_FOLDER = f'{BASE_DIR}/data/coco_img'

# Outputs: Whoosh index folder and the cached pivots.
OUT_FOLDER = f'{BASE_DIR}/out'
WHOOSH_FOLDER = f'{OUT_FOLDER}/whoosh'
PIVOTS_FILE = f'{OUT_FOLDER}/pivots.txt.gz'
PIVOTS_ID_FILE = f'{OUT_FOLDER}/pivot_ids.txt'

# Outputs: cached image ids and their extracted descriptors.
IMAGE_ID_FILE = f'{OUT_FOLDER}/image_ids.txt'
FEATURES_FILE = f'{OUT_FOLDER}/extracted_features.txt.gz'

# Create the output folders. makedirs(..., exist_ok=True) is idempotent, has no
# check-then-create race, and also creates any missing parent directories
# (os.mkdir raises FileNotFoundError when a parent does not exist).
os.makedirs(OUT_FOLDER, exist_ok=True)
os.makedirs(WHOOSH_FOLDER, exist_ok=True)

# Name of the network layer whose output is extracted as the image descriptor
# (passed as `layer` to DNNExtractor.extract).
DEEP_LAYER = 'relu7'
# Spatial size and per-channel mean handed to cv2.dnn.blobFromImage when building the input blob.
SIZE = (227, 227)
MEAN_VALUES = (104, 117, 123)  # BGR

# Number of descriptors randomly selected as pivots.
NUM_PIVOTS = 100
# NOTE(review): not referenced in the visible cells (the Whoosh schema field is named "text") - confirm whether it is still needed.
INDEX_FIELD = 'deep'
# Number of nearest pivots used when encoding an image for indexing.
KX = 10
# Number of nearest pivots used when encoding the query; also used as the cut-off in the re-ranking cell.
KQ = 10

# NOTE(review): not referenced in the visible cells - confirm whether it is still needed.
K = 8

In [6]:
class DNNExtractor:
    """Extracts layer activations from images using an OpenCV DNN network loaded from Caffe files."""

    def __init__(self, net_proto_path, trained_model_path, size, mean_values=None):
        """Load the network and remember the preprocessing parameters.

        Args:
            net_proto_path: path of the Caffe network definition (.prototxt).
            trained_model_path: path of the trained Caffe weights (.caffemodel).
            size: spatial size passed to cv2.dnn.blobFromImage.
            mean_values: mean passed to cv2.dnn.blobFromImage (may be None).
        """
        self.size = size
        self.mean_values = mean_values

        self.net = cv2.dnn.readNetFromCaffe(net_proto_path, trained_model_path)
        # to enable GPU (this won't work on Colab without recompiling opencv)
        self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    def extract(self, img_file, layer, normalize=False):
        """Return the flattened output of `layer` for the image stored at `img_file`.

        Args:
            img_file: path of the image to read.
            layer: name of the network layer whose output is returned.
            normalize: when True, scale the vector to unit L2 norm.

        Returns:
            A 1-D numpy array with the flattened layer output.

        Raises:
            IOError: if the image cannot be read.
        """
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None instead of
            # raising; fail here with the offending path rather than deep inside OpenCV.
            raise IOError('could not read image: {}'.format(img_file))

        blob = cv2.dnn.blobFromImage(img, 1.0, self.size, self.mean_values, swapRB=False, crop=False)
        self.net.setInput(blob)
        prob = self.net.forward(layer).flatten()

        if normalize:
            norm = np.linalg.norm(prob)
            # an all-zero activation has no direction: leave it as zeros instead of
            # dividing by zero and returning NaNs
            if norm > 0:
                prob /= norm

        return prob
    
# Build the extractor instance used by the feature-extraction and query cells.
dnn = DNNExtractor(
    net_proto_path=DEEP_PROTO,
    trained_model_path=DEEP_MODEL,
    size=SIZE,
    mean_values=MEAN_VALUES,
)

In [7]:
def extract_features(img_folder):
    """Extract a normalized DEEP_LAYER descriptor for every file in `img_folder`.

    Args:
        img_folder: directory containing the images to process.

    Returns:
        (descriptors, filenames): two numpy arrays aligned by position; row i of
        `descriptors` is the feature vector of `filenames[i]`.
    """
    # Regular files only, in sorted order: os.listdir returns entries in arbitrary
    # order, and sorting keeps the id <-> descriptor alignment reproducible.
    filenames = sorted(f for f in os.listdir(img_folder)
                       if os.path.isfile(os.path.join(img_folder, f)))

    # Read from the folder that was passed in (not from the global SRC_FOLDER).
    descriptors = [dnn.extract(os.path.join(img_folder, f),
                               DEEP_LAYER, normalize=True) for f in tqdm(filenames)]

    descriptors = np.array(descriptors)
    filenames = np.array(filenames)

    return descriptors, filenames

# compute and cache features
# Recompute when EITHER cache file is missing: the ids and the descriptors are only
# meaningful as a pair, and loading below needs both files.
if not (os.path.exists(FEATURES_FILE) and os.path.exists(IMAGE_ID_FILE)):
    descriptors, ids = extract_features(SRC_FOLDER)
    np.savetxt(IMAGE_ID_FILE, ids, fmt='%s')
    np.savetxt(FEATURES_FILE, descriptors)

# Always reload from disk so a fresh and a cached run see the same values.
# ndmin keeps the expected rank even when the collection holds a single image.
ids = np.loadtxt(IMAGE_ID_FILE, dtype='str', ndmin=1)
descriptors = np.loadtxt(FEATURES_FILE, ndmin=2)

assert len(ids) == len(descriptors), 'image ids and descriptors are misaligned; delete the cache files and re-run'

In [42]:
#TODO: STEP 0
# Make the pivots: pick NUM_PIVOTS distinct descriptors at random and cache them.
# The position of a pivot in the pivot array is used as its pivot id.
if not (os.path.exists(PIVOTS_FILE) and os.path.exists(PIVOTS_ID_FILE)):
    # np.random.choice(n, ...) samples from 0..n-1, so pass len(descriptors) itself;
    # passing len(descriptors)-1 would make the last descriptor unselectable.
    pivot_rows = np.random.choice(len(descriptors), NUM_PIVOTS, replace=False)
    np.savetxt(PIVOTS_FILE, np.asarray(descriptors)[pivot_rows])
    np.savetxt(PIVOTS_ID_FILE, np.arange(NUM_PIVOTS), fmt='%d')

# Always load from the cache so `pivots` / `pivot_ids` are defined on a fresh kernel
# even when the files already exist. astype(int) also accepts id files that an
# earlier run wrote in float notation.
pivots = np.loadtxt(PIVOTS_FILE, ndmin=2)
pivot_ids = np.loadtxt(PIVOTS_ID_FILE, ndmin=1).astype(int)

100

In [95]:
#TODO: STEP 1

def search(queryF, dataset, ids, k):
    """Rank `dataset` rows by dot product with `queryF` and return the best `k`.

    Returns a list of (score, id) tuples sorted in descending order; the score is
    the dot product of the row with `queryF`, and `ids` are paired with the rows
    by position.
    """
    scores = np.dot(dataset, queryF)
    ranked = list(zip(scores, ids))
    ranked.sort(reverse=True)
    return ranked[:k]

def features_2_text(img_f, top_k):
    """Encode a feature vector as its Surrogate Text Representation (STR).

    The `top_k` nearest pivots are retrieved with `search`; the id of the closest
    pivot is written `top_k` times, the next one `top_k - 1` times, and so on.
    Returns the space-separated string of repeated pivot ids.
    """
    nearest = search(img_f, pivots, pivot_ids, top_k)
    tokens = []
    for position, (_, pivot) in enumerate(nearest):
        # closer pivots (smaller position) get more repetitions
        tokens.extend([str(int(pivot))] * (top_k - position))
    return " ".join(tokens)
# Sanity check: print the STR of the first descriptor, encoded with its KX nearest pivots.
print(features_2_text(descriptors[0],KX))

54 54 54 54 54 54 54 54 54 54 92 92 92 92 92 92 92 92 92 32 32 32 32 32 32 32 32 36 36 36 36 36 36 36 49 49 49 49 49 49 11 11 11 11 11 79 79 79 79 69 69 69 10 10 15


In [47]:
#TODO: STEP 2
# Whoosh indexing: the schema has a "text" field holding the STR of an image
# (KeywordAnalyzer, term vectors on, stored) and a stored "id" field.
schema = Schema(text=TEXT(analyzer=KeywordAnalyzer(), vector=True, stored=True), id=ID(stored=True))
# Initialize the Whoosh index in WHOOSH_FOLDER
ix = index.create_in(WHOOSH_FOLDER, schema=schema)
print('creating ex3 index...')
# Use the writer as a context manager: it commits when the block finishes and
# cancels if add_document raises, so the writer is not left open after an error.
with ix.writer() as writer:
    for img, new_id in zip(descriptors, ids):
        # features_2_text transforms the image features to STR; ids are passed as plain str
        writer.add_document(id=str(new_id), text=features_2_text(img, KX))


creating ex3 index...


In [39]:
def display_image(filename, score=0):
    """Show the image `filename` from SRC_FOLDER in the cell output.

    A "<filename> - <score>" line (score with three decimals) is printed
    before the image widget is displayed.
    """
    widget = Image.from_file(os.path.join(SRC_FOLDER, filename), width=300, height=400)
    print(f'{filename} - {score:.3f}')
    display(widget)


def display_results(results):
    """Display every result in order; `results` is an iterable of (score, id) pairs."""
    for entry in results:
        score, filename = entry
        display_image(filename, score)

In [None]:
#TODO: STEP 3

# Whoosh searching: build the query image path, extract its normalized features
# and transform them to STR using the KQ nearest pivots.
img_query = SRC_FOLDER  + "/000000321557.jpg"
img = dnn.extract(img_query, DEEP_LAYER, normalize=True)
vector_query = features_2_text(img, KQ)
# Extract the features of img_query.
# Transform the features to STR.
# Perform a Whoosh search and call display_results to show the search results.
# NOTE(review): the searcher is not closed here; `res` is read again by the
# re-ranking cell, so it presumably has to stay open until then - confirm.
searcher = ix.searcher()
# Parse the STR against the "text" field, combining the terms with OrGroup.
parser = qparser.QueryParser("text", ix.schema, group=qparser.OrGroup)
# Despite its name, txt_parser holds the parsed query, not a parser.
txt_parser = parser.parse(vector_query)
# No explicit limit is passed, so the number of hits is Whoosh's default.
res = searcher.search(txt_parser)
display_results([(x.score, x["id"]) for x in res])



In [100]:
# Optional reordering task
#
# 1. collect the ids of the images Whoosh returned in `res`
# 2. map each id to its row in `descriptors` to recover the image features
# 3. re-rank those candidates against the query features with `search`
# 4. print and display the re-ranked list
o_id = [hit["id"] for hit in res]
features = [descriptors[np.where(ids == hit_id)[0][0]] for hit_id in o_id]
reordered = search(img, features, o_id, KQ)
print(reordered)
display_results(reordered)

[(1.0000001218082937, '000000321557.jpg'), (0.5208934502487856, '000000297147.jpg'), (0.5062005759392438, '000000408830.jpg'), (0.4978914701402334, '000000152120.jpg'), (0.4655091900300762, '000000291791.jpg'), (0.3991721766381607, '000000270066.jpg'), (0.34194796819449463, '000000266400.jpg'), (0.3362054643120129, '000000276284.jpg'), (0.303422363001117, '000000226662.jpg'), (0.30091009213183983, '000000456394.jpg')]
000000321557.jpg - 1.000


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000297147.jpg - 0.521


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000408830.jpg - 0.506


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000152120.jpg - 0.498


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000291791.jpg - 0.466


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x…

000000270066.jpg - 0.399


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000266400.jpg - 0.342


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000276284.jpg - 0.336


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00d\x00d\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x…

000000226662.jpg - 0.303


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000456394.jpg - 0.301


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x…