# Project Course - Foundations of Information Retrieval

## Group 25

## Contributors:
- Elia Cunegatti e.cunegatti@student.utwente.nl 
- Ruben Popper r.popper@student.utwente.nl

## Info:
All descriptors (SIFT, SURF, HOG, ORB) and feature representations (VGG-16, RESNET-34, RESNET-152) can be downloaded at the following link: https://drive.google.com/drive/folders/1v9lAjoEzJQSnCGZ3U1nzyqeSt8jYcG_q?usp=sharing

**Remark:** Please note that, although this Jupyter Notebook allows all the results contained in the report to be reproduced, it does not print all the results simultaneously. Due to the large computational time required to run all the experiments, we relied on different machines, notebooks and Google Colab sessions, as well as on a university-provided external server (Azure Machine), where the code was run as a classical .py script.


In [None]:
## Basic packages
import os
import json
import h5py
import tqdm
import pickle
import itertools
import numpy as np
import pylab as pl
import numpy as np
import matplotlib.pyplot as plt

#Packages traditional CV algorithms 
import cv2
'''
please install opencv with the following pip command in order to use SIFT and SURF Algorithms
>> pip install opencv-contrib-python==3.4.2.17
'''
import skimage
from skimage.color import rgb2gray
from skimage.feature import ORB, hog
from skimage.transform import resize

## Cluster packages
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer

## Normalization and Distance packages
from sklearn import preprocessing
from scipy.spatial import distance

## CNN packages
import torch
from torch import optim, nn
from torchvision import models, transforms



# Load Data

In [None]:
# Map (database) images: relative image paths and their locations.
with open("data02/database/database_lite.json","r") as f:
    m_idx = json.load(f)
m_imgs = np.array(m_idx["im_paths"])
m_loc = np.array(m_idx["loc"])

# Query images: relative image paths and their locations.
with open("data02/query/query_lite.json","r") as f:
    q_idx = json.load(f)
q_imgs = np.array(q_idx["im_paths"])
q_loc = np.array(q_idx["loc"])

# Relevance judgements: `sim[i, j] == 1` is used further down to mark map
# image j as relevant for query i.
# NOTE(review): this file is read from "drive/MyDrive/data02/" while the two
# JSON indices above are read from "data02/" - confirm which root is correct.
with h5py.File("drive/MyDrive/data02/london_lite_gt.h5","r") as f:
    fovs = f["fov"][:]
    sim = f["sim"][:].astype(np.uint8)

# Traditional CV Algorithms 
In the next cells, code is available to extract the descriptors with the Computer Vision algorithms we selected. 
The pre-computed descriptors are also available as one .bin file per algorithm at the following link: https://drive.google.com/drive/folders/1v9lAjoEzJQSnCGZ3U1nzyqeSt8jYcG_q?usp=sharing.

You can simply download them and upload them later without having to re-run all the code.

## ORB

In [None]:
# Extract ORB descriptors (at most 50 keypoints per image) from every map
# image and stack them row-wise into one descriptor matrix.
# scikit-image's ORB takes `n_keypoints`; `n_features` is not a valid keyword.
descriptor_extractor = ORB(n_keypoints=50)
per_image_descriptors = []

for img_m in m_imgs:
    img = plt.imread(os.path.join('data02/', img_m))
    img = rgb2gray(img)

    # The `.descriptors` attribute is only populated by detect_and_extract();
    # it is a single 2-D array (one row per keypoint), not a tuple.
    descriptor_extractor.detect_and_extract(img)
    per_image_descriptors.append(descriptor_extractor.descriptors)

# NOTE(review): the original loop ended with `break`, so only the first map
# image was processed. It is removed here to match the SIFT and HOG cells,
# which process every image - confirm this was a debugging leftover.
# Stacking once at the end avoids re-copying the growing matrix per image.
descriptors = np.vstack(per_image_descriptors)
print(descriptors.shape)

In [None]:
#SAVE DESCRIPTORS (uncomment to persist freshly extracted descriptors)
#with open('data02/ORB-descriptors-map.bin', 'wb') as f:
#    pickle.dump(descriptors, f)

#LOAD DESCRIPTORS
# The extractor is re-created so that the later bag-of-words cells can use it
# even when the extraction cell above was skipped.
# scikit-image's ORB takes `n_keypoints`; `n_features` is not a valid keyword.
descriptor_extractor = ORB(n_keypoints=50)
# NOTE: pickle can execute arbitrary code while loading - only open .bin files
# obtained from a trusted source.
with open('data02/ORB-descriptors-map.bin', 'rb') as f:
    descriptors = pickle.load(f)
print(descriptors.shape)

## SIFT

In [None]:
# Extract SIFT descriptors from every map image and stack them row-wise into
# one descriptor matrix. The argument 50 is passed to SIFT_create as in the
# original cell (presumably the maximum number of features per image).
sift = cv2.xfeatures2d.SIFT_create(50)
per_image_descriptors = []

for img_m in m_imgs:
    img = cv2.imread(os.path.join('data02/', img_m),cv2.IMREAD_GRAYSCALE)
    keypoints_sift, descriptors_img = sift.detectAndCompute(img, None)
    # OpenCV returns None instead of an empty array when an image yields no
    # keypoints; such images contribute nothing to the vocabulary.
    if descriptors_img is not None:
        per_image_descriptors.append(descriptors_img)

# Stacking once at the end avoids re-copying the growing matrix per image.
descriptors = np.vstack(per_image_descriptors)
print(descriptors.shape)

In [None]:
#SAVE DESCRIPTORS (uncomment to persist freshly extracted descriptors)
#with open('data02/SIFT-descriptors-map.bin', 'wb') as f:
#    pickle.dump(descriptors, f)

#LOAD DESCRIPTORS
# The extractor is re-created so that the later bag-of-words cells can use it
# even when the extraction cell above was skipped.
sift = cv2.xfeatures2d.SIFT_create(50)
# NOTE: pickle can execute arbitrary code while loading - only open .bin files
# obtained from a trusted source.
with open('data02/SIFT-descriptors-map.bin', 'rb') as f:
    descriptors = pickle.load(f)
print(descriptors.shape)

## SURF

In [None]:
# Extract SURF descriptors from every map image and stack them row-wise into
# one descriptor matrix.
surf = cv2.xfeatures2d.SURF_create()
per_image_descriptors = []

for img_m in m_imgs:
    img = cv2.imread(os.path.join('drive/MyDrive/data02/', img_m),cv2.IMREAD_GRAYSCALE)
    _, descriptors_img = surf.detectAndCompute(img, None)
    # OpenCV returns None instead of an empty array when an image yields no
    # keypoints; such images contribute nothing to the vocabulary.
    if descriptors_img is not None:
        per_image_descriptors.append(descriptors_img)

# NOTE(review): the original loop ended with `break`, so only the first map
# image was processed. It is removed here to match the SIFT and HOG cells,
# which process every image - confirm this was a debugging leftover.
# Stacking once at the end avoids re-copying the growing matrix per image.
descriptors = np.vstack(per_image_descriptors)
print(descriptors.shape)

In [None]:
#SAVE DESCRIPTORS (uncomment to persist freshly extracted descriptors)
#with open('data02/SURF-descriptors-map.bin', 'wb') as f:
#    pickle.dump(descriptors, f)

#LOAD DESCRIPTORS
# The extractor is re-created so that the later bag-of-words cells can use it
# even when the extraction cell above was skipped.
surf = cv2.xfeatures2d.SURF_create()
# NOTE: pickle can execute arbitrary code while loading - only open .bin files
# obtained from a trusted source.
with open('data02/SURF-descriptors-map.bin', 'rb') as f:
    descriptors = pickle.load(f)
print(descriptors.shape)

## HOG

In [None]:
# Compute HOG output for every map image (resized to 256x128) and stack the
# per-image results row-wise into one matrix.
per_image_descriptors = []
for img_m in m_imgs:
    img = plt.imread(os.path.join('drive/MyDrive/data02/', img_m))
    resized_img = resize(img, (128*2, 64*2))
    # NOTE(review): per the scikit-image documentation, hog(..., visualize=True)
    # returns (feature_vector, hog_image). The unpacking below therefore keeps
    # the visualisation image and discards the feature vector. The downstream
    # bag-of-words code needs a 2-D array with one descriptor per row, so this
    # is left unchanged - confirm that it is the intended representation.
    _, descriptors_img = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
          cells_per_block=(2, 2), visualize=True, multichannel=True)
    per_image_descriptors.append(descriptors_img)

# Stacking once at the end avoids re-copying the growing matrix per image.
descriptors = np.vstack(per_image_descriptors)
print(descriptors.shape)

In [None]:
#SAVE DESCRIPTORS (uncomment to persist freshly extracted descriptors)
#with open('data02/HOG-descriptors-map.bin', 'wb') as f:
#    pickle.dump(descriptors, f)

#LOAD DESCRIPTORS
# NOTE: pickle can execute arbitrary code while loading - only open .bin files
# obtained from a trusted source.
with open('data02/HOG-descriptors-map.bin', 'rb') as f:
    descriptors = pickle.load(f)
print(descriptors.shape)

# Clustering

First of all, find the correct number of clusters to use based on your selected descriptors.

In [None]:
# Elbow analysis: fit K-Means for every candidate vocabulary size
# (5, 15, ..., 195) on the stacked descriptors and plot the resulting curve.
K = list(range(5, 200, 10))
model = KMeans(random_state=42, n_init=2, verbose=1)
visualizer = KElbowVisualizer(model, k=K, timings=True)
visualizer.fit(descriptors)
visualizer.show()

Once you have found the correct value of K, please change the variable `K` below.

In [None]:
import sklearn
from sklearn.cluster import KMeans
# clustering

# Select the descriptor type by leaving exactly one of these lines uncommented.
# (The original cell assigned all four in a row, so only the last one, "HOG",
# ever took effect.)
#algorithm = "ORB"
#algorithm = "SIFT"
#algorithm = "SURF"
algorithm = "HOG"

# Vocabulary size (number of visual words) for the chosen descriptor.
# The original used `=` instead of `==` here, which is a syntax error.
K = 55 if algorithm == "HOG" else 45

num_initialization = 5
kmeans = KMeans(n_clusters=K, random_state=42, n_init=num_initialization, verbose=1)
clusters = kmeans.fit(descriptors)
# One row per visual word.
centroids = clusters.cluster_centers_
print("Shape of the centroids matrix: ", centroids.shape)

## BOW Vector

In [None]:
def bag_of_words(centroids, img_descriptors, key):
    """Build the bag-of-words histogram of a single image.

    Every descriptor is assigned to its nearest centroid (visual word) under
    the selected distance, and the histogram counts how many descriptors fall
    on each centroid. Ties go to the centroid with the lowest index.

    Parameters
    ----------
    centroids : 2-D array, one visual word per row.
    img_descriptors : 2-D array with one descriptor per row, or None when the
        extractor found nothing; None yields an all-zero histogram.
    key : str
        'cosine', 'euclidean' (the spelling 'euclidian' used by
        `retrieve_images` is accepted too), 'minkowski' or 'manhattan'.

    Returns
    -------
    1-D float array of length ``centroids.shape[0]``.

    Raises
    ------
    ValueError
        If `key` is not one of the supported distance names.
    """
    metrics = {
        'cosine': distance.cosine,
        'euclidean': distance.euclidean,
        'euclidian': distance.euclidean,
        # SciPy's default order (p=2) is used here, as in the original call.
        # NOTE(review): `retrieve_images` calls minkowski with p=1 - confirm
        # whether the two are meant to differ.
        'minkowski': distance.minkowski,
        'manhattan': distance.cityblock,
    }
    if key not in metrics:
        raise ValueError(
            "Unknown distance {!r}; expected one of {}".format(key, sorted(metrics))
        )
    metric = metrics[key]

    n_centroids = centroids.shape[0]
    bow_vector = np.zeros(n_centroids)
    if img_descriptors is None:
        return bow_vector

    for descriptor in img_descriptors:
        dists = [metric(descriptor, centroid) for centroid in centroids]
        # list.index returns the first minimum, i.e. the lowest centroid index.
        bow_vector[dists.index(min(dists))] += 1

    return bow_vector

Please now select a distance metric.

In [None]:
#SELECT YOUR DISTANCE!

dist = 'cosine'
#dist = 'euclidean'
#dist = 'minkowski'
#dist = 'manhattan'

# Compute one bag-of-words vector per map image with the descriptor type
# chosen in `algorithm` and the distance chosen in `dist`.
map_bows = []
# `tqdm` is imported as a module at the top of the file, so the progress-bar
# wrapper is `tqdm.tqdm`; calling the module itself raises a TypeError.
for img_name in tqdm.tqdm(m_imgs):
    img_path = os.path.join('drive/MyDrive/data02/', img_name)

    if algorithm == 'ORB':
        img = plt.imread(img_path)
        img = rgb2gray(img)
        descriptor_extractor.detect_and_extract(img)
        # `.descriptors` is a single array, not a (keypoints, descriptors) pair.
        descriptors_img = descriptor_extractor.descriptors

    elif algorithm == 'SIFT':
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        _, descriptors_img = sift.detectAndCompute(img, None)

    elif algorithm == 'SURF':
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        _, descriptors_img = surf.detectAndCompute(img, None)

    elif algorithm == 'HOG':
        img = plt.imread(img_path)
        resized_img = resize(img, (128*2, 64*2))
        # Same unpacking as the HOG extraction cell, so that map images are
        # described exactly like the data the vocabulary was built from.
        _, descriptors_img = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                  cells_per_block=(2, 2), visualize=True, multichannel=True)

    else:
        raise ValueError("Unknown algorithm {!r}".format(algorithm))

    map_bows.append(bag_of_words(centroids, descriptors_img, dist))

# One row per map image; stacking once avoids re-copying the matrix per image.
bow_map_images = np.vstack(map_bows)

print(bow_map_images.shape)

In [None]:
from sklearn import preprocessing

# Standardise every bag-of-words dimension over the map images. The fitted
# `scaler` is kept because the query vectors are transformed with it later.
scaler = preprocessing.StandardScaler()
bow_map_images = scaler.fit_transform(bow_map_images)

In [None]:
## RETRIEVED IMAGES' SIMILARITIES

def retrieve_images(map_bow_vectors, query_bow, key):
    """Rank all map images by increasing distance to a query vector.

    Parameters
    ----------
    map_bow_vectors : 2-D array, one map-image vector per row.
    query_bow : 1-D array, the query vector.
    key : str
        'cosine', 'euclidian' (the correct spelling 'euclidean' is accepted
        as well), 'minkowski' (computed with p=1, as in the original code)
        or 'manhattan'.

    Returns
    -------
    list of int
        Row indices of `map_bow_vectors`, most similar first. Equal distances
        keep their original row order; NaN distances are ranked last.

    Raises
    ------
    ValueError
        If `key` is not a supported distance name. The original silently
        returned an empty ranking, e.g. for the 'euclidean' option offered by
        the distance-selection cell.
    """
    metrics = {
        'cosine': distance.cosine,
        'euclidian': distance.euclidean,
        'euclidean': distance.euclidean,
        'minkowski': lambda u, v: distance.minkowski(u, v, p=1),
        'manhattan': distance.cityblock,
    }
    if key not in metrics:
        raise ValueError(
            "Unknown distance {!r}; expected one of {}".format(key, sorted(metrics))
        )
    metric = metrics[key]

    dists = np.array(
        [metric(query_bow, map_vector) for map_vector in map_bow_vectors],
        dtype=float,
    )
    # A stable argsort replaces the pandas round-trip of the original.
    return np.argsort(dists, kind='stable').tolist()

In [None]:
query = []

distance_all = [dist]

# The bag-of-words vector of a query depends only on `algorithm` and `dist`,
# not on the retrieval distance, so it is computed once per query here.
query_bows = []
for q_img in q_imgs:
    # NOTE(review): the original ORB branch read from the absolute path
    # '/data02/' while the other three branches used 'data02/'; the relative
    # root is used for all of them here. The map-image cell reads from
    # 'drive/MyDrive/data02/' - confirm which root is correct.
    img_path = os.path.join('data02/', q_img)

    if algorithm == 'ORB':
        img = plt.imread(img_path)
        img = rgb2gray(img)
        descriptor_extractor.detect_and_extract(img)
        # `.descriptors` is a single array, not a (keypoints, descriptors) pair.
        descriptors_img = descriptor_extractor.descriptors

    elif algorithm == 'SIFT':
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        _, descriptors_img = sift.detectAndCompute(img, None)

    elif algorithm == 'SURF':
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        _, descriptors_img = surf.detectAndCompute(img, None)

    elif algorithm == 'HOG':
        img = plt.imread(img_path)
        resized_img = resize(img, (128*2, 64*2))
        _, descriptors_img = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                  cells_per_block=(2, 2), visualize=True, multichannel=True)

    else:
        raise ValueError("Unknown algorithm {!r}".format(algorithm))

    bow = bag_of_words(centroids, descriptors_img, dist)
    # The scaler expects a (1, n_features) row; flatten back afterwards.
    bow = scaler.transform(bow.reshape(1, -1)).reshape(-1)
    query_bows.append(bow)

# For every retrieval distance: the ranking and the ground-truth relevant map
# indices of every query, both keyed by query index.
retrieved_all = []
relevant_all = []
for item in distance_all:
    retrieved = {}
    relevant = {}
    for i, bow in enumerate(query_bows):
        retrieved[i] = retrieve_images(bow_map_images, bow, item)
        relevant[i] = list(np.where(sim[i, :] == 1)[0])
    retrieved_all.append(retrieved)
    relevant_all.append(relevant)

## Measure performance Functions



In [None]:
def average_precision(relevant, retrieved):
    """Average precision of one ranked result list.

    Precision is evaluated at every rank that holds a relevant item, and the
    values are averaged over the relevant items that were actually retrieved
    (not over the total number of relevant items).

    Parameters
    ----------
    relevant : iterable of hashable ids that are relevant for the query.
    retrieved : sequence of ids, best match first.

    Returns
    -------
    float
        The average precision, or 0.0 when no relevant item is retrieved.
    """
    relevant_ids = set(relevant)
    # Distinct relevant ids seen so far. A set keeps the original numerator
    # (size of an intersection) even if `retrieved` contains duplicates.
    hits = set()
    precisions = []
    for rank, item in enumerate(retrieved, start=1):
        if item in relevant_ids:
            hits.add(item)
            precisions.append(len(hits) / rank)
    if not precisions:
        return 0.0
    return float(sum(precisions) / len(precisions))


def mean_average_precision(all_relevant, all_retrieved):
    """Mean of `average_precision` over all queries.

    Parameters
    ----------
    all_relevant : mapping query id -> relevant ids.
    all_retrieved : mapping query id -> ranked retrieved ids.

    Returns
    -------
    float

    Raises
    ------
    ZeroDivisionError
        If `all_retrieved` is empty (unchanged from the original logic).
    """
    scores = [
        average_precision(all_relevant[query_id], list(ranking))
        for query_id, ranking in all_retrieved.items()
    ]
    return float(sum(scores) / len(scores))

def mean_average_precision_at_k(all_relevant, all_retrieved,k):
    """Mean average precision computed on the first `k` results of each query.

    `all_relevant` maps query id -> relevant ids and `all_retrieved` maps
    query id -> ranked retrieved ids. Each ranking is cut to its first `k`
    entries before `average_precision` is applied. Returns a float; an empty
    `all_retrieved` raises ZeroDivisionError.
    """
    scores = []
    for query_id, ranking in all_retrieved.items():
        top_k = list(ranking)[:k]
        scores.append(average_precision(all_relevant[query_id], top_k))
    return float(sum(scores) / len(scores))


def top_recall_at_k(all_relevant, all_retrieved,k):
    """Fraction of queries with at least one relevant item in their top `k`.

    Both containers are indexed with 0 .. len(all_relevant) - 1, so they must
    be lists or dicts keyed by those consecutive integers. An empty
    `all_relevant` raises ZeroDivisionError.
    """
    n_queries = len(all_relevant)
    successes = sum(
        1
        for query in range(n_queries)
        if set(all_relevant[query]).intersection(all_retrieved[query][:k])
    )
    return successes / n_queries

## Print Performance 

In [None]:
# Report MAP, Top-Recall@k and MAP@k for every evaluated distance metric.
k_list = [1,5,10,50,100,200]
for item, relevant_by_query, retrieved_by_query in zip(distance_all, relevant_all, retrieved_all):
    print('Distance Metric --> {}'.format(item))
    mapr = mean_average_precision(relevant_by_query, retrieved_by_query)
    print('Mean Average Precision (MAP) with distance {0} --> {1}'.format(item,round(mapr,3)))
    # A separate loop variable is used so the distance name is not overwritten.
    for k in k_list:
        top = top_recall_at_k(relevant_by_query, retrieved_by_query, k)
        # The original template used index {2} with only two arguments, which
        # raises IndexError.
        print('Top-Recall-at-k with k {0} --> {1}'.format(k,round(top,3)))
        map_at_k = mean_average_precision_at_k(relevant_by_query, retrieved_by_query, k)
        print('MAP-at-k {0} --> {1}'.format(k,round(map_at_k,3)))
    print("\n")

# CNN

Please comment/uncomment the lines below in order to select one of the three models.

In [None]:
# Name of the pre-trained network to use for feature extraction. The cells
# below compare this string against 'vgg-16', 'resnet-34' and 'resnet-152'.
model = 'vgg-16'
#model = 'resnet-34'
#model = 'resnet-152'

In [None]:
class FeatureExtractor(nn.Module):
    """Feature extractor built from a network exposing `features`, `avgpool`
    and `classifier` (as the VGG-16 model passed in below does).

    The forward pass runs the convolutional stack, the average pooling, a
    flatten, and only the first layer of the classifier.
    """

    def __init__(self, model):
        super().__init__()
        self.features = nn.Sequential(*list(model.features))
        self.pooling = model.avgpool
        self.flatten = nn.Flatten()
        self.fc = model.classifier[0]

    # `forward` must be a method of the class; the original nested it inside
    # __init__, so calling the module raised NotImplementedError.
    def forward(self, x):
        out = self.features(x)
        out = self.pooling(out)
        out = self.flatten(out)
        out = self.fc(out)
        return out


# The network is bound to `backbone` so that `model` keeps the selection
# string; the feature-loading cell further down compares `model` against
# these names again.
if model == 'vgg-16':
    backbone = models.vgg16(pretrained=True)
    new_model = FeatureExtractor(backbone)

elif model == 'resnet-34':
    backbone = models.resnet34(pretrained=True)
    # Drop the last child module of the network.
    new_model = torch.nn.Sequential(*(list(backbone.children())[:-1]))

elif model == 'resnet-152':
    backbone = models.resnet152(pretrained=True)
    new_model = torch.nn.Sequential(*(list(backbone.children())[:-1]))

else:
    raise ValueError("Unknown model {!r}".format(model))

# Fall back to the CPU when no GPU is available instead of failing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model = new_model.to(device)
# NOTE(review): the network is never switched to evaluation mode
# (`new_model.eval()`), so the ResNet batch-norm layers presumably run in
# training mode. This is left unchanged to stay comparable with the
# pre-computed feature files - confirm how those were produced.

You can skip this cell if you want to use the features already extracted.

In [None]:
# Pre-processing applied to every image before it is fed to the network:
# centre crop to 512 px, resize to 448 px, convert to a tensor.
# NOTE(review): cv2.imread presumably returns channels in BGR order and no
# mean/std normalisation is applied here - confirm this matches what the
# pre-trained torchvision weights expect.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.CenterCrop(512),
    transforms.Resize(448),
    transforms.ToTensor(),
])

# One flattened feature vector per map image.
features = []

for img_m in m_imgs:
    img_path = os.path.join('drive/MyDrive/data02/', img_m)
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    # Add the batch dimension and move the tensor to the model's device.
    img = transform(img).reshape(1, 3, 448, 448).to(device)
    with torch.no_grad():
        feature = new_model(img)
    flat_feature = feature.cpu().detach().numpy().reshape(-1)
    features.append(flat_feature)

features = np.array(features)

In [None]:
#SAVE DESCRIPTORS (uncomment to persist freshly extracted features)
#with open('data02/Model-descriptors-map.bin', 'wb') as f:
#    pickle.dump(descriptors, f)

#LOAD DESCRIPTORS
feature_files = {
    'vgg-16': 'data02/VGG-16-descriptors-map.bin',
    'resnet-34': 'data02/RESNET-34-descriptors-map.bin',
    'resnet-152': 'data02/RESNET-152-descriptors-map.bin',
}
# `model` must still be the selection string here. The original
# model-construction cell rebinds it to the network object, in which case
# nothing can be looked up.
feature_file = feature_files.get(model) if isinstance(model, str) else None

if feature_file is None:
    print("No pre-computed features loaded: `model` is not one of {}".format(sorted(feature_files)))
else:
    # NOTE: pickle can execute arbitrary code while loading - only open .bin
    # files obtained from a trusted source.
    with open(feature_file, 'rb') as f:
        descriptors = pickle.load(f)
    # The scaling and retrieval cells below read `features`, so bind the
    # loaded matrix to that name as well; otherwise skipping the extraction
    # cell leaves it undefined.
    # NOTE(review): assumes the .bin files hold the unscaled map features -
    # confirm.
    features = descriptors
    print(descriptors.shape)

In [None]:
# Standardise every feature dimension over the map images. The fitted
# `scaler` is kept because the query features are transformed with it later.
scaler = preprocessing.StandardScaler()
features = scaler.fit_transform(features)
print(features.shape)

In [None]:

distance_all = ["cosine","minkowski","euclidian","manhattan"]

# The original evaluated the first 500 queries; the cap keeps that limit but
# avoids an IndexError when fewer query images are available.
num_queries = min(500, len(q_imgs))

# A query's feature vector does not depend on the retrieval distance, so the
# network is run once per query instead of once per (distance, query) pair.
query_features = []
for i in range(num_queries):
    img = cv2.imread(os.path.join('drive/MyDrive/data02/',q_imgs[i]),cv2.IMREAD_UNCHANGED)
    img = transform(img)
    img = img.reshape(1, 3, 448, 448)
    img = img.to(device)
    with torch.no_grad():
        img_feature = new_model(img)

    # The scaler expects a (1, n_features) row; flatten back afterwards.
    img_feature = img_feature.cpu().detach().numpy().reshape(1, -1)
    img_feature = scaler.transform(img_feature).reshape(-1)
    query_features.append(img_feature)

# For every retrieval distance: the ranking and the ground-truth relevant map
# indices of every query, both keyed by query index.
retrieved_all = []
relevant_all = []
for item in distance_all:
    retrieved = {}
    relevant = {}
    for i, img_feature in enumerate(query_features):
        retrieved[i] = retrieve_images(features,img_feature,item)
        relevant[i] = list(np.where(sim[i, :] == 1)[0])
    retrieved_all.append(retrieved)
    relevant_all.append(relevant)

In [None]:
# Report MAP, Top-Recall@k and MAP@k for every evaluated distance metric.
k_list = [1,5,10,50,100,200]
for item, relevant_by_query, retrieved_by_query in zip(distance_all, relevant_all, retrieved_all):
    print('Distance Metric --> {}'.format(item))
    mapr = mean_average_precision(relevant_by_query, retrieved_by_query)
    print('Mean Average Precision (MAP) with distance {0} --> {1}'.format(item,round(mapr,3)))
    # A separate loop variable is used so the distance name is not overwritten.
    for k in k_list:
        top = top_recall_at_k(relevant_by_query, retrieved_by_query, k)
        # The original template used index {2} with only two arguments, which
        # raises IndexError.
        print('Top-Recall-at-k with k {0} --> {1}'.format(k,round(top,3)))
        map_at_k = mean_average_precision_at_k(relevant_by_query, retrieved_by_query, k)
        print('MAP-at-k {0} --> {1}'.format(k,round(map_at_k,3)))
    print("\n")