In [12]:
import numpy as np

In [55]:
def populate_image_id_to_semantic_database(model, normalize, image_id_to_features):
    """ Builds the lookup table from image ids to normalized semantic 
    features.
    
        Parameters
        ----------
        model : function
            called on the features of one image; its result is handed 
            to normalize
            
        normalize : function
            called on the output of model; its result is what gets 
            stored in the database
            
        image_id_to_features : dictionary
            maps image ids to image features 
                    
        Returns
        -------
        A new dictionary with the same keys (in the same order) as 
        image_id_to_features, whose values are normalize(model(features)). """
    
    database = {}
    for image_id, features in image_id_to_features.items():
        semantic = model(features)
        database[image_id] = normalize(semantic)
    return database

In [56]:
def test_model(features):
    M = np.array([[0, 1],
                  [2, 0],
                  [1, 6]])
    return np.matmul(features, M)

In [57]:
def test_normalize(features):
    return features / np.linalg.norm(features)

In [58]:
# Two toy images, each described by a (2, 3) array of raw features.
test_features = np.array([[[5., 7., 6.], 
                           [7., 9., 9.]],
                          
                          [[9., 4., 5.],
                           [3., 6., 5.]]])

test_id_to_features = {0:test_features[0], 1:test_features[1]}

# Expected database, built by applying the toy model and normalizer by hand.
test_database = {0:test_normalize(test_model(test_features[0])),
                 1:test_normalize(test_model(test_features[1]))}

# Build the database once with the function under test (the original cell
# called a misspelled, undefined name and rebuilt the database per id).
test_populated_database = populate_image_id_to_semantic_database(test_model, test_normalize, test_id_to_features)

# Same ids on both sides, then a tolerance-based comparison of the float features.
assert test_populated_database.keys() == test_database.keys()
for test_id in test_database:
    np.testing.assert_allclose(test_populated_database[test_id], test_database[test_id])

In [62]:
from sklearn.metrics.pairwise import cosine_similarity

def search_database(query_vec, image_id_to_semantic_database, k):
    """ Given the semantic feature vector of a query, return the ids of
    the k database images most similar to it, ranked by cosine similarity.
    
        Parameters
        ----------
        query_vec : nd.array
            semantic features of a query; flattened before comparison
            
        image_id_to_semantic_database : dictionary
            maps image ids to semantic features; each entry is flattened 
            and must then have the same length as the flattened query
            
        k : scalar
            determines how many of the top image ids should be returned 
                    
        Returns
        -------
        An array of shape (min(k, number of images),) containing the image 
        ids of the top images, most similar first. Ties keep the database's 
        iteration order. Entries with zero or negative similarity are ranked 
        like any other, and no placeholder ids are returned when the database 
        holds fewer than k images. 
        
        Raises
        ------
        ValueError
            if k is negative """
    
    if k < 0:
        raise ValueError("k must be non-negative")
    
    image_ids = list(image_id_to_semantic_database)
    num_results = min(k, len(image_ids))
    if num_results == 0:
        return np.array([])
    
    query = np.asarray(query_vec, dtype=float).ravel()
    semantics = np.stack([np.asarray(image_id_to_semantic_database[image_id], dtype=float).ravel()
                          for image_id in image_ids])
    
    # cosine similarity of every database row against the query in one shot
    norms = np.linalg.norm(semantics, axis=1) * np.linalg.norm(query)
    # a zero vector gets similarity 0 instead of a division by zero
    norms[norms == 0] = 1.0
    cos_sims = semantics @ query / norms
    
    # stable sort on the negated similarities: descending order, ties in insertion order
    ranking = np.argsort(-cos_sims, kind="stable")[:num_results]
    return np.array(image_ids)[ranking]

In [61]:
# NOTE: neither the query nor the database entries below are normalized;
# the expected ranking is by cosine similarity, which does not depend on vector length

test_query_vec = np.array([6, 2, 2, 5, 0])

test_image_id_to_semantic_database = {0:np.array([6, 1, 4, 3, 3]), 
                                      1:np.array([8, 7, 6, 3, 2]), 
                                      2:np.array([0, 0, 2, 9, 9]), 
                                      3:np.array([7, 8, 6, 0, 9]), 
                                      4:np.array([3, 2, 4, 3, 0])}

# expected ids, from most to least similar to the query
test_top_k = np.array([4, 0, 1, 3, 2])

# rank the whole database: k is the number of images, not the length of the query vector
# (the two only coincidentally both equal 5 here)
np.testing.assert_array_equal(search_database(test_query_vec, test_image_id_to_semantic_database, k = len(test_image_id_to_semantic_database)), test_top_k)

In [17]:
from IPython.display import Image
from IPython.display import display

def display_images(ids, image_id_to_url):
    """ Renders, inside a jupyter notebook cell, the image behind 
    each of the given ids. 
    
        Parameters
        ----------
        ids : nd.array
            ids of the images to show, in display order
            
        image_id_to_url : dictionary
            maps image ids to the url each image is loaded from """
    # one IPython Image per id, all handed to a single display() call
    display(*[Image(url = image_id_to_url[image_id]) for image_id in ids])

In [21]:
# smoke test: only the image stored under id 1 should be rendered
test_ids = np.array([1])

test_image_id_to_url = {
    0:"https://upload.wikimedia.org/wikipedia/commons/thumb/3/31/Ice_Cream_dessert_02.jpg/220px-Ice_Cream_dessert_02.jpg",
    1:"https://i.ytimg.com/vi/SfLV8hD7zX4/maxresdefault.jpg",
}

display_images(ids = test_ids, image_id_to_url = test_image_id_to_url)