# IVF (Inverted File Index) - Similarity Search

* [Vector Index Basics and the Inverted File Index](https://zilliz.com/blog/vector-index)
* [Choosing the Right Vector Index for Your Project](https://zilliz.com/learn/choosing-right-vector-index-for-your-project)
* [Visual Embedding Vectors](https://github.com/zilliztech/feder)
* [Powering Semantic Similarity Search in Computer Vision with State of the Art Embeddings](https://zilliz.com/learn/embedding-generation)

In [3]:
import os
import sys
import shutil
import pandas as pd
import numpy as np

from PIL import Image
from random import randint
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# Register tqdm's progress-bar integration with pandas.
tqdm.pandas()

# Load the cached dataframe from the working directory. The message below
# suggests it is produced by the 1_similarity_search tutorial; when the file
# is absent, `image_df` is left undefined and only the hint is printed.
# NOTE(review): read_pickle unpickles arbitrary objects -- only load a file
# you generated yourself.
if not os.path.exists("image_df.pkl"):
    print("RUN THE TUTORIAL 1_similarity_search.ipynb")
else:
    image_df = pd.read_pickle("image_df.pkl")

In [4]:
def plot_similar(df: pd.DataFrame, embedding_col: str, query_index: int, k_neighbors=5):
    """
    Helper function to take a dataframe index as input query
    and display the k nearest neighbors, most similar first.

    Args:
        df: Dataframe with an "img_path" column and an embedding column.
        embedding_col: Name of the column holding one embedding per row.
        query_index: Index label (as found in ``df.index``) of the query row.
        k_neighbors: Maximum number of neighbors to display. Fewer are shown
            when the dataframe has fewer other rows.

    Raises:
        ValueError: If ``k_neighbors`` is negative, or ``query_index``
            matches more than one row.
        KeyError: If ``query_index`` is not present in ``df.index``.
    """
    if k_neighbors < 0:
        raise ValueError(f"k_neighbors must be non-negative, got {k_neighbors}")

    # The similarity array is positional, so resolve the query label to a
    # row position once and use positions for every lookup below. This keeps
    # the function correct for dataframes without a default 0..n-1 index.
    query_pos = df.index.get_loc(query_index)
    if not isinstance(query_pos, (int, np.integer)):
        raise ValueError(f"query_index {query_index!r} matches more than one row")

    # Calculate pairwise cosine similarities between query and all rows
    embeddings = df[embedding_col].values.tolist()
    similarities = cosine_similarity([embeddings[query_pos]], embeddings)[0]

    # Rank every row by descending similarity, drop the query itself, then
    # keep the best k. Excluding the query before truncating guarantees at
    # most k neighbors even when other rows tie with the query.
    ranked = np.argsort(-similarities, kind="stable")
    nearest_positions = ranked[ranked != query_pos][:k_neighbors]

    # Plot input image
    image_paths = df["img_path"]
    with Image.open(image_paths.iloc[query_pos]) as img:
        plt.imshow(img.convert("RGB"))
    plt.title("Query Product")

    # Plot nearest neighbors images
    plt.figure(figsize=(20, 4))
    plt.suptitle("Similar Products")
    for idx, neighbor in enumerate(nearest_positions):
        plt.subplot(1, len(nearest_positions), idx + 1)
        with Image.open(image_paths.iloc[neighbor]) as img:
            plt.imshow(img.convert("RGB"))
            plt.title(f"Cosine Similarity: {similarities[neighbor]:.3f}")
    plt.tight_layout()

In [5]:
def text_image_search(df: pd.DataFrame, text_query: str, img_emb_col: str, k=5):
    """
    Helper function to take a text query as input and display the k nearest
    neighbor images, most similar first.

    Args:
        df: Dataframe with an "img_path" column and an image-embedding column.
        text_query: Free-text query to embed and compare against the images.
        img_emb_col: Name of the column holding one image embedding per row.
        k: Maximum number of images to display. Fewer are shown when the
            dataframe has fewer rows.

    Raises:
        ValueError: If ``k`` is negative.

    NOTE(review): relies on a module-level ``model`` exposing ``encode`` that
    is not defined in this part of the file -- presumably a text/image
    embedding model sharing a vector space with ``img_emb_col``; confirm.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    # Calculate the text embeddings
    text_emb = model.encode(text_query).tolist()

    # Calculate the pairwise cosine similarities between text query and images from all rows
    similarities = cosine_similarity([text_emb], df[img_emb_col].values.tolist())[0]

    # Find nearest neighbors: rank all rows by descending similarity and keep
    # the first k. Slicing clamps k to the number of rows, and k == 0 yields
    # no neighbors instead of every row.
    nearest_positions = np.argsort(-similarities, kind="stable")[:k]

    # Print query text
    print(f"Query text: {text_query}")

    # Plot nearest neighbors images. The ranks are row positions, so look the
    # paths up positionally rather than by index label.
    image_paths = df["img_path"]
    plt.figure(figsize=(20, 4))
    plt.suptitle("Similar Products")
    for idx, neighbor in enumerate(nearest_positions):
        plt.subplot(1, len(nearest_positions), idx + 1)
        with Image.open(image_paths.iloc[neighbor]) as img:
            plt.imshow(img.convert("RGB"))
            plt.title(f"Cosine Sim: {similarities[neighbor]:.3f}")
    plt.tight_layout()