## Load the open-source CLIP model and create a function that generates an embedding for a single text

In [1]:
import clip
import torch

# Run on the GPU when PyTorch reports a usable CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-B/32 CLIP checkpoint onto the chosen device. `preprocess` is
# returned alongside the model; it is not used by the cells shown here.
model, preprocess = clip.load("ViT-B/32", device=device)

def get_single_embedding(text):
    """Embed one piece of text with the loaded CLIP model.

    Parameters
    ----------
    text : str
        The text to embed. It is tokenized as a batch of one.

    Returns
    -------
    numpy.ndarray
        The first (and only) row of the encoded batch, scaled to unit
        length along its last dimension and moved to the CPU.
    """
    # Tokenize as a one-element batch and place the tokens on the model's device.
    tokens = clip.tokenize([text]).to(device)

    # Inference only: gradients are not needed for encoding or scaling.
    with torch.no_grad():
        encoded = model.encode_text(tokens)
        unit_length = encoded / encoded.norm(dim=-1, keepdim=True)

    # Drop the batch dimension by taking row 0 of the NumPy copy.
    return unit_length.cpu().numpy()[0]

## Create a function to perform KNN search in OpenSearch

In [26]:
import os
from dotenv import load_dotenv
from opensearchpy import OpenSearch

# Load variables from a .env file (python-dotenv) into the process
# environment, then read the cluster address from there so it never has to be
# written into the notebook.
load_dotenv()
SERVICE_URI = os.environ.get("SERVICE_URI")
index_name = "photos"  # Update with your index name

# Shared client used by the search function below; SSL is always enabled.
opensearch = OpenSearch(hosts=SERVICE_URI, use_ssl=True)

def knn_search(text, k=4):
    """Run a k-NN query against the `embedding` field for a text prompt.

    The text is embedded with ``get_single_embedding`` and the resulting
    vector is sent to the index named by ``index_name``.

    Parameters
    ----------
    text : str
        Query text to embed and search for.
    k : int, optional
        Number of nearest neighbours to request and the maximum number of
        hits to return. Defaults to 4, the value previously hard-coded.

    Returns
    -------
    The response of ``opensearch.search`` for the query, unchanged.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    vector = get_single_embedding(text)

    body = {
        # Per the OpenSearch k-NN docs, `k` is applied per shard/segment and
        # `size` sets how many results the whole query returns. Sending both
        # keeps the number of hits equal to `k` regardless of how the index
        # is sharded or what the default page size is.
        "size": k,
        "query": {
            "knn": {
                "embedding": {
                    "vector": vector.tolist(),  # JSON needs a plain list
                    "k": k,
                }
            }
        },
    }

    return opensearch.search(index=index_name, body=body)



## Create a function to display the images returned by the search

In [22]:
from IPython.display import display, Image
import pandas as pd

# NOTE(review): nothing in this cell reads a TSV file into a DataFrame; this comment looks stale.


def display_images(result, max_images=4):
    """Render the images referenced by the hits of a search response.

    Parameters
    ----------
    result : mapping
        Search response. Hits are read from ``result['hits']['hits']`` and
        each hit's URL from ``hit['_source']['image_url']``.
    max_images : int, optional
        Upper bound on how many hits are processed. Defaults to 4, the
        value previously hard-coded.

    Returns
    -------
    None
        Output is produced through ``print`` and ``display`` only.
    """
    has_hits_key = 'hits' in result and 'hits' in result['hits']
    hits = result['hits']['hits'] if has_hits_key else []

    # An empty hit list used to print nothing at all, even though the message
    # below is worded to cover that case; report it explicitly.
    if not hits:
        print("Invalid result format or no hits found.")
        return

    for position, hit in enumerate(hits[:max_images], start=1):
        if '_source' in hit and 'image_url' in hit['_source']:
            print(f"Displaying image {position}:")
            display(Image(url=hit['_source']['image_url']))
        else:
            print(f"Hit {position} does not contain an 'image_url' key.")



## Run example

In [29]:
# Free-text query; knn_search embeds it and looks up the nearest images.
text_input = "room"  # Provide your text input here
# Keep the raw search response so it can be inspected or re-displayed.
result = knn_search(text_input)
# Show the images referenced by the returned hits.
display_images(result)

Displaying image 1:


Displaying image 2:


Displaying image 3:


Displaying image 4:
