## Load the open-source CLIP model and create a function to generate an embedding for a single text

In [None]:
import clip
import torch

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-B/32 CLIP checkpoint (and its image preprocessing transform)
# onto the selected device.
model, preprocess = clip.load("ViT-B/32", device=device)

def get_single_embedding(text):
    """Embed a single text with the CLIP text encoder.

    Args:
        text: The string to embed.

    Returns:
        A NumPy array holding the text feature vector, scaled by its norm
        along the last dimension.
    """
    # Tokenize as a one-element batch and move the tokens to the model's device.
    tokens = clip.tokenize([text]).to(device)

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        features = model.encode_text(tokens)
        features /= features.norm(dim=-1, keepdim=True)

    # Drop the batch dimension so callers get just the one vector.
    batch = features.cpu().numpy()
    return batch[0]

## Create a function to perform a k-NN search in OpenSearch

In [None]:
import os
from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Connection string for the OpenSearch service (None when the variable is unset).
SERVICE_URI = os.environ.get("SERVICE_URI")

# Name of the index that is queried; update with your index name.
index_name = "photos"

from opensearchpy import OpenSearch

# Client used by the search cells below; SSL is enabled for the connection.
opensearch = OpenSearch(SERVICE_URI, use_ssl=True)

def knn_search(text, k=2):
    """Run a k-NN query against the OpenSearch index for a text prompt.

    The text is embedded with ``get_single_embedding`` and the resulting
    vector is matched against the ``embedding`` field of ``index_name``.

    Args:
        text: Free-text query to embed and search for.
        k: Number of nearest neighbors to request. Defaults to 2, the value
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        The raw response returned by ``opensearch.search``.
    """
    vector = get_single_embedding(text)

    body = {
        "query": {
            "knn": {
                "embedding": {
                    # The embedding is a NumPy array; convert it to a plain
                    # list so it can be placed in the request body.
                    "vector": vector.tolist(),
                    "k": k,
                }
            }
        }
    }

    # Perform the search and hand the response back untouched.
    return opensearch.search(index=index_name, body=body)



## Create a function to display the image from a search result

In [None]:
from IPython.display import display, Image
import pandas as pd

# Load the tab-separated photo metadata file into a DataFrame
# (read_table uses a tab as its default separator).
file_path = 'photos.tsv000'
df = pd.read_table(file_path)


def display_image_by_id(result):
    """Display the image referenced by the first hit of a search response.

    Args:
        result: Search response; expected to hold the list of hits under
            ``result['hits']['hits']``, each hit carrying an ``image_url``
            in its ``_source``.

    Prints a message instead of displaying anything when the response has
    no ``hits`` structure or the hit list is empty.
    """
    # Guard: the response must have the nested hits structure.
    if 'hits' not in result or 'hits' not in result['hits']:
        print("Invalid result format or no hits found.")
        return

    matches = result['hits']['hits']

    # Guard: nothing matched the query.
    if not matches:
        print("No hits found in the result.")
        return

    # Only the first hit is shown; a width parameter is appended to its URL.
    top_url = matches[0]['_source']['image_url']
    sized_url = f"{top_url}?w=640"
    display(Image(url=sized_url))





## Run example

In [None]:
# Free-text description of the photo to look for; provide your own text here.
text_input = "dog at home"
# Embed the text and query the index for its nearest neighbors.
result = knn_search(text_input)
# Show the image of the first hit (or print a message if there is none).
display_image_by_id(result)