In [1]:
from transformers import CLIPProcessor, CLIPModel

# The weights and the preprocessor are both fetched from the same checkpoint,
# so the identifier is spelled out once and reused.
MODEL_ID = "openai/clip-vit-base-patch16"

# pre-trained CLIP model
model = CLIPModel.from_pretrained(MODEL_ID)
# preprocessor that builds the inputs the model expects
processor = CLIPProcessor.from_pretrained(MODEL_ID)

In [2]:
from typing import List
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

In [34]:
import requests
from pathlib import Path
import torch

def download_and_open_image(url, save_path, timeout=30):
    """Download an image to ``save_path`` and open it with PIL.

    Args:
        url: HTTP(S) address of the image to fetch.
        save_path: Destination file path; missing parent directories are
            created.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The ``PIL.Image.Image`` opened from the saved file.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    sample_path = Path(save_path)
    sample_path.parent.mkdir(parents=True, exist_ok=True)

    r = requests.get(url, timeout=timeout)
    # Fail here rather than writing an HTML error page to disk as a ".jpg",
    # which would only surface later as an obscure PIL decoding error.
    r.raise_for_status()

    with sample_path.open("wb") as f:
        f.write(r.content)

    image = Image.open(sample_path)
    return image

# Sample picture used throughout the notebook: where it comes from and where
# the local copy is stored.
image_url = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_tulips.jpg"
save_path = "data/coco_tulips.jpg"

# Fetch the file and keep the opened PIL image for the cells below.
image = download_and_open_image(url=image_url, save_path=save_path)



def get_single_image_embedding(image):
    """Return the CLIP image features for ``image`` as a NumPy array.

    The image is run through the module-level ``processor`` and ``model``
    with gradient tracking disabled. Callers in this notebook take
    ``.tolist()[0]`` of the result, i.e. they treat the first axis as a
    batch axis.
    """
    pixel_inputs = processor(images=image, return_tensors="pt")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        features = model.get_image_features(**pixel_inputs)

    return features.cpu().detach().numpy()

def get_single_text_embedding(text):
    """Return the CLIP text features for ``text`` as a NumPy array.

    Args:
        text: The text to embed, passed to the module-level ``processor``.

    Returns:
        ``model.get_text_features`` output converted to a NumPy array.
        Callers in this notebook take ``.tolist()[0]`` of the result, i.e.
        they treat the first axis as a batch axis.
    """
    # truncation=True clips text to the tokenizer's maximum length instead of
    # letting an over-long prompt fail inside the text encoder; padding=True
    # lets texts of different lengths be batched. Neither changes the result
    # for a single short string.
    inputs = processor(
        text=text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        text_features = model.get_text_features(**inputs)
    return text_features.cpu().detach().numpy()

import lancedb
import os
from PIL import Image

db = lancedb.connect("./.lancedb")

# Query embedding used by the search further down.
text_embedding = get_single_text_embedding("dog, black, in hollywood")

# Get all images in the data folder
data_folder = "data"
image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
# sorted() makes the insertion order reproducible; os.listdir order is arbitrary.
image_files = sorted(
    f for f in os.listdir(data_folder) if f.lower().endswith(image_extensions)
)

# Generate one record per image file.
# The sample image downloaded earlier already lives in this folder
# (data/coco_tulips.jpg), so it must not be inserted separately as well;
# doing so stored it twice in the table.
records = []
for image_file in image_files:
    image_path = os.path.join(data_folder, image_file)
    # A context manager closes the file handle after embedding, and a
    # dedicated name keeps the notebook-level `image` variable intact.
    with Image.open(image_path) as folder_image:
        embedding = get_single_image_embedding(folder_image)
    records.append({
        "vector": embedding.tolist()[0],
        "image": image_file
    })

# Create the table once with every record instead of one add() per row;
# mode="overwrite" keeps the cell idempotent across re-runs.
tbl = db.create_table(name="pt_table", data=records, mode="overwrite")



# Nearest-neighbour search: the text embedding is the query vector and the
# four closest rows are returned as a list of dicts.
query_vector = text_embedding.tolist()[0]
a = tbl.search(query=query_vector).limit(4).to_list()

# Show only the file names of the hits.
[hit['image'] for hit in a]


['dog2.jpg', 'coco2.jpg', 'dog1.jpg', 'empty_road_mapillary.jpg']

In [None]:
import openvino as ov

fp16_model_path = Path("clip-vit-base-patch16.xml")
model.config.torchscript = True

# `inputs` was never defined at notebook scope (it only existed as a local
# inside the embedding helpers), so this cell failed with NameError on a
# fresh kernel. Build the example input explicitly from the current image.
inputs = processor(images=image, return_tensors="pt")

# Convert only once; later runs reuse the saved IR file.
if not fp16_model_path.exists():
    # NOTE(review): a bound method is passed here rather than a
    # torch.nn.Module - confirm the installed OpenVINO version accepts it.
    ov_model = ov.convert_model(model.get_image_features, example_input=dict(inputs))
    ov.save_model(ov_model, fp16_model_path)

from scipy.special import softmax

# create OpenVINO core object instance
core = ov.Core()
# 'AUTO' leaves the choice of inference device to OpenVINO.
compiled_model = core.compile_model(fp16_model_path, 'AUTO')
ov_output = compiled_model(dict(inputs))
print(ov_output)


In [29]:
# Display the second search hit in full; `a` is the result list produced by
# the table search above, and the displayed dict includes the stored vector.
a[1]



{'vector': [-0.07428831607103348,
  -0.8701529502868652,
  -0.5326011180877686,
  0.0950397253036499,
  -0.12399178743362427,
  -0.5727064609527588,
  0.045978203415870667,
  0.652576744556427,
  0.2907596230506897,
  -0.005152791738510132,
  0.13310663402080536,
  -0.16770029067993164,
  -0.053022198379039764,
  -0.19427023828029633,
  -0.4826986789703369,
  0.07951705157756805,
  -0.49113449454307556,
  0.5952075719833374,
  -0.16153821349143982,
  0.2674843668937683,
  0.03241335600614548,
  -0.18639671802520752,
  0.042672231793403625,
  0.23066405951976776,
  -0.5784958004951477,
  -0.5509401559829712,
  -0.7237839102745056,
  -0.03904959559440613,
  0.028246775269508362,
  0.2380460500717163,
  -0.0828205943107605,
  -0.1353408396244049,
  -0.2660818099975586,
  0.0741080641746521,
  -0.9574709534645081,
  0.020300760865211487,
  0.6968226432800293,
  -5.558133125305176e-05,
  0.4905878007411957,
  0.7135616540908813,
  0.3045003414154053,
  -0.4894503355026245,
  0.1411966830492