In [1]:
from transformers import CLIPProcessor, CLIPModel

# load pre-trained model
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
# load preprocessor for model input
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

In [2]:
import requests
from PIL import Image
from pathlib import Path
import torch

def download_and_open_image(url, save_path):
    sample_path = Path(save_path)
    sample_path.parent.mkdir(parents=True, exist_ok=True)
    r = requests.get(url)

    with sample_path.open("wb") as f:
        f.write(r.content)

    image = Image.open(sample_path)
    return image

In [3]:
image_url = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_tulips.jpg"
save_path = "data/coco_tulips.jpg"
image = download_and_open_image(image_url, save_path)

In [4]:
import torch

class ImageFeatureExtractor(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, pixel_values):
        return self.model.get_image_features(pixel_values)
    
class TextFeatureExtractor(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids):
        return self.model.get_text_features(input_ids)


In [5]:
image_inputs = processor(
        images = image,
        return_tensors="pt"
    )

text_inputs = processor(
    text = "dog, black, in hollywood",
    return_tensors="pt"
)

import openvino as ov
from pathlib import Path
core = ov.Core()

image_fp16_model_path = Path("image_clip-vit-base-patch16.xml")
model.config.torchscript = True

if not image_fp16_model_path.exists():
    ov_model = ov.convert_model(ImageFeatureExtractor(model), example_input=dict(image_inputs))
    ov.save_model(ov_model, image_fp16_model_path)
compiled_image_model = core.compile_model(image_fp16_model_path, 'AUTO')

text_fp16_model_path = Path("text_clip-vit-base-patch16.xml")
model.config.torchscript = True

if not text_fp16_model_path.exists():
    ov_model = ov.convert_model(TextFeatureExtractor(model), example_input=dict(text_inputs))
    ov.save_model(ov_model, text_fp16_model_path)
compiled_text_model = core.compile_model(text_fp16_model_path, 'AUTO')



# Prepare the image input
image_input = processor(images=image, return_tensors="pt")["pixel_values"]

# Run inference
ov_output = compiled_image_model(image_input)
image_features = ov_output[compiled_image_model.output(0)]
print(image_features)


def get_single_image_embedding(image):
    # Get single image embeddings
    inputs = processor(
        images=image,
        return_tensors="pt"
    )
    image_input = inputs["pixel_values"]
    ov_output = compiled_image_model(image_input)
    image_features = ov_output[compiled_image_model.output(0)]
    return image_features

def get_single_text_embedding(text):
    inputs = processor(text=text, return_tensors="pt")
    text_input = inputs["input_ids"]
    ov_output = compiled_text_model(text_input)
    text_features = ov_output[compiled_text_model.output(0)]
    return text_features

[[-2.19914958e-01 -1.43779957e+00  1.68696702e-01  2.35982444e-02
  -2.01770574e-01  4.06838283e-02 -1.22514576e-01  7.25643277e-01
  -3.46505731e-01  2.71688737e-02 -4.83018875e-01 -2.75728166e-01
   3.31674218e-01 -1.37439787e-01 -2.81778395e-01  7.80049013e-03
  -1.68826565e-01  5.02239466e-01  2.29350448e-01 -2.24074557e-01
  -3.43964919e-02 -4.62107480e-01  3.59208375e-01  2.20400229e-01
  -3.97568852e-01 -2.95854509e-01 -6.45035267e-01  1.78328961e-01
   5.03996491e-01 -7.80545101e-02 -5.68075553e-02  2.93036699e-01
  -2.38245726e-01 -6.27012908e-01 -2.56510675e-01  2.13830665e-01
   1.87109753e-01  1.52856886e-01  3.46124470e-01 -8.24485123e-01
   6.21066839e-02 -4.10600126e-01 -2.53095329e-01 -1.94112286e-01
  -3.52725983e-01  2.43654668e-01  3.13615471e-01 -8.13287348e-02
   1.29491597e-01 -1.17533110e-01  2.05151394e-01  1.52452737e-01
   2.40767673e-01  2.24273264e-01  4.39472407e-01 -2.97078528e-02
   5.37916601e-01  5.04969656e-01 -1.48654059e-01  4.52014953e-01
   9.18075

In [6]:
from pathlib import Path
import lancedb
db = lancedb.connect("./.lancedb")
embedding = get_single_image_embedding(image)
text_embedding = get_single_text_embedding("japanese writing")
tbl = db.create_table(name= "pt_table", data=[
    {"vector": embedding.tolist()[0], "name": "coco_tulips.jpg", "path": "data/coco_tulips.jpg"}
], mode= "overwrite")

import os
from PIL import Image

# Get all images in the data folder
data_folder = Path.home() / "Desktop" / "data"
image_files = [f for f in os.listdir(data_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))]

# Generate embeddings and add to the database
for image_file in image_files:
    image_path = os.path.join(data_folder, image_file)
    image = Image.open(image_path)
    
    # Generate embedding
    embedding = get_single_image_embedding(image)
    
    # Add to the database
    tbl.add([{
        "vector": embedding.tolist()[0],
        "name": image_file,
        "path": os.path.join(data_folder, image_file)
    }])



a= tbl.search(query=text_embedding.tolist()[0]).limit(4).to_list()


In [7]:
a[1]



{'vector': [0.2802734375,
  -0.4453125,
  0.185302734375,
  0.352294921875,
  0.225341796875,
  -0.04150390625,
  -0.2425537109375,
  -0.2802734375,
  -0.1136474609375,
  -0.1468505859375,
  0.296142578125,
  -0.3017578125,
  -0.453125,
  -0.02935791015625,
  0.43212890625,
  -0.07562255859375,
  -0.019866943359375,
  0.281982421875,
  -0.00115966796875,
  0.1630859375,
  -0.30712890625,
  -0.1307373046875,
  0.260986328125,
  0.389892578125,
  -0.04461669921875,
  0.05657958984375,
  0.35205078125,
  0.000640869140625,
  0.0660400390625,
  0.1651611328125,
  0.01934814453125,
  -0.269287109375,
  0.318359375,
  -0.0182037353515625,
  -0.7216796875,
  0.30322265625,
  0.15625,
  -0.6201171875,
  0.400634765625,
  -0.0684814453125,
  0.07672119140625,
  -0.045989990234375,
  -0.298828125,
  -0.0888671875,
  -0.0089111328125,
  -0.1759033203125,
  -0.0217132568359375,
  0.236328125,
  0.4951171875,
  -0.388671875,
  0.188720703125,
  -0.04620361328125,
  0.48876953125,
  0.07464599609375

In [8]:
import os
import time
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import pickle
import pypdf

desktop_path = Path.home() / "Desktop"

def index_desktop(no_update=False):
    last_modified_times_file = Path("last_modified_times.pkl")
    if last_modified_times_file.exists():
        with open(last_modified_times_file, "rb") as f:
            last_modified_times = pickle.load(f)
    else:
        last_modified_times = {}
    for file in desktop_path.glob('**/*'):
        if file.is_file():
            current_mtime = file.stat().st_mtime
            if file not in last_modified_times or current_mtime != last_modified_times[file]:
                update_embedding(file)
                last_modified_times[file] = current_mtime
    if not no_update:
        with open(last_modified_times_file, "wb") as f:
            pickle.dump(last_modified_times, f)

def update_embedding(file_path):
    try:
        if file_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif']:
            image = Image.open(file_path)
            embedding = get_single_image_embedding(image)
            
            tbl.add([{
                "vector": embedding.tolist()[0],
                "name": file_path.name,
                "path": str(file_path)
            }])
            print(f"Updated image embedding for {file_path}")
        elif file_path.suffix.lower() in ['.txt', '.md']:
            text = file_path.read_text()
            text_embedding = get_single_text_embedding(text)[:50]
            tbl.add([{
                "vector": text_embedding.tolist()[0],
                "name": file_path.name,
                "path": str(file_path)
            }])
            print(f"Updated text embedding for {file_path}")
        elif file_path.suffix.lower() in ['.pdf']:
            pdf_reader = pypdf.PdfReader(file_path)
            text = "\n".join([page.extract_text() for page in pdf_reader.pages])[:50]
            print(type(text))
            text_embedding = get_single_text_embedding(text)
            tbl.add([{
                "vector": text_embedding.tolist()[0],
                "name": file_path.name,
                "path": str(file_path)
            }])
            print(f"Updated pdf embedding for {file_path}")
        else:
            print(f"Skipped file: {file_path}")
    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")

class DesktopHandler(FileSystemEventHandler):
    def on_created(self, event):
        if not event.is_directory:
            update_embedding(Path(event.src_path))

    def on_modified(self, event):
        if not event.is_directory:
            update_embedding(Path(event.src_path))

In [9]:
# Initial indexing
index_desktop(no_update=True)

Skipped file: C:\Users\intelaipc\Desktop\Backyard AI.lnk
Skipped file: C:\Users\intelaipc\Desktop\Cursor.lnk
Skipped file: C:\Users\intelaipc\Desktop\desktop.ini
<class 'str'>
Updated pdf embedding for C:\Users\intelaipc\Desktop\Extract text from PDF File using Python - GeeksforGeeks.pdf
Skipped file: C:\Users\intelaipc\Desktop\GIMP 2.10.38.lnk
Updated text embedding for C:\Users\intelaipc\Desktop\helloworld.txt
Skipped file: C:\Users\intelaipc\Desktop\Jan.lnk
Skipped file: C:\Users\intelaipc\Desktop\LM Studio.lnk
Updated image embedding for C:\Users\intelaipc\Desktop\data\coco2.jpg
Updated image embedding for C:\Users\intelaipc\Desktop\data\coco_tulips.jpg
Updated image embedding for C:\Users\intelaipc\Desktop\data\dog1.jpg
Updated image embedding for C:\Users\intelaipc\Desktop\data\dog2.jpg
Updated image embedding for C:\Users\intelaipc\Desktop\data\empty_road_mapillary.jpg
Updated image embedding for C:\Users\intelaipc\Desktop\data\handwritten_chinese_test.jpg
Updated image embeddin

Token indices sequence length is longer than the specified maximum sequence length for this model (103 > 77). Running this sequence through the model will result in indexing errors


Updated text embedding for C:\Users\intelaipc\Desktop\exampledata\note03_calculus_integration.md
Updated text embedding for C:\Users\intelaipc\Desktop\exampledata\note04_literature_shakespeare.md
Updated text embedding for C:\Users\intelaipc\Desktop\exampledata\note05_chemistry_periodic_table.txt
Updated text embedding for C:\Users\intelaipc\Desktop\exampledata\note06_psychology_memory.md
Updated text embedding for C:\Users\intelaipc\Desktop\exampledata\note07_physics_thermodynamics.txt
Error processing C:\Users\intelaipc\Desktop\exampledata\note08_computer_science_data_structures.md: Exception from src/inference/src/cpp/infer_request.cpp:223:
Exception from src/plugins/intel_cpu/src/node.cpp:593:
[CPU] Add node with name '__module.model.text_model.embeddings/aten::add/Add' Exception from src/plugins/intel_cpu/src/shape_inference/custom/eltwise.cpp:45:
Eltwise shape infer input shapes dim index: 1 mismatch



Updated text embedding for C:\Users\intelaipc\Desktop\exampledata\note09_econ

In [10]:
# Set up watchdog observer
event_handler = DesktopHandler()
observer = Observer()
observer.schedule(event_handler, str(desktop_path), recursive=True)
observer.start()

try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    observer.stop()
observer.join()