In [2]:
import glob
import os
from tqdm import tqdm
from PIL import Image
import pandas as pd
import numpy as np

from imagebind import data
import torch
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType

In [5]:
# Glob pattern for the artist's images. The pattern contains no "**", so the
# `recursive` flag would have no effect and is not passed.
img_path = "F:/GitHub/genAI_art/DL/benmoranartist/*.png"

# glob returns matches in arbitrary order; sort them so that positions in
# `imgs` are stable between runs (later cells pair paths and embeddings by index).
imgs = sorted(glob.glob(img_path))

In [None]:
# Choose the compute device: first CUDA GPU when torch can see one, else CPU
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Build ImageBind-huge with pretrained weights and put it in eval mode
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()

# Move the model to the chosen device (left as the last expression so the
# cell still displays the module, as before)
model.to(device)

In [8]:
# Load data: transform every image in `imgs` into one batched vision input.
# The loader iterates over its first argument, so it must receive the list of
# paths (`imgs`), not a single path string.
inputs = {
    ModalityType.VISION: data.load_and_transform_vision_data(imgs, device)
}

In [None]:
# Nested store: user_dict[artist][image_id] = {"embed": <model output>}
user_dict = {}
user_dict['benmoranartist'] = {}

for img in imgs:
    # Derive the id from the file name without its extension. basename/splitext
    # is used instead of splitting on "/" and ".": on Windows glob joins the
    # file name with a backslash, and a name may itself contain dots.
    image_id = os.path.splitext(os.path.basename(img))[0]

    # The loader iterates over its first argument, so a single path has to be
    # wrapped in a list. A separate name is used so the batched `inputs`
    # consumed by the later embedding cell is not overwritten.
    single_input = {
        ModalityType.VISION: data.load_and_transform_vision_data([img], device)
    }

    # Inference only: no gradients needed
    with torch.no_grad():
        embed = model(single_input)

    user_dict['benmoranartist'][image_id] = {"embed" : embed}

In [10]:
# Get embeddings: run the model on `inputs` with gradient tracking disabled,
# since the result is only used for inference
with torch.no_grad():
    embeddings = model(inputs)

In [13]:
# Vision embeddings have size N images x embedding_dims
embeddings['vision'].shape

torch.Size([16, 1024])

In [16]:
# Copy the whole vision tensor to the CPU in one transfer (instead of one
# transfer per row), then split it into a list with one NumPy array per image.
# The list entries are row views of that single array.
np_embeddings = list(embeddings['vision'].cpu().numpy())

In [20]:
# Sanity check: display the embedding vector of the first image
np_embeddings[0]

array([-0.00121709, -0.02244211, -0.00029027, ...,  0.02825802,
       -0.03355971,  0.0291189 ], dtype=float32)

In [9]:
# Preview the file name (directory stripped) that the save loop will use.
# NOTE(review): the recorded output is a .jpg name although the glob pattern
# above matches *.png; the output may be from an earlier run. Confirm.
os.path.basename(imgs[0])

'1.jpg'

In [10]:
dest_path = "embeddings"

# np.save does not create missing directories, so make sure the target exists
os.makedirs(dest_path, exist_ok=True)

# Paths and vectors are matched purely by position; stop early if the two
# lists are out of step rather than saving vectors under the wrong name.
if len(imgs) != len(np_embeddings):
    raise ValueError(
        f"{len(imgs)} image paths but {len(np_embeddings)} embeddings"
    )

for img_file, vector in zip(imgs, np_embeddings):
    # Output keeps the full image file name and appends ".npy" (e.g. "1.jpg.npy")
    file_path = os.path.join(dest_path, f"{os.path.basename(img_file)}.npy")
    np.save(file_path, vector)