# Multi-modal vector embeddings

A vector embedding can also represent non-textual data, such as images.

In [1]:
import os
import mimetypes

import requests
from PIL import Image
import dotenv
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Load settings via python-dotenv before the environment lookup below.
dotenv.load_dotenv()

# Credential and token provider used to build the Authorization header of
# every request sent to the Azure AI Vision service.
azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
AZURE_AI_VISION_ENDPOINT = os.environ["AZURE_AI_VISION_ENDPOINT"]
# Strip any trailing slash from the endpoint so the joined URL never contains
# "//computervision" when the configured value ends with "/".
AZURE_AI_VISION_URL = f"{AZURE_AI_VISION_ENDPOINT.rstrip('/')}/computervision/retrieval"

def get_model_params():
    """Return the query-string parameters sent with every retrieval request.

    A new dict is built on each call, so callers may modify the result freely.
    """
    query_params = {}
    query_params["api-version"] = "2024-02-01"
    query_params["model-version"] = "2023-04-15"
    return query_params

def get_auth_headers():
    """Return request headers holding a bearer token from ``token_provider``.

    The provider is called on every invocation and a new dict is returned
    each time, so callers can add further headers to it.
    """
    bearer_token = token_provider()
    auth_headers = {"Authorization": "Bearer " + bearer_token}
    return auth_headers

def get_image_embedding(image_file):
    """Return the embedding vector Azure AI Vision computes for an image file.

    The file's bytes are posted to the ``:vectorizeImage`` operation of the
    retrieval endpoint, with a content type guessed from the file name.

    Args:
        image_file: Path of the image file to embed.

    Returns:
        The value of the ``"vector"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        KeyError: If the response body carries no ``"vector"`` field.
    """
    # guess_type() yields None for unknown extensions; fall back to the
    # generic binary type so a Content-Type header is always sent.
    mimetype = mimetypes.guess_type(image_file)[0] or "application/octet-stream"
    url = f"{AZURE_AI_VISION_URL}:vectorizeImage"
    headers = get_auth_headers()
    headers["Content-Type"] = mimetype
    # The context manager closes the file handle once the upload has finished.
    with open(image_file, "rb") as image_data:
        response = requests.post(url, headers=headers, params=get_model_params(), data=image_data)
    if response.status_code != 200:
        # Print the raw body: error responses are not guaranteed to be JSON.
        print(image_file, response.status_code, response.text)
        # Surface the HTTP failure itself rather than a later KeyError.
        response.raise_for_status()
    return response.json()["vector"]

def get_text_embedding(text):
    """Return the embedding vector Azure AI Vision computes for a text.

    The text is posted as JSON to the ``:vectorizeText`` operation of the
    retrieval endpoint.

    Args:
        text: The text to embed.

    Returns:
        The value of the ``"vector"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        KeyError: If the response body carries no ``"vector"`` field.
    """
    url = f"{AZURE_AI_VISION_URL}:vectorizeText"
    response = requests.post(url, headers=get_auth_headers(), params=get_model_params(),
                             json={"text": text})
    if response.status_code != 200:
        # Same reporting as the image variant: show what failed, then raise
        # the HTTP error instead of failing later on a missing "vector" key.
        print(text, response.status_code, response.text)
        response.raise_for_status()
    return response.json()["vector"]


In [2]:
import json

# Embed every entry of the product image folder, keyed by its file name.
image_dir = "../product_images"
vectors = {}
for file_name in os.listdir(image_dir):
    vectors[file_name] = get_image_embedding(f"{image_dir}/{file_name}")

# Save the embeddings to a JSON file.
with open("embeddings/images_ai-vision.json", "w") as out_file:
    json.dump(vectors, out_file)
    

In [None]:
# Open one of the product images; as the cell's only expression, its value
# becomes the cell output.
Image.open("../product_images/amulet8_fullshot.jpg")

In [None]:
# Show the embedding stored for that image (vectors is keyed by file name).
vectors["amulet8_fullshot.jpg"]

In [None]:
# Length of the embedding stored for that image.
len(vectors["amulet8_fullshot.jpg"])

In [None]:
import csv

# Read the CSV of most-used nouns, taking the first column of each row.
# utf-8 is given explicitly so accented words decode the same on every
# platform; newline="" is what the csv module expects for files it reads.
with open('embeddings/sustantivos-mas-usados.csv', encoding="utf-8", newline="") as f:
    reader = csv.reader(f)
    # Skip the header row; the default avoids StopIteration on an empty file.
    next(reader, None)
    # Blank lines come back as empty rows and are skipped.
    words = [row[0] for row in reader if row]

# Compute the embedding of each word. A plain loop keeps the embeddings
# gathered so far in word_vectors if a request fails part-way through.
word_vectors = {}
for word in words:
    word_vectors[word] = get_text_embedding(word)

# Save the embeddings to a JSON file.
with open('embeddings/sustantivos2_ai-vision.json', 'w') as f:
    json.dump(word_vectors, f)

In [None]:
# Show the embedding stored for the word "tiempo".
word_vectors["tiempo"]

In [None]:
# Length of the embedding stored for the word "tiempo".
len(word_vectors["tiempo"])