# Azure Computer Vision 4 (Florence)

## Image retrieval (version 4.0 preview) with Azure Computer Vision

**Vector embeddings** are a way of representing content—text or images—as vectors of real numbers in a high-dimensional space. Vector embeddings are often learned from large amounts of textual and visual data using machine learning algorithms, such as neural networks. Each dimension of the vector corresponds to a different feature or attribute of the content, such as its semantic meaning, syntactic role, or context in which it commonly appears.

- **Vectorize Images and Text:** the Image Retrieval APIs, VectorizeImage and VectorizeText, can be used to extract feature vectors out of an image or text respectively. The APIs return a single feature vector representing the entire input.

- **Measure similarity:** Vector search systems typically use distance metrics, such as cosine distance or Euclidean distance, to compare vectors and rank them by similarity. The Vision Studio demo uses cosine distance to measure similarity.

- **Retrieve Images:** Use the top N vectors similar to the search query and retrieve images corresponding to those vectors from your photo library to provide as the final result.

![Image](./image-retrieval.png)

Azure Computer Vision updates: https://learn.microsoft.com/en-us/azure/cognitive-services/computer-vision/whats-new#march-2023
<br><br>
> Serge Retkowsky | Microsoft | https://github.com/retkowsky | 3rd of May, 2023

In [None]:
import json
import math
import matplotlib.pyplot as plt
import os
import pandas as pd
import requests
import seaborn as sns
import sys

from dotenv import load_dotenv
from IPython.display import Image as viewimage
from PIL import Image

In [None]:
# Display the version string of the running Python interpreter
sys.version

## Azure Computer Vision 4.0 Florence connection

In [None]:
# Load settings from the local "azure.env" file into the process environment
load_dotenv("azure.env")

# Subscription key and service endpoint; os.getenv returns None when a variable is not set
key = os.getenv("COMPUTER_VISION_SUBSCRIPTION_KEY")
endpoint = os.getenv("COMPUTER_VISION_ENDPOINT")

In [None]:
# Query string appended to both retrieval URLs (API version and model version)
version = "?api-version=2023-02-01-preview&modelVersion=latest"

# URL used for image vectorization
vec_img_url = endpoint + "/computervision/retrieval:vectorizeImage" + version
# URL used for prompt (text) vectorization
vec_txt_url = endpoint + "/computervision/retrieval:vectorizeText" + version

# Headers for JSON request bodies, carrying the subscription key
headers = {
    "Content-type": "application/json",
    "Ocp-Apim-Subscription-Key": key,
}

In [None]:
def image_embedding(imageurl, *, timeout=30):
    """
    Embedding image using Azure CV 4.0

    Posts ``{"url": imageurl}`` as JSON to ``vec_img_url`` with the
    module-level ``headers`` and returns the ``"vector"`` field of the
    JSON response.

    Args:
        imageurl: URL of the image to vectorize; forwarded to the service as-is.
        timeout: Seconds to wait for the service (passed to ``requests.post``).

    Returns:
        The value stored under ``"vector"`` in the response body.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    image = {"url": imageurl}
    r = requests.post(
        vec_img_url, data=json.dumps(image), headers=headers, timeout=timeout
    )
    # Report HTTP failures directly; otherwise an error body would only
    # surface later as a confusing failure when reading "vector".
    r.raise_for_status()

    return r.json()["vector"]

In [None]:
def text_embedding(promptxt, *, timeout=30):
    """
    Embedding text using Azure CV 4.0

    Posts ``{"text": promptxt}`` as JSON to ``vec_txt_url`` with the
    module-level ``headers`` and returns the ``"vector"`` field of the
    JSON response.

    Args:
        promptxt: Prompt text to vectorize.
        timeout: Seconds to wait for the service (passed to ``requests.post``).

    Returns:
        The value stored under ``"vector"`` in the response body.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    prompt = {"text": promptxt}
    r = requests.post(
        vec_txt_url, data=json.dumps(prompt), headers=headers, timeout=timeout
    )
    # Report HTTP failures directly; otherwise an error body would only
    # surface later as a confusing failure when reading "vector".
    r.raise_for_status()

    return r.json()["vector"]

In [None]:
def get_cosine_similarity(vector1, vector2):
    """
    Get cosine similarity value

    Computes ``dot(vector1, vector2) / (|vector1| * |vector2|)``.

    Args:
        vector1: Sequence of numbers.
        vector2: Sequence of numbers.

    Returns:
        The cosine similarity as a float. If either vector has zero
        magnitude (including empty vectors) the similarity is undefined and
        ``0.0`` is returned instead of raising ``ZeroDivisionError``.

    Note:
        When the lengths differ, the dot product only covers the common
        prefix while the magnitudes use every element, which is the same as
        zero-padding the shorter vector.
    """
    # zip stops at the shorter vector, so extra trailing elements add nothing
    dot_product = sum(a * b for a, b in zip(vector1, vector2))

    magnitude1 = math.sqrt(sum(x * x for x in vector1))
    magnitude2 = math.sqrt(sum(x * x for x in vector2))

    denominator = magnitude1 * magnitude2
    if denominator == 0:
        # Zero-length or all-zero vector: no direction to compare
        return 0.0

    return dot_product / denominator

In [None]:
def similarity_results(image_emb, prompts):
    """
    Get similarity results

    Embeds every prompt with ``text_embedding``, scores it against
    ``image_emb`` with ``get_cosine_similarity`` and returns the prompts
    ranked from most to least similar.

    Args:
        image_emb: Image embedding vector to compare the prompts against.
        prompts: Iterable of prompt strings; one ``text_embedding`` call is
            made per prompt.

    Returns:
        A pandas DataFrame with columns ``"prompt"`` and ``"similarity"``
        (float), sorted by descending similarity. Prompts with equal scores
        keep their input order.
    """
    # Pair each prompt with its score in one pass so that one-shot
    # iterables are handled correctly as well.
    scored = [
        (prompt, get_cosine_similarity(image_emb, text_embedding(prompt)))
        for prompt in prompts
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Build the frame in one go instead of enlarging it row by row
    df = pd.DataFrame(scored, columns=["prompt", "similarity"])
    # Keeps the column float even when there are no rows
    df["similarity"] = df["similarity"].astype(float)

    return df

## Embedding image

In [None]:
# First example image, referenced by URL
imageurl1 = "https://github.com/retkowsky/images/blob/master/i4.jpg?raw=true"
# Vectorize the image by URL with image_embedding
image_emb1 = image_embedding(imageurl1)

# Download the same image and display it without axes
plt.imshow(Image.open(requests.get(imageurl1, stream=True).raw))
plt.axis("off")
plt.show()

## Embedding prompts

In [None]:
# Embedding of the prompt "a dog"
text1 = text_embedding("a dog")

In [None]:
# Cosine similarity between the first image and the "a dog" prompt
get_cosine_similarity(image_emb1, text1)

In [None]:
# Embedding of the prompt "a car"
text2 = text_embedding("a car")

In [None]:
# Cosine similarity between the first image and the "a car" prompt
get_cosine_similarity(image_emb1, text2)

## Multiple prompts

In [None]:
# Download the first image again and display it without axes
plt.imshow(Image.open(requests.get(imageurl1, stream=True).raw))
plt.axis("off")
plt.show()

In [None]:
# Candidate prompts to score against the first image
prompts = [
    "bird",
    "a truck",
    "a car",
    "a blue car",
    "a white car",
    "a BMW white car",
    "a tesla car",
    "a mercedes car",
    "a man",
    "a ford car",
]

In [None]:
# Rank the prompts by cosine similarity to the first image
df = similarity_results(image_emb1, prompts)

# Green colormap used to shade the table background by value
cm = sns.light_palette("green", as_cmap=True)
df.style.background_gradient(cmap=cm)

In [None]:
# Second example image, referenced by URL
imageurl2 = "https://github.com/retkowsky/images/blob/master/xboxps5.jpg?raw=true"
# Vectorize the image by URL with image_embedding
image_emb2 = image_embedding(imageurl2)

# Download the same image and display it without axes
plt.imshow(Image.open(requests.get(imageurl2, stream=True).raw))
plt.axis("off")
plt.show()

In [None]:
# Candidate prompts to score against the second image
prompts = [
    "PS5",
    "Xbox",
    "play station",
    "Sony",
    "controller",
    "Microsoft",
    "games console",
    "guitar",
    "fish",
    "apple",
    "car",
    "street",
    "truck",
    "Miami",
    "black controller",
    "white controller",
]

In [None]:
# Rank the prompts by cosine similarity to the second image
df = similarity_results(image_emb2, prompts)

# Green colormap used to shade the table background by value
cm = sns.light_palette("green", as_cmap=True)
df.style.background_gradient(cmap=cm)

In [None]:
# Third example image, referenced by URL
imageurl3 = "https://github.com/retkowsky/images/blob/master/sodas.jpg?raw=true"
# Vectorize the image by URL with image_embedding
image_emb3 = image_embedding(imageurl3)

# Download the same image and display it without axes
plt.imshow(Image.open(requests.get(imageurl3, stream=True).raw))
plt.axis("off")
plt.show()

In [None]:
# Candidate prompts to score against the third image
prompts = [
    "a can",
    "coca cola",
    "pepsi",
    "7 up",
    "water",
    "wine",
    "beer",
    "gin",
    "alcohol",
    "lemon",
    "drink",
    "I do not know",
    "food",
    "soda bottles",
    "coke bottle",
]

In [None]:
# Rank the prompts by cosine similarity to the third image
df = similarity_results(image_emb3, prompts)

# Green colormap used to shade the table background by value
cm = sns.light_palette("green", as_cmap=True)
df.style.background_gradient(cmap=cm)

## Image-to-image similarity example (images referenced by URL)

In [None]:
# Reference image for the image-to-image comparison (same URL as imageurl1)
imageurl5 = "https://github.com/retkowsky/images/blob/master/i4.jpg?raw=true"
# Vectorize the image by URL with image_embedding
image_emb5 = image_embedding(imageurl5)

# Download the same image and display it without axes
plt.imshow(Image.open(requests.get(imageurl5, stream=True).raw))
plt.axis("off")
plt.show()

In [None]:
# Second image to compare against the reference image
imageurl6 = "https://github.com/retkowsky/images/blob/master/i4_2.jpg?raw=true"
# Vectorize the image by URL with image_embedding
whitebmw = image_embedding(imageurl6)

# Download the same image and display it without axes
plt.imshow(Image.open(requests.get(imageurl6, stream=True).raw))
plt.axis("off")
plt.show()

In [None]:
# Third image to compare against the reference image
imageurl7 = "https://github.com/retkowsky/images/blob/master/cat.jpg?raw=true"
# Vectorize the image by URL with image_embedding
cat = image_embedding(imageurl7)

# Download the same image and display it without axes
plt.imshow(Image.open(requests.get(imageurl7, stream=True).raw))
plt.axis("off")
plt.show()

In [None]:
# Similarity of the reference embedding with itself (baseline for the comparisons)
get_cosine_similarity(image_emb5, image_emb5)

In [None]:
# Similarity between the reference image and the i4_2.jpg image
get_cosine_similarity(image_emb5, whitebmw)

In [None]:
# Similarity between the reference image and the cat.jpg image
get_cosine_similarity(image_emb5, cat)

## Using a local image file (not from a URL)

In [None]:
# Relative path of the image file to read from disk
local_image = "images/car.png"

In [None]:
# Display the local file with IPython's Image class (imported as viewimage)
viewimage(filename=local_image)

In [None]:
# Read the whole image file as raw bytes for use as the request body
with open(local_image, "rb") as f:
    data = f.read()

In [None]:
# header to use for local image file
headers_local = {
    "Content-type": "application/octet-stream",
    "Ocp-Apim-Subscription-Key": key,
}

In [None]:
# Send the raw image bytes to the image vectorization URL; the timeout keeps
# the cell from hanging forever if the service does not answer
r = requests.post(vec_img_url, data=data, headers=headers_local, timeout=30)
# Report HTTP failures directly instead of failing later when reading "vector"
r.raise_for_status()
image_emb = r.json()["vector"]

In [None]:
# Candidate prompts to score against the local image
prompts = [
    "bird",
    "a truck",
    "a car",
    "a blue car",
    "a white car",
    "a BMW white car",
    "a tesla car",
    "a mercedes car",
    "a man",
    "a ford car",
    "an i3 bmw",
    "an i4 bmw",
    "a 218 bmw",
]

In [None]:
# Rank the prompts by cosine similarity to the local image
df = similarity_results(image_emb, prompts)

# Green colormap used to shade the table background by value
cm = sns.light_palette("green", as_cmap=True)
df.style.background_gradient(cmap=cm)