In [1]:
# Import required libraries  
import os  
import json  
import requests
import http.client, urllib.parse
from tenacity import retry, stop_after_attempt, wait_fixed
from dotenv import load_dotenv  
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient  
from azure.search.documents.models import (
    RawVectorQuery,
)
from azure.search.documents.indexes.models import (  
 
    ExhaustiveKnnParameters,  
    ExhaustiveKnnVectorSearchAlgorithmConfiguration,
    HnswParameters,  
    HnswVectorSearchAlgorithmConfiguration,
    SimpleField,
    SearchField,  
    SearchFieldDataType,  
    SearchIndex,  
    VectorSearch,  
    VectorSearchAlgorithmKind,  
    VectorSearchProfile,  
)

from IPython.display import Image, display

from sklearn.metrics.pairwise import cosine_similarity
  
# Load variables from a local .env file (when present) into the environment.
load_dotenv()

# Azure AI Search settings; each value is None when its variable is unset.
service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME")
api_version = os.environ.get("AZURE_SEARCH_API_VERSION")
key = os.environ.get("AZURE_SEARCH_ADMIN_KEY")

# Azure AI Vision settings used when vectorizing images.
aiVisionApiKey = os.environ.get("AZURE_AI_VISION_API_KEY")
aiVisionRegion = os.environ.get("AZURE_AI_VISION_REGION")
aiVisionEndpoint = os.environ.get("AZURE_AI_VISION_ENDPOINT")

# Token-based credential object built with the library defaults.
credential = DefaultAzureCredential()

In [2]:
# Folder (relative to the working directory) that holds the images to embed.
FILE_PATH = "images"

In [3]:
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1))
def get_image_vector(image_path, key, region, timeout=3):
    """Vectorize one image with the Azure AI Vision ``retrieval:vectorizeImage`` API.

    The decorator retries the whole call (up to 5 attempts, 1 second apart)
    whenever it raises.

    Args:
        image_path: Either an ``http://``/``https://`` URL, which is sent to
            the service as JSON, or a local file path, whose bytes are
            uploaded as ``application/octet-stream``.
        key: Subscription key sent in the ``Ocp-Apim-Subscription-Key`` header.
        region: Azure region used to build the host name
            ``{region}.api.cognitive.microsoft.com``.
        timeout: Socket timeout in seconds for the HTTPS connection
            (defaults to 3, the previously hard-coded value).

    Returns:
        The value stored under ``"vector"`` in the JSON response, or ``None``
        when the response has no such key.

    Raises:
        Exception: The service answered with a non-200 status, or a 200
            response did not contain a JSON object.
        OSError, http.client.HTTPException: Transport-level failures
            (including socket timeouts); logged and re-raised.
    """
    headers = {
        'Ocp-Apim-Subscription-Key': key,
    }

    params = urllib.parse.urlencode({
        'model-version': '2023-04-15',
    })

    # Build the request body first so that local file errors are not reported
    # as network problems by the handler below.
    if image_path.startswith(('http://', 'https://')):
        headers['Content-Type'] = 'application/json'
        body = json.dumps({"url": image_path})
    else:
        headers['Content-Type'] = 'application/octet-stream'
        with open(image_path, "rb") as filehandler:
            body = filehandler.read()

    conn = http.client.HTTPSConnection(f'{region}.api.cognitive.microsoft.com', timeout=timeout)
    try:
        conn.request("POST", "/computervision/retrieval:vectorizeImage?api-version=2023-04-01-preview&%s" % params, body, headers)
        response = conn.getresponse()
        status = response.status
        raw = response.read()
    except (OSError, http.client.HTTPException):
        # http.client reports timeouts and connection failures as OSError
        # subclasses; it never raises the requests library's exceptions.
        print(f"Timeout/Error for {image_path}. Retrying...")
        raise
    finally:
        # Always release the socket, including on failure.
        conn.close()

    # Error responses are not guaranteed to be JSON (e.g. gateway pages), so
    # a parse failure must not mask the HTTP status.
    try:
        data = json.loads(raw)
    except ValueError:
        data = None

    if status != 200:
        message = ''
        if isinstance(data, dict):
            error = data.get('error')
            if isinstance(error, dict):
                # Structured form: {"error": {"code": ..., "message": ...}}
                message = error.get('message', '')
            if not message:
                # Flat form: {"statusCode": ..., "message": ...}
                message = data.get('message', '')
        if not message:
            message = f"HTTP {status}"
        raise Exception(f"Error processing image {image_path}: {message}")

    if not isinstance(data, dict):
        raise Exception(f"Error processing image {image_path}: response was not a JSON object")

    return data.get("vector")


In [4]:
import os

In [5]:
# os.listdir returns entries in arbitrary order, and document ids are later
# derived from list position, so sort for reproducible ids. Sub-directories
# are skipped because every entry is subsequently opened as an image file.
FILES = sorted(
    name
    for name in os.listdir(FILE_PATH)
    if os.path.isfile(os.path.join(FILE_PATH, name))
)

In [6]:
# Display the file names collected from FILE_PATH.
FILES

['Braeburn.jpeg',
 'Fiji.jpeg',
 'GoldenDelicious.jpeg',
 'GrannySmith.jpeg',
 'Honeycrisp.jpeg',
 'Jonagold.jpeg',
 'PIC01.jpeg',
 'PIC02.jpeg',
 'PIC03.jpeg',
 'PIC04.jpeg',
 'PIC05.jpeg',
 'PIC06.jpeg',
 'PinkLady.jpeg',
 'RedDelicious.jpeg',
 'Rome.jpeg',
 'two_apples.jpeg']

In [7]:
# Maps each image file name to the vector returned for it.
image_embeddings = dict()

In [8]:
# Image directory path: the current working directory joined with FILE_PATH.
DIR_PATH = os.path.join(os.getcwd(), FILE_PATH)

In [9]:
# Display the resolved image directory path.
DIR_PATH

'c:\\Ambarish\\AzureAI-ImageSearch\\images'

In [10]:
# Scratch check of how a file name is joined onto DIR_PATH; the result is
# only displayed, not stored.
os.path.join(DIR_PATH, "file1.jpg")

'c:\\Ambarish\\AzureAI-ImageSearch\\images\\file1.jpg'

In [11]:
# Request a vector for every listed image and store it under its file name.
for file in FILES:
    local_image = os.path.join(DIR_PATH, file)
    image_embeddings[file] = get_image_vector(local_image, aiVisionApiKey, aiVisionRegion)

In [None]:
# Starting value for the numeric document ids.
counter = 0

In [None]:
# Documents (id, description, image_vector) to be written out as JSON.
input_data = list()

In [None]:
# Build one document per image. The list is rebuilt from scratch and ids come
# from enumerate, so re-running this cell cannot append duplicate documents
# or keep advancing the ids.
input_data = [
    {
        "id": str(position),
        "description": file_name,
        "image_vector": image_embeddings[file_name],
    }
    for position, file_name in enumerate(FILES)
]

# Keep the counter consistent with the number of documents produced.
counter = len(input_data)

In [None]:
# Output embeddings to docVectors.json file.
# Create the target folder first so a fresh checkout does not fail with
# FileNotFoundError.
os.makedirs("output", exist_ok=True)
with open("output/docVectors.json", "w", encoding="utf-8") as f:
    json.dump(input_data, f)