In [None]:
# Import required libraries  
import os  
import json  
import requests
import http.client, urllib.parse
from tenacity import retry, stop_after_attempt, wait_fixed
from dotenv import load_dotenv  
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential

from IPython.display import Image, display

from sklearn.metrics.pairwise import cosine_similarity
  
# Load variables from a local .env file into the process environment.
load_dotenv()  
# Azure AI Search settings. os.getenv returns None when a variable is unset;
# none of these four are referenced by the cells shown in this section.
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")  
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
api_version = os.getenv("AZURE_SEARCH_API_VERSION")
key = os.getenv("AZURE_SEARCH_ADMIN_KEY")  
# Azure AI Vision settings: the key and region are passed to get_image_vector,
# the key and endpoint to generate_embeddings.
aiVisionApiKey = os.getenv("AZURE_AI_VISION_API_KEY")  
aiVisionRegion = os.getenv("AZURE_AI_VISION_REGION")
aiVisionEndpoint = os.getenv("AZURE_AI_VISION_ENDPOINT")
# NOTE(review): this credential is not used by any cell visible here — confirm it is needed.
credential = DefaultAzureCredential()

In [None]:
# Image directory, relative to the current working directory; it is listed
# with os.listdir and joined onto os.getcwd() in later cells.
FILE_PATH='images'

In [None]:
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1))
def get_image_vector(image_path, key, region, api_version='2023-04-01-preview',
                     model_version='2023-04-15', timeout=3):
    """Return the embedding vector of an image from the vectorizeImage endpoint.

    The whole call is wrapped by tenacity's ``retry``: any exception raised
    here triggers up to five attempts, one second apart.

    Args:
        image_path: Either an ``http(s)://`` URL, which is sent as a JSON
            ``{"url": ...}`` body, or a local file path, whose bytes are
            uploaded as ``application/octet-stream``.
        key: Subscription key sent in ``Ocp-Apim-Subscription-Key``.
        region: Region name used to build the
            ``{region}.api.cognitive.microsoft.com`` host.
        api_version: Value of the ``api-version`` query parameter.
        model_version: Value of the ``model-version`` query parameter. Text
            vectors that will be compared with the result should be produced
            with the same model version.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        The ``"vector"`` entry of the JSON response (``None`` if absent).

    Raises:
        OSError: Local file could not be read, or the connection failed or
            timed out.
        http.client.HTTPException: Protocol-level HTTP failure.
        Exception: The service answered with a non-200 status.
    """
    headers = {
        'Ocp-Apim-Subscription-Key': key,
    }

    query = urllib.parse.urlencode({
        'api-version': api_version,
        'model-version': model_version,
    })

    # Build the request body before touching the network so that a local
    # file error is not reported as a network timeout.
    if image_path.startswith(('http://', 'https://')):
        headers['Content-Type'] = 'application/json'
        body = json.dumps({"url": image_path})
    else:
        headers['Content-Type'] = 'application/octet-stream'
        with open(image_path, "rb") as filehandler:
            body = filehandler.read()

    conn = http.client.HTTPSConnection(f'{region}.api.cognitive.microsoft.com', timeout=timeout)
    try:
        conn.request("POST", f"/computervision/retrieval:vectorizeImage?{query}", body, headers)
        response = conn.getresponse()
        status = response.status
        raw = response.read()
    except (OSError, http.client.HTTPException):
        # http.client reports timeouts and connection failures as OSError
        # subclasses (socket.timeout, ConnectionError, ...); it never raises
        # requests.exceptions.Timeout.
        print(f"Timeout/Error for {image_path}. Retrying...")
        raise
    finally:
        # Always release the socket, including on the error paths above.
        conn.close()

    if status != 200:
        # Error bodies are not guaranteed to be JSON (e.g. gateway errors),
        # so parse defensively and always surface the HTTP status.
        try:
            payload = json.loads(raw)
        except ValueError:
            payload = None
        if isinstance(payload, dict):
            # Assumes the service nests details as {"error": {"message": ...}};
            # fall back to a top-level "message" as the original code read it.
            error = payload.get('error')
            nested = error.get('message') if isinstance(error, dict) else None
            message = nested or payload.get('message', '')
        else:
            message = raw.decode('utf-8', errors='replace')
        raise Exception(f"Error processing image {image_path} (HTTP {status}): {message}")

    return json.loads(raw).get("vector")


In [None]:
import os

In [None]:
# os.listdir returns entries in arbitrary order and also yields hidden files
# (e.g. .DS_Store) and sub-directories. Keep only visible regular files and
# sort them so that the "first" image (keys[0]) is the same on every run and
# stray entries are never sent to the vectorize endpoint.
FILES = sorted(
    name
    for name in os.listdir(FILE_PATH)
    if not name.startswith('.') and os.path.isfile(os.path.join(FILE_PATH, name))
)

In [None]:
# Show the file names that will be embedded.
FILES

In [None]:
# Maps file name -> value returned by get_image_vector; filled by the loop over FILES.
image_embeddings = {}

In [None]:
# Image directory anchored at the current working directory; joined with each file name below.
DIR_PATH = os.path.join(os.getcwd(), FILE_PATH)

In [None]:
# Show the resolved image directory.
DIR_PATH

In [None]:
# Displays what a joined image path looks like; the result is not stored.
# NOTE(review): "file1.jpg" looks like a placeholder name — confirm this cell is still needed.
os.path.join(DIR_PATH, "file1.jpg")

In [None]:
# Request one embedding per listed file and store it under the file name.
for file in FILES:
    image_path = os.path.join(DIR_PATH, file)
    image_embeddings[file] = get_image_vector(image_path, aiVisionApiKey, aiVisionRegion)

In [None]:
# File names in the order their embeddings were inserted.
keys = list(image_embeddings.keys())

In [None]:
# The first file name; its embedding is used as the query vector in the next cell.
keys[0]

In [None]:
# Query = embedding of the first image, wrapped in a list so that
# cosine_similarity receives a single-row 2-D input.
arr1 = [image_embeddings[keys[0]]]

In [None]:
# Heading for the similarity results computed in the following cells.
print("Cosine Similarity values")

In [None]:
# Maps file name -> cosine similarity against the current query vector (arr1).
sim_dict ={}

In [None]:
# Score every image against the query row in arr1.
for file in FILES:
    candidate_rows = [image_embeddings[file]]
    sim = cosine_similarity(arr1, candidate_rows)
    # The result is a 1x1 matrix; keep its only entry.
    sim_dict[file] = sim[0][0]

In [None]:
# Same mapping as sim_dict, re-inserted from highest to lowest similarity.
sim_sort = {
    name: sim_dict[name]
    for name in sorted(sim_dict, key=sim_dict.get, reverse=True)
}

In [None]:
# Second-ranked file name. The query image is itself in FILES, so rank 0 is
# presumably the query image — verify when other images tie with it.
list(sim_sort.keys())[1]

In [None]:
# Render the query image (the first file name).
display(Image(filename=os.path.join(DIR_PATH, keys[0])))

In [None]:
# Second-ranked file name again (same expression as two cells above).
list(sim_sort.keys())[1]

In [None]:
# Show the full ranking, highest similarity first.
sim_sort

In [None]:
# Render the second-ranked image from the ranking.
display(Image(filename=os.path.join(DIR_PATH, list(sim_sort.keys())[1])))

In [None]:
def generate_embeddings(text, aiVisionEndpoint, aiVisionApiKey,
                        api_version="2023-04-01-preview",
                        model_version="2023-04-15", timeout=10):
    """Return the embedding vector of a text query from the vectorizeText endpoint.

    The default ``api_version`` and ``model_version`` match the values that
    ``get_image_vector`` sends, so that text and image vectors come from the
    same model and can be compared with cosine similarity.

    Args:
        text: Query text to embed.
        aiVisionEndpoint: Base URL of the Azure AI Vision resource; a
            trailing slash is tolerated.
        aiVisionApiKey: Subscription key sent in ``Ocp-Apim-Subscription-Key``.
        api_version: Value of the ``api-version`` query parameter.
        model_version: Value of the ``model-version`` query parameter; pass
            ``None`` to omit it.
        timeout: Seconds to wait for the service before ``requests`` raises
            a timeout instead of blocking indefinitely.

    Returns:
        The ``"vector"`` list from the response on HTTP 200, otherwise
        ``None`` after printing the status code and response body.
    """
    # Avoid a double slash when the configured endpoint ends with "/".
    url = f"{aiVisionEndpoint.rstrip('/')}/computervision/retrieval:vectorizeText"

    params = {
        "api-version": api_version
    }
    if model_version is not None:
        params["model-version"] = model_version

    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": aiVisionApiKey
    }

    data = {
        "text": text
    }

    response = requests.post(url, params=params, headers=headers, json=data, timeout=timeout)

    if response.status_code == 200:
        return response.json()["vector"]

    print(f"Error: {response.status_code} - {response.text}")
    return None

In [None]:
# Free-text query to embed and compare against the image embeddings.
text = "winter clothes"

In [None]:
# Embed the query text. generate_embeddings returns None on a non-200
# response, in which case the following cells fail.
vector_text = generate_embeddings(text, aiVisionEndpoint, aiVisionApiKey)

In [None]:
# Peek at the first four components of the text embedding.
vector_text[:4]

In [None]:
# Length of the text embedding; compare with the image embedding length shown below.
len(vector_text)

In [None]:
# Rebind the query row: from here on arr1 holds the text embedding
# instead of the first image's embedding.
arr1 = [vector_text]

In [None]:
# Re-score every image, this time against the text query held in arr1.
# Each entry of sim_dict is overwritten with the new score.
for file in FILES:
    candidate_rows = [image_embeddings[file]]
    sim = cosine_similarity(arr1, candidate_rows)
    # The result is a 1x1 matrix; keep its only entry.
    sim_dict[file] = sim[0][0]

In [None]:
# Length of one image embedding. `file` is the value left over from the
# last iteration of the preceding loop, i.e. the last entry of FILES.
len(image_embeddings[file])

In [None]:
# Show the unsorted text-to-image similarity scores.
sim_dict

In [None]:
# Same mapping as sim_dict, re-inserted from highest to lowest similarity.
sim_sort = {
    name: sim_dict[name]
    for name in sorted(sim_dict, key=sim_dict.get, reverse=True)
}

In [None]:
# Show the ranking for the text query, highest similarity first.
sim_sort

In [None]:
# Render the best match for the text query (rank 0).
display(Image(filename=os.path.join(DIR_PATH, list(sim_sort.keys())[0])))

In [None]:
# Render the second-best match for the text query (rank 1).
display(Image(filename=os.path.join(DIR_PATH, list(sim_sort.keys())[1])))

In [None]:
# Render the third-best match for the text query (rank 2).
display(Image(filename=os.path.join(DIR_PATH, list(sim_sort.keys())[2])))

In [None]:
# Render the fourth-best match for the text query (rank 3); raises IndexError with fewer than four files.
display(Image(filename=os.path.join(DIR_PATH, list(sim_sort.keys())[3])))