**Embeddings**

In [1]:
import re
import requests
import sys
import os
from openai import AzureOpenAI
import tiktoken
from dotenv import load_dotenv
import numpy as np
# Load the settings stored in the local "azure.env" file before reading them below.
load_dotenv("azure.env")

# Endpoint, key and API version are all taken from environment variables,
# so no credential is written into the notebook itself.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_KEY"),
    api_version=os.environ.get("AZURE_OPENAI_VERSION"),
)


In [2]:
# Name of the embedding model, read from the environment (None when the variable is unset).
model = os.environ.get("EMBEDDING_MODEL_NAME")
def cosine_similarity(query_embedding, embeddings, distance_metric='cosine'):
    """Return the cosine similarity between a query vector and one or more embeddings.

    Args:
        query_embedding: 1-D sequence/array holding the query vector.
        embeddings: either a single 1-D vector (a scalar is returned) or a
            2-D array-like with one embedding per row (a 1-D array with one
            score per row is returned).
        distance_metric: only ``'cosine'`` is supported.

    Returns:
        The dot product of each embedding with the query, divided by the
        product of their Euclidean norms. Zero-norm vectors are not
        special-cased.

    Raises:
        ValueError: if ``distance_metric`` is anything other than ``'cosine'``.
    """
    if distance_metric != 'cosine':
        raise ValueError("Métrica de distância não suportada. Utilize 'cosine'.")

    query = np.asarray(query_embedding)
    candidates = np.asarray(embeddings)

    # Take the norm along the last axis so that, for a 2-D matrix, every row is
    # normalised by its own length. Without `axis`, NumPy returns the Frobenius
    # norm of the whole matrix and all rows would share one (wrong) divisor.
    candidate_norms = np.linalg.norm(candidates, axis=-1)
    query_norm = np.linalg.norm(query)

    similarities = np.dot(candidates, query) / (candidate_norms * query_norm)
    return similarities

In [3]:
# Request one embedding per word, in this order, and bind each result to its own name.
automobile_embedding, vehicle_embedding, dinosaur_embedding, stick_embedding = (
    client.embeddings.create(input=word, model=model).data[0].embedding
    for word in ('automobile', 'vehicle', 'dinosaur', 'stick')
)

# Show the raw vector returned for "automobile".
print(automobile_embedding)

[-0.013795847073197365, 0.0008768822299316525, 0.017049584537744522, -0.01771334744989872, -0.014134235680103302, -0.008212433196604252, -0.02813832089304924, -0.006224399898201227, -0.01603441871702671, -0.022971386089920998, 0.020602665841579437, 0.028528770431876183, 0.004197321366518736, 0.0061853546649217606, -0.021956220269203186, 0.010600676760077477, 0.029908355325460434, -0.0043535009026527405, 0.025066792964935303, -0.018871676176786423, -0.023088520392775536, 0.0029609010089188814, 0.006767774000763893, -0.024246850982308388, -0.007620253134518862, 0.004675620701164007, 0.006429384928196669, -0.022659027948975563, 0.005414219107478857, -0.01906690187752247, 0.022802192717790604, -0.03459373489022255, -0.014784982427954674, -0.014719908125698566, -0.0032830210402607918, -0.021305473521351814, 1.9493827494443394e-06, -0.0024093925021588802, -0.00014102918794378638, 0.0033448419999331236, 0.0037938577588647604, 0.013574592769145966, -0.012364202179014683, -0.01297590509057045, 

In [4]:
# Compare "automobile" against several other words; the closer the score is to 1,
# the more similar the two words are.
for other_embedding in (vehicle_embedding, dinosaur_embedding, stick_embedding):
    print(cosine_similarity(automobile_embedding, other_embedding))

0.9161762609368851
0.8334429682277883
0.7820358471285385
