In [None]:
import os
import faiss
import numpy as np
import pandas as pd

from openai import AzureOpenAI

# Keyword arguments for the AzureOpenAI client. The key and endpoint are read
# from the environment so no secret lives in the notebook; a missing variable
# raises KeyError as soon as this cell runs.
credentials = dict(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version="2025-01-01-preview",
)

# Small demo corpus: each sentence is embedded and stored in the vector index
# as one document. List order matters, because search results refer to
# documents by their position in this list.
texts = [
    "The sun was shining brightly in the clear blue sky",
    "The cat sat on the windowsill, watching the birds outside",
    "The city was bustling with people, cars, and noise",
    "The teacher wrote complex equations on the blackboard",
    "The company will announce its quarterly earnings next week",
]


def get_embeddings(
    texts: list[str], client: AzureOpenAI, model: str
) -> list[list[float]]:
    """Embed a batch of texts with a single embeddings request.

    Args:
        texts: Strings to embed; sent to the service as one batch.
        client: Client whose ``embeddings.create`` endpoint is called.
        model: Model identifier forwarded unchanged to the request.

    Returns:
        One embedding vector per input text, ordered like ``texts``.
        An empty ``texts`` yields ``[]`` without contacting the service.
    """
    # Nothing to embed: avoid a pointless (and likely rejected) request.
    if not texts:
        return []

    response = client.embeddings.create(input=texts, model=model)
    # Every result reports the position of the input it belongs to; sort on it
    # so the returned vectors are guaranteed to line up with `texts`.
    ordered = sorted(response.data, key=lambda result: result.index)
    return [result.embedding for result in ordered]


In [None]:
# Model & Client
# NOTE(review): with Azure OpenAI this value is presumably the deployment
# name rather than the base model name — confirm against the resource.
model = "text-embedding-ada-002"
client = AzureOpenAI(**credentials)

# Convert texts to embeddings
doc_embeddings = get_embeddings(texts=texts, client=client, model=model)

# Build index
# One row per document, one column per embedding dimension.
df = pd.DataFrame(doc_embeddings)
n_dim = df.shape[1]
index = faiss.IndexFlatL2(n_dim)
# FAISS operates on C-contiguous float32 matrices. Hand it exactly that
# instead of a float64 DataFrame, so the call does not depend on implicit
# coercion inside the FAISS Python wrapper (which varies between versions).
index.add(np.ascontiguousarray(df.to_numpy(), dtype=np.float32))


In [None]:
# Convert query to embeddings
search_results = 5
query = "Some question about my pet"
query_embeddings = get_embeddings(texts=[query], client=client, model=model)

# Query results
# FAISS expects a C-contiguous float32 matrix of shape (n_queries, n_dim).
query_matrix = np.ascontiguousarray(query_embeddings, dtype=np.float32)
# Never ask for more neighbours than the index holds: FAISS pads missing
# results with label -1, and texts[-1] would silently show the last document.
top_k = min(search_results, index.ntotal)
distances, indices = index.search(query_matrix, k=top_k)

# Display results
print(f"Query: '{query}'\nDocument ranking:")
# Pad every document to the longest text so the distance column lines up.
max_len = max(len(text) for text in texts)
# Row 0 of each result array belongs to the single query submitted above.
for i, (row, distance) in enumerate(zip(indices[0], distances[0], strict=True)):
    print(f"  {i}. {texts[row]:<{max_len + 3}} (distance: {distance:.4f})")
