# Getting Started with Azure AI Search as a Vector Database

In [None]:
! pip install azure-identity
! pip install openai
! pip install python-dotenv
! pip install azure-search-documents

In [14]:
import os
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
from openai import AzureOpenAI
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)
from azure.search.documents.models import VectorizedQuery

# Configuration is read from environment variables; load_dotenv() runs first
# so that it has been applied before the os.getenv() lookups below.
load_dotenv()

# --- Azure OpenAI settings ---
AZURE_OPENAI_ENDPOINT: str = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY: str = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_API_VERSION: str = "2023-05-15"
AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME: str = os.getenv(
    "AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME"
)

# --- Azure AI Search settings ---
AZURE_SEARCH_SERVICE_ENDPOINT: str = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
AZURE_SEARCH_ADMIN_KEY: str = os.getenv("AZURE_SEARCH_ADMIN_KEY")

# Token provider handed to the Azure OpenAI client when AAD auth is selected.
credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(
    credential, "https://cognitiveservices.azure.com/.default"
)

# Azure OpenAI auth mode: True -> Azure Active Directory (AAD), False -> API key.
use_aad_for_aoai = True

client = (
    AzureOpenAI(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_version=AZURE_OPENAI_API_VERSION,
        azure_ad_token_provider=token_provider,
    )
    if use_aad_for_aoai
    else AzureOpenAI(
        api_key=AZURE_OPENAI_API_KEY,
        api_version=AZURE_OPENAI_API_VERSION,
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
    )
)

# Azure AI Search auth mode: True -> Azure Active Directory (AAD), False -> admin key.
use_aad_for_search = True

# From here on `credential` is the credential used by the search clients.
credential = (
    DefaultAzureCredential()
    if use_aad_for_search
    else AzureKeyCredential(AZURE_SEARCH_ADMIN_KEY)
)


# Example function to generate document embedding
def generate_embedding(text: str):
    """Return the embedding for ``text``.

    Calls ``client.embeddings.create`` with ``text`` as the input and
    ``AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME`` as the model, then returns
    the ``embedding`` attribute of the first entry in the response's ``data``.
    """
    response = client.embeddings.create(
        input=text,
        model=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
    )
    # Only the first entry of the response is used.
    first_entry = response.data[0]
    return first_entry.embedding

# Azure AI Search: vector search, step by step
Create a tiny vector index

In [9]:
AZURE_SEARCH_TINY_INDEX = "teenytinyy-index"

# Schema: a string key, a filterable string category, and a 3-dimensional
# vector field bound to the "my-hnsw-profile" vector search profile.
index_fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    SimpleField(name="category", type=SearchFieldDataType.String, filterable=True),
    SearchField(
        name="vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=3,
        vector_search_profile_name="my-hnsw-profile",
    ),
]

# HNSW algorithm configuration using the cosine metric ...
hnsw_algorithm = HnswAlgorithmConfiguration(
    name="my-hnsw-config",
    kind=VectorSearchAlgorithmKind.HNSW,
    parameters=HnswParameters(metric="cosine"),
)

# ... and the profile that points the vector field at that configuration.
hnsw_profile = VectorSearchProfile(
    name="my-hnsw-profile", algorithm_configuration_name="my-hnsw-config"
)

index = SearchIndex(
    name=AZURE_SEARCH_TINY_INDEX,
    fields=index_fields,
    vector_search=VectorSearch(
        algorithms=[hnsw_algorithm],
        profiles=[hnsw_profile],
    ),
)

index_client = SearchIndexClient(
    endpoint=AZURE_SEARCH_SERVICE_ENDPOINT, credential=credential
)

# Create the index, or update it if it already exists
result = index_client.create_or_update_index(index)
print(f"{result.name} created")

teenytinyy-index created


Insert a few documents with tiny vectors

In [13]:
# Client bound to the tiny index created above
search_client = SearchClient(
    endpoint=AZURE_SEARCH_SERVICE_ENDPOINT,
    index_name=AZURE_SEARCH_TINY_INDEX,
    credential=credential,
)

# Three documents, each with a 3-element vector matching the index schema
tiny_documents = [
    {"id": "1", "vector": [1, 2, 3], "category": "A"},
    {"id": "2", "vector": [1, 1, 3], "category": "A"},
    {"id": "3", "vector": [4, 5, 6], "category": "B"},
]

# Left as the cell's last expression so the returned results are displayed
search_client.upload_documents(documents=tiny_documents)

[<azure.search.documents._generated.models._models_py3.IndexingResult at 0x227a1f326d0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x227a1f4f0d0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x227a243d8d0>]

Search using vector similarity

In [16]:
# Pure vector query: no search text, just the 3 nearest neighbours of
# [2, 2, 3] in the "vector" field.
nearest_neighbors_query = VectorizedQuery(
    vector=[2, 2, 3], k_nearest_neighbors=3, fields="vector"
)
r = search_client.search(search_text=None, vector_queries=[nearest_neighbors_query])

# Print one line per hit: id, similarity score, category
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}, {doc['category']}")

id: 3, score: 0.99504673, B
id: 1, score: 0.97305185, A
id: 2, score: 0.9529747, A


Search with a filter!

In [17]:
# Vector query for the 3 nearest neighbours of [2, 2, 3], combined with a
# filter expression on the filterable "category" field.
filtered_neighbors_query = VectorizedQuery(
    vector=[2, 2, 3], k_nearest_neighbors=3, fields="vector"
)
r = search_client.search(
    search_text=None,
    vector_queries=[filtered_neighbors_query],
    filter="category eq 'A'",
)

# Print one line per hit: id, similarity score, category
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}, {doc['category']}")

id: 1, score: 0.97305185, A
id: 2, score: 0.9529747, A
