# Azure AI Search: vector search, step by step

## Setup API client


In [2]:
import os

import azure.identity
import dotenv
from azure.search.documents.indexes import SearchIndexClient

dotenv.load_dotenv("prod.env")

AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

azure_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=os.getenv("AZURE_TENANT_ID"))
index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=azure_credential)

In [10]:
print(AZURE_SEARCH_SERVICE)
print(AZURE_SEARCH_ENDPOINT)
print(os.getenv("AZURE_TENANT_ID"))


search-service-ai-prod
https://search-service-ai-prod.search.windows.net
12b173d3-a7a1-4331-9fa4-69c3ab228e5c


## Search a tiny index

### Create index

In [None]:
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)

AZURE_SEARCH_TINY_INDEX = "teeenytinyindex"

index = SearchIndex(
    name=AZURE_SEARCH_TINY_INDEX, 
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(name="embedding", 
                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single), 
                    searchable=True, 
                    vector_search_dimensions=3,
                    vector_search_profile_name="embedding_profile")
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration( # Hierachical Navigable Small World, IVF
                            name="hnsw_config",
                            kind=VectorSearchAlgorithmKind.HNSW,
                            parameters=HnswParameters(metric="cosine"),
                        )],
        profiles=[VectorSearchProfile(name="embedding_profile", algorithm_configuration_name="hnsw_config")]
    )
)

index_client.create_index(index)

### Insert a few documents with tiny vectors

In [None]:
from azure.search.documents import SearchClient

search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_TINY_INDEX, credential=azure_credential)
search_client.upload_documents(documents=[
    {"id": "1", "embedding": [1, 2, 3]},
    {"id": "2", "embedding": [1, 1, 3]},
    {"id": "3", "embedding": [4, 5, 6]}])

### Search using vector similarity

In [None]:
from azure.search.documents.models import VectorizedQuery

r = search_client.search(search_text=None, vector_queries=[
    VectorizedQuery(vector=[-2, -1, -1], k_nearest_neighbors=3, fields="embedding")])
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}")

## Search a larger index

In [None]:
import azure.identity
import dotenv
import openai

dotenv.load_dotenv()

# Initialize Azure search variables
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

# Set up OpenAI client based on environment variables
dotenv.load_dotenv()
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT")

token_provider = azure.identity.get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")

openai_client = openai.AzureOpenAI(
    api_version="2024-08-01-preview",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    azure_ad_token_provider=token_provider)

def get_embedding(text):
    get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)
    return get_embeddings_response.data[0].embedding

In [13]:
AZURE_SEARCH_FULL_INDEX = "index-docs-ai-score-test"
search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)

search_query = "learning about underwater activities"
search_vector = get_embedding(search_query)

r = search_client.search(search_text=None, top=5, vector_queries=[
    VectorizedQuery(vector=search_vector, k_nearest_neighbors=5, fields="embedding")])

for doc in r:
    content = doc["content"].replace("\n", " ")[:150]
    print(f"Score: {doc['@search.score']:.5f}\tContent:{content}")

NameError: name 'SearchClient' is not defined

In [14]:
AZURE_SEARCH_FULL_INDEX = "index-archive-exporter-2-keyword-no-skills-kajetan"

In [8]:
AZURE_SEARCH_FULL_INDEX = "index-docs-ai-score-test"

In [9]:
from azure.search.documents import SearchClient

search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)

In [7]:
results =  search_client.search(
    query_type='simple',
    search_text="ArchiveStatusId" ,
    select='content',
    search_fields=['content'],
    include_total_count=True
    )

print ('Total Documents Matching Query:', results.get_count())

for result in results:
    print(result["@search.score"])
    print(result["content"])

Total Documents Matching Query: 0


In [11]:
results =  search_client.search(
    query_type='simple',
    search_text="g2s" ,
    select='content',
    search_fields=['content'],
    include_total_count=True
    )

print ('Total Documents Matching Query:', results.get_count())

for result in results:
    print(result["@search.score"])
    print(result["content"])

Total Documents Matching Query: 66
5.606973
 Valid values depend on the game category. For details,</td></tr><tr><td></td><td></td></tr></table></figure>
www.comtradegaming.com
sCore
98COMTRADE GAMING
sCore NCEMS Integration Guide
<figure><table><tr><th>Attribute</th><th>Data type</th><th>Description</th></tr><tr><td></td><td></td><td>see game category-specific sections in this chapter.</td></tr><tr><td>transType</td><td>t_transType</td><td>Standardized code* for type of the transaction (meter). Valid values depend on the game category. For details, see game category-specific sections in this chapter.</td></tr><tr><td>movementValue</td><td>t_money</td><td>Value of the movement or the meter.</td></tr></table></figure>
* The set of standardized codes for meter domain and meter type (valid values of the t_transClass and t_transType enumeration data types) is defined in the G2S protocol specification. A gaming terminal, connected to the operator's CMS either directly or through a SMIB, sho