In [2]:
import os

import azure.identity
import dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient

dotenv.load_dotenv()

AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
key = os.environ["AZURE_SEARCH_API_KEY"]

azure_credential = azure.identity.DefaultAzureCredential()
index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=AzureKeyCredential(key))

In [7]:
# create index using python
from azure.search.documents.models import VectorFilterMode
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)

AZURE_SEARCH_TINY_INDEX = "teenytinyindex"

index = SearchIndex(
    name=AZURE_SEARCH_TINY_INDEX, 
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True), # like a primary key
        SearchField(name="embedding", 
                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single), 
                    searchable=True, 
                    vector_search_dimensions=3, # dimesnion of embedding
                    vector_search_profile_name="embedding_profile") # defined below
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration( # Hierachical Navigable Small World, other options like IVF
                            name="hnsw_config",
                            kind=VectorSearchAlgorithmKind.HNSW,
                            parameters=HnswParameters(metric="cosine"),
                        )],
        profiles=[VectorSearchProfile(name="embedding_profile", algorithm_configuration_name="hnsw_config")]
    )
)

index_client.create_index(index)

HttpResponseError: (ResourceNameAlreadyInUse) Cannot create index 'teenytinyindex' because it already exists.
Code: ResourceNameAlreadyInUse
Message: Cannot create index 'teenytinyindex' because it already exists.
Exception Details:	(CannotCreateExistingIndex) Cannot create index 'teenytinyindex' because it already exists.
	Code: CannotCreateExistingIndex
	Message: Cannot create index 'teenytinyindex' because it already exists.

In [4]:
# create docs

from azure.search.documents import SearchClient

search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_TINY_INDEX, credential=AzureKeyCredential(key))
search_client.upload_documents(documents=[
    {"id": "1", "embedding": [1, 2, 3]},
    {"id": "2", "embedding": [1, 1, 3]},
    {"id": "3", "embedding": [4, 5, 6]}])

[<azure.search.documents._generated.models._models_py3.IndexingResult at 0x2b7c8964310>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2b7c8964610>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x2b7c8964340>]

In [None]:
from azure.search.documents.models import VectorizedQuery

r = search_client.search(search_text=None, vector_queries=[ # not doing a text search but vector search
    VectorizedQuery(vector=[-2, -1, -1], k_nearest_neighbors=3, fields="embedding",exhaustive=False)]) # search the embedding field
# exhaustive=F ; uses apprx nearest neighbors, faster, uses HSNW - recommended
# exhaustive=T ; uses KNN, faster, used when highly selective filters
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}") # score isnt necessarily cosine sim. but can be compared

id: 2, score: 0.36515692
id: 1, score: 0.3618256
id: 3, score: 0.34674543


In [None]:
# we can even add filters on properties- filtered vector search
# https://learn.microsoft.com/en-us/azure/search/vector-search-filters?tabs=filter-2024-07-01
# can do pre and post filtering
r = search_client.search(None,
            vector_queries=[VectorizedQuery(vector=[-2, -1, -1], k_nearest_neighbors=3, fields="embedding",exhaustive=False)],
            vector_filter_mode=VectorFilterMode.PRE_FILTER, filter = "created gt 2023-11-15")
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}")

# this will give error because we dont have `created` property 

HttpResponseError: () Invalid expression: Could not find a property named 'created' on type 'search.document'.
Parameter name: $filter
Code: 
Message: Invalid expression: Could not find a property named 'created' on type 'search.document'.
Parameter name: $filter

### Hybrid Search

hybrid search - uses both keywords and vectors
Ai search has hybrid search + a reranker for more relevant results and then send to LLM