## Vector search in Python (Azure AI Search)

https://github.com/Azure/azure-search-vector-samples/blob/main/demo-python/code/basic-vector-workflow/azure-search-vector-python-sample.ipynb

In [1]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

load_dotenv(override=True) # take environment variables from .env.

# The following variables from your .env file are used in this notebook
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
credential = AzureKeyCredential(os.environ["AZURE_SEARCH_API_KEY"]) 
index_name = "azure-vector"

azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT_2"]
azure_openai_key = os.environ["AZURE_OPENAI_API_KEY_2"] 
azure_openai_embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_MODEL_NAME_2"]
embedding_model_name = os.environ["AZURE_OPENAI_EMBEDDING_MODEL_NAME_2"]
azure_openai_api_version = os.environ["AZURE_OPENAI_API_VERSION_2"]

### Load data

In [3]:
import json

# Read the text-sample.json
path = os.path.join('data', 'text-sample.json')
with open(path, 'r', encoding='utf-8') as file:
    input_data = json.load(file)

In [4]:
input_data

[{'id': '1',
  'title': 'Azure App Service',
  'content': 'Azure App Service is a fully managed platform for building, deploying, and scaling web apps. You can host web apps, mobile app backends, and RESTful APIs. It supports a variety of programming languages and frameworks, such as .NET, Java, Node.js, Python, and PHP. The service offers built-in auto-scaling and load balancing capabilities. It also provides integration with other Azure services, such as Azure DevOps, GitHub, and Bitbucket.',
  'category': 'Web'},
 {'id': '2',
  'title': 'Azure Functions',
  'content': 'Azure Functions is a serverless compute service that enables you to run code on-demand without having to manage infrastructure. It allows you to build and deploy event-driven applications that automatically scale with your workload. Functions support various languages, including C#, F#, Node.js, Python, and Java. It offers a variety of triggers and bindings to integrate with other Azure services and external services.

### Create embeddings

Read your data, generate OpenAI embeddings and export to a format to insert your Azure AI Search index:

In [5]:
from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

openai_credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(openai_credential, "https://cognitiveservices.azure.com/.default")

client = AzureOpenAI(
    azure_deployment=azure_openai_embedding_deployment,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    azure_ad_token_provider=token_provider if not azure_openai_key else None
)

In [7]:
# Generate Document Embeddings using OpenAI

titles = [item['title'] for item in input_data]
content = [item['content'] for item in input_data]
title_response = client.embeddings.create(input=titles, model=embedding_model_name)
title_embeddings = [item.embedding for item in title_response.data]
content_response = client.embeddings.create(input=content, model=embedding_model_name)
content_embeddings = [item.embedding for item in content_response.data]

In [8]:
# Generate embeddings for title and content fields
for i, item in enumerate(input_data):
    title = item['title']
    content = item['content']
    item['titleVector'] = title_embeddings[i]
    item['contentVector'] = content_embeddings[i]

In [9]:
# Output embeddings to docVectors.json file
output_path = os.path.join('data', 'docVectors.json')
output_directory = os.path.dirname(output_path)
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
with open(output_path, "w") as f:
    json.dump(input_data, f)

In [10]:
input_data

[{'id': '1',
  'title': 'Azure App Service',
  'content': 'Azure App Service is a fully managed platform for building, deploying, and scaling web apps. You can host web apps, mobile app backends, and RESTful APIs. It supports a variety of programming languages and frameworks, such as .NET, Java, Node.js, Python, and PHP. The service offers built-in auto-scaling and load balancing capabilities. It also provides integration with other Azure services, such as Azure DevOps, GitHub, and Bitbucket.',
  'category': 'Web',
  'titleVector': [-0.03202344477176666,
   -0.0031460069585591555,
   0.028085747733712196,
   0.0055655562318861485,
   0.01686568185687065,
   -0.05019378662109375,
   -0.0005174158723093569,
   0.019783372059464455,
   0.0028391156811267138,
   -0.00357595132663846,
   0.006475852336734533,
   -0.04995657503604889,
   0.039400696754455566,
   -0.018502434715628624,
   -0.04476166144013405,
   0.016889402642846107,
   -0.035059742629528046,
   0.04139326885342598,
   -0.01

### Create your search index

Create your search index schema and vector search configuration. If you get an error, check the search service for available quota and check the .env file to make sure you're using a unique search index name.

In [7]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    SearchIndex
)

# Create a search index
index_client = SearchIndexClient(
    endpoint=endpoint, credential=credential)
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
    SearchableField(name="title", type=SearchFieldDataType.String),
    SearchableField(name="content", type=SearchFieldDataType.String),
    SearchableField(name="category", type=SearchFieldDataType.String,
                    filterable=True),
    SearchField(name="titleVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
    SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True, vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
]

# Configure the vector search configuration  
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(
            name="myHnsw"
        )
    ],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
        )
    ]
)

semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        keywords_fields=[SemanticField(field_name="category")],
        content_fields=[SemanticField(field_name="content")]
    )
)

In [13]:
# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])

# Create the search index with the semantic settings
index = SearchIndex(
    name=index_name, 
    fields=fields,
    vector_search=vector_search, 
    semantic_search=semantic_search
)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

 azure-vector created


In [18]:
print(index.name)
for field in index.fields:
    print(f"Name: {field.name}, Type: {field.type}, Searchable: {field.searchable}, Filterable: {field.filterable}, Sortable: {field.sortable}, Facetable: {field.facetable}")

azure-vector
Name: id, Type: Edm.String, Searchable: False, Filterable: True, Sortable: True, Facetable: True
Name: title, Type: Edm.String, Searchable: True, Filterable: False, Sortable: False, Facetable: False
Name: content, Type: Edm.String, Searchable: True, Filterable: False, Sortable: False, Facetable: False
Name: category, Type: Edm.String, Searchable: True, Filterable: True, Sortable: False, Facetable: False
Name: titleVector, Type: Collection(Edm.Single), Searchable: True, Filterable: None, Sortable: None, Facetable: None
Name: contentVector, Type: Collection(Edm.Single), Searchable: True, Filterable: None, Sortable: None, Facetable: None


### Insert text and embeddings into vector store

Add texts and metadata from the JSON data to the vector store:

In [23]:
from azure.search.documents import SearchClient

# Upload some documents to the index
output_path = os.path.join('data', 'docVectors.json')
output_directory = os.path.dirname(output_path)

if not os.path.exists(output_directory):
    os.makedirs(output_directory)
with open(output_path, 'r') as file:  
    documents = json.load(file) 

documents
print(f"{len(documents)} documents") 

108 documents


In [22]:
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)
result = search_client.upload_documents(documents)

print(f"Uploaded {len(documents)} documents") 

Uploaded 108 documents


In [2]:
# get_document_count

from azure.search.documents import SearchClient

search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)
search_client.get_document_count()

108

In [10]:
# filter and get_count

search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

search_results = search_client.search(
    search_text="*",  
    filter="category eq 'Networking'",
    include_total_count=True
)

# Get the total count from the search results
total_count = search_results.get_count()

print(f"Total documents matching filter: {total_count}")

Total documents matching filter: 12


In [8]:
# Index size

def bytes_to_mb(bytes):
    return round(bytes / (1024 * 1024), 4)

def find_storage_size_mb(index_name):
    response = index_client.get_index_statistics(index_name)
    return bytes_to_mb(response["storage_size"]), bytes_to_mb(response["vector_index_size"])

index_sizes = [(find_storage_size_mb(index_name), index_name) for index_name in [index_name]]
index_sizes.sort(key=lambda item: item[0][0], reverse=True)

for ((storage_size, vector_size), index_name) in index_sizes:
    print("*" * 40)
    print(f"Index Name: {index_name}\nStorage Size: {storage_size}MB\nVector Size: {vector_size}MB")

****************************************
Index Name: azure-vector
Storage Size: 5.4251MB
Vector Size: 1.2801MB


In [4]:
# Retrieve data

from azure.search.documents import SearchClient

# Initialize SearchClient
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

# Perform a search query to retrieve documents
results = search_client.search(search_text="*", top=1)

# Extract the first document
first_document = next(iter(results), None)

print(type(first_document))
first_document

<class 'dict'>


{'id': '20',
 'title': 'Azure CDN',
 'content': 'Azure Content Delivery Network (CDN) is a global content delivery network that enables you to deliver content to users with low latency and high availability. It caches and serves content from edge servers located near your users, improving the performance and reliability of your web applications. Azure CDN supports various content types, including static files, videos, and images. It provides advanced features like geo-filtering, custom domains, and SSL certificates. You can use Azure CDN with other Azure services, such as Azure App Service and Azure Storage.',
 'titleVector': [-0.0049445247,
  -0.0013837042,
  0.043243866,
  0.011560438,
  0.017602641,
  -0.027458528,
  -0.01297316,
  -0.033772662,
  0.004238164,
  0.01204461,
  0.008463063,
  0.0024175444,
  0.032658406,
  -0.035470583,
  -0.033295125,
  0.035046104,
  -0.041890837,
  -0.0009210877,
  -0.01353029,
  -0.000103010934,
  0.0033361448,
  -0.002666263,
  -0.015214943,
  0.

If you are indexing a very large number of documents, you can use the SearchIndexingBufferedSender which is an optimized way to automatically index the docs as it will handle the batching for you:

In [24]:
# from azure.search.documents import SearchIndexingBufferedSender

# # Upload some documents to the index  
# with open(output_path, 'r') as file:  
#     documents = json.load(file)  
  
# # Use SearchIndexingBufferedSender to upload the documents in batches optimized for indexing  
# with SearchIndexingBufferedSender(  
#     endpoint=endpoint,  
#     index_name=index_name,  
#     credential=credential,  
# ) as batch_client:  
#     # Add upload actions for all documents  
#     batch_client.upload_documents(documents=documents)  
# print(f"Uploaded {len(documents)} documents in total")  

['_ODATA_ACCEPT', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_aad', '_api_version', '_client', '_credential', '_endpoint', '_headers', '_index_documents_actions', '_index_name', '_merge_client_headers', 'autocomplete', 'close', 'delete_documents', 'get_document', 'get_document_count', 'index_documents', 'merge_documents', 'merge_or_upload_documents', 'search', 'send_request', 'suggest', 'upload_documents']


### Perform a vector similarity search

This example shows a pure vector search using the vectorizable text query, all you need to do is pass in text and your vectorizer will handle the query vectorization.

#

In [55]:
from azure.search.documents.models import VectorizedQuery

# Pure Vector Search
query = "tools for software development"  
  
embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")
  
results = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["title", "content", "category"],
)

In [75]:
print(type(vector_query))
print('vector length', len(vector_query.vector))
print(type(vector_query.vector))
vector_query.vector

<class 'azure.search.documents._generated.models._models_py3.VectorizedQuery'>
vector length 1536
<class 'list'>


[-0.008518248796463013,
 0.028065217658877373,
 0.042770177125930786,
 -0.05734500288963318,
 -0.00987379439175129,
 -0.052096329629421234,
 -0.0001938430214067921,
 0.028607435524463654,
 -0.013306035660207272,
 0.016960587352514267,
 0.026785582304000854,
 -0.023293696343898773,
 -0.0117010697722435,
 -0.06532645225524902,
 0.031687233597040176,
 0.019682522863149643,
 0.005508937407284975,
 -0.007477189879864454,
 -0.04344252496957779,
 -0.002191917272284627,
 0.06207314506173134,
 -0.005129384808242321,
 0.007629010826349258,
 0.028932766988873482,
 0.04227133467793465,
 0.004915208555757999,
 -0.028976144269108772,
 0.020485006272792816,
 -0.007601900026202202,
 -0.027631442993879318,
 -0.03481041267514229,
 -0.03652382269501686,
 0.03895295783877373,
 0.0443100742995739,
 0.04253159835934639,
 -0.01083352044224739,
 0.0019275859231129289,
 -0.028238726779818535,
 0.020354872569441795,
 0.014954379759728909,
 -0.007721188012510538,
 -0.038410741835832596,
 0.024681774899363518,
 0

In [56]:
import pandas as pd

results_list = list(results)

# Now you can create a DataFrame from the list
results_df = pd.DataFrame(results_list)
print(results_list)
results_df

[{'content': 'Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.', 'title': 'Azure DevOps', 'category': 'Developer Tools', '@search.score': 0.640457, '@search.reranker_score': None, '@search.highlights': None, '@search.captions': None}, {'content': 'Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and int

Unnamed: 0,content,title,category,@search.score,@search.reranker_score,@search.highlights,@search.captions
0,Azure DevOps is a suite of services that help ...,Azure DevOps,Developer Tools,0.640457,,,
1,Azure DevTest Labs is a fully managed service ...,Azure DevTest Labs,Developer Tools,0.61851,,,
2,Azure App Service is a fully managed platform ...,Azure App Service,Web,0.598167,,,


In [47]:
# The SearchItemPaged object you are working with is likely an iterator. In Python, iterators can only be traversed once. 

for result in results:
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")  

Title: Azure DevOps
Score: 0.640457
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.61850977
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such 

This example shows a pure vector search to demonstrate OpenAI's embedding multilingual capabilities.

In [58]:
from azure.search.documents.models import VectorizedQuery

# Pure Vector Search
query = "tools for software development"
  
embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")
  
results = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["title", "content", "category"],
)  

for result in results:  
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")  

Title: Azure DevOps
Score: 0.640457
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.61850977
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such 

#### Perform an Exhaustive KNN exact nearest neighbor search

This example shows how you can exhaustively search your vector index regardless of what index you have, HNSW or ExhaustiveKNN. You can use this to calculate the ground-truth values.

In [59]:
# Pure Vector Search multi-lingual (e.g 'tools for software development' in Dutch)  
query = "tools voor softwareontwikkeling"  
  
embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")

results = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["title", "content", "category"],
)  

for result in results:  
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n") 

Title: Azure DevOps
Score: 0.6206871
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.6089567
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such 

#### Perform an Exhaustive KNN exact nearest neighbor search

This example shows how you can exhaustively search your vector index regardless of what index you have, HNSW or ExhaustiveKNN. You can use this to calculate the ground-truth values.

- VectorizedQuery = exhaustive=True
- search_client.search()
  - search_text=None
  - vector_queries= [vector_query]

In [76]:
# Pure Vector Search
query = "tools for software development"  
  
embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector", exhaustive=True)
  
results = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["title", "content", "category"],
)  
  
for result in results:  
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")  

Title: Azure DevOps
Score: 0.64045686
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.6185099
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such

#### Perform a Cross-Field Vector Search

This example shows a cross-field vector search that allows you to query multiple vector fields at the same time. Note, ensure that the same embedding model was used for the vector fields you decide to query.

- VectorizedQuery = fields="contentVector, titleVector"
- search_client.search()
  - search_text=None
  - vector_queries= [vector_query]

In [77]:
# Pure Vector Search
query = "tools for software development"  
  
embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector, titleVector")

results = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["title", "content", "category"],
)  
  
for result in results:  
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")  

Title: Azure DevOps
Score: 0.03333333507180214
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.032786883413791656
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports var

#### Perform a Multi-Vector Search

This example shows a cross-field vector search that allows you to query multiple vector fields at the same time by passing in multiple query vectors. Note, in this case, you can pass in query vectors from two different embedding models to the corresponding vector fields in your index.

- search_client.search()
  - search_text=None
  - vector_queries=[vector_query_1, vector_query_2]

In [None]:
# Multi-Vector Search
query = "tools for software development"

embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query_1 = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="titleVector")
vector_query_2 = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")

results = search_client.search(  
    search_text=None,  
    vector_queries=[vector_query_1, vector_query_2],
    select=["title", "content", "category"],
)  
  
for result in results:  
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")  

#### Perform a Pure Vector Search with a filter

This example shows how to apply filters on your index. Note, that you can choose whether you want to use Pre-Filtering (default) or Post-Filtering.

- search_client.search()
  - search_text=None
  - vector_queries= [vector_query]

In [79]:
from azure.search.documents.models import VectorFilterMode

# Pure Vector Search
query = "tools for software development"  
  
embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")

results = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    vector_filter_mode=VectorFilterMode.PRE_FILTER,
    filter="category eq 'Developer Tools'",
    select=["title", "content", "category"],
)
  
for result in results:  
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")  

Title: Azure DevOps
Score: 0.64045686
Content: Azure DevOps is a suite of services that help you plan, build, and deploy applications. It includes Azure Boards for work item tracking, Azure Repos for source code management, Azure Pipelines for continuous integration and continuous deployment, Azure Test Plans for manual and automated testing, and Azure Artifacts for package management. DevOps supports a wide range of programming languages, frameworks, and platforms, making it easy to integrate with your existing development tools and processes. It also integrates with other Azure services, such as Azure App Service and Azure Functions.
Category: Developer Tools

Title: Azure DevTest Labs
Score: 0.6185099
Content: Azure DevTest Labs is a fully managed service that enables you to create, manage, and share development and test environments in Azure. It provides features like custom templates, cost management, and integration with Azure DevOps. DevTest Labs supports various platforms, such

## Hybrid Search

- search_client.search()
  - search_text=query
  - vector_queries=[vector_query]

In [80]:
# Hybrid Search
query = "scalable storage solution"  
  
embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector")

results = search_client.search(  
    search_text=query,  
    vector_queries=[vector_query],
    select=["title", "content", "category"],
    top=3
)  
  
for result in results:  
    print(f"Title: {result['title']}")  
    print(f"Score: {result['@search.score']}")  
    print(f"Content: {result['content']}")  
    print(f"Category: {result['category']}\n")  

Title: Azure Storage
Score: 0.03333333507180214
Content: Azure Storage is a scalable, durable, and highly available cloud storage service that supports a variety of data types, including blobs, files, queues, and tables. It provides a massively scalable object store for unstructured data. Storage supports data redundancy and geo-replication, ensuring high durability and availability. It offers a variety of data access and management options, including REST APIs, SDKs, and Azure Portal. You can secure your data using encryption at rest and in transit.
Category: Storage

Title: Azure File Storage
Score: 0.0320020467042923
Content: Azure File Storage is a fully managed, scalable, and secure file sharing service that enables you to store and access your files over the Server Message Block (SMB) protocol. It provides features like snapshots, shared access signatures, and integration with Azure Backup. File Storage supports various platforms, such as Windows, Linux, and macOS. You can use Az

## Semantic Hybrid Search RAG

- earch_client.search(()
  - query_type=QueryType.SEMANTIC, 
  - semantic_configuration_name='my-semantic-config', 
  - query_caption=QueryCaptionType.EXTRACTIVE, 
  - query_answer=QueryAnswerType.EXTRACTIVE

In [81]:
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType

# Semantic Hybrid Search
query = "what is azure sarch?"

embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector", exhaustive=True)

results = search_client.search(  
    search_text=query,  
    vector_queries=[vector_query],
    select=["title", "content", "category"],
    query_type=QueryType.SEMANTIC, semantic_configuration_name='my-semantic-config', query_caption=QueryCaptionType.EXTRACTIVE, query_answer=QueryAnswerType.EXTRACTIVE,
    top=3
)

semantic_answers = results.get_answers()
for answer in semantic_answers:
    if answer.highlights:
        print(f"Semantic Answer: {answer.highlights}")
    else:
        print(f"Semantic Answer: {answer.text}")
    print(f"Semantic Answer Score: {answer.score}\n")

for result in results:
    print(f"Title: {result['title']}")
    print(f"Reranker Score: {result['@search.reranker_score']}")
    print(f"Content: {result['content']}")
    print(f"Category: {result['category']}")

    captions = result["@search.captions"]
    if captions:
        caption = captions[0]
        if caption.highlights:
            print(f"Caption: {caption.highlights}\n")
        else:
            print(f"Caption: {caption.text}\n")

Semantic Answer: Azure File Storage is<em> a fully managed, scalable, and secure file sharing service that enables you to store and access your files over the Server Message Block (SMB) protocol.</em> It provides features like snapshots, shared access signatures, and integration with Azure Backup. File Storage supports various platforms, such as Windows, Linux, and macOS.
Semantic Answer Score: 0.9208984375

Title: Azure Stack Edge
Reranker Score: 2.075716972351074
Content: Azure Stack Edge is a managed, edge computing appliance that enables you to run Azure services and AI workloads on-premises or at the edge. It provides features like hardware-accelerated machine learning, local caching, and integration with Azure IoT Hub. Azure Stack Edge supports various Azure services, such as Azure Functions, Azure Machine Learning, and Azure Kubernetes Service. You can use Azure Stack Edge to build edge computing applications, optimize your data processing, and ensure the security and compliance

In [98]:
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType

# Semantic Hybrid Search
query = "what is azure AI sarch?"

embedding = client.embeddings.create(input=query, model=embedding_model_name).data[0].embedding
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=3, fields="contentVector", exhaustive=True)

results = search_client.search(  
    search_text=query,  
    vector_queries=[vector_query],
    select=["title", "content", "category"],
    query_type=QueryType.SEMANTIC, semantic_configuration_name='my-semantic-config', query_caption=QueryCaptionType.EXTRACTIVE, query_answer=QueryAnswerType.EXTRACTIVE,
    top=3
)

In [116]:
semantic_answers = results.get_answers()

answer_dict = semantic_answers[0].as_dict()

answer_df = pd.DataFrame([answer_dict])
answer_df

Unnamed: 0,score,key,text,highlights
0,0.868164,92,"Azure Batch AI is a fully managed, AI-powered ...","Azure Batch AI is<em> a fully managed, AI-powe..."


In [100]:
import pandas as pd

results_list = list(results)

# Now you can create a DataFrame from the list
results_df = pd.DataFrame(results_list)
results_df

Unnamed: 0,content,title,category,@search.score,@search.reranker_score,@search.highlights,@search.captions
0,"Azure Batch AI is a fully managed, AI-powered ...",Azure Batch AI,AI + Machine Learning,0.03306,2.630515,,"[{'additional_properties': None, 'text': 'Azur..."
1,"Azure Machine Learning is a fully managed, end...",Azure Machine Learning,AI + Machine Learning,0.014925,2.043165,,"[{'additional_properties': None, 'text': 'Azur..."
2,Azure Cognitive Services is a collection of AI...,Azure Cognitive Services,AI + Machine Learning,0.032258,1.92289,,"[{'additional_properties': None, 'text': 'Azur..."
