## Hotels sample Search application for Azure AI

In [1]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer,
    SearchIndex,
    SearchIndexer,
    SimpleField,
    SearchFieldDataType,
    EntityRecognitionSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    TextTranslationSkill,
    SearchIndexerSkillset,
    CorsOptions,
    IndexingSchedule,
    SearchableField,
    IndexingParameters,
    SearchIndexerDataSourceConnection,
    IndexingParametersConfiguration,
)
from azure.search.documents.indexes import SearchIndexerClient, SearchIndexClient
from azure.search.documents import SearchClient
from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import ResourceExistsError
from dotenv import load_dotenv

Loading credentials environment variables

In [2]:
load_dotenv()

# please create an .env file with the following variables and save it in the same directory
# container_name is optional, one will be created if not provided

service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
key = os.getenv("AZURE_SEARCH_API_KEY")
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
container_name = os.getenv("AZURE_STORAGE_CONTAINER_NAME", default='hotelscontainer')
print(f"container name: {container_name}")

container name: hotelscontainer


Upload source .json file on Azure storage container before index creation

In [3]:
def upload_blob_file(name):
    service_client=BlobServiceClient.from_connection_string(connection_string)
    container_client=create_container(service_client, container_name)
    blob_client = container_client.get_blob_client('hotel_small.json')
    with open(file=os.path.join('.', name), mode="rb") as data:
        blob_client.upload_blob(data, overwrite=True)
    print(f"Uploading file: {container_name}")

def create_container(blob_service_client, container_name):
    try:
        container_client = blob_service_client.create_container(container_name)
        print(f"Creating container: {container_name}")
    except ResourceExistsError:
        print(f"The container with name {container_name} already exists")
        container_client = blob_service_client.get_container_client(container=container_name)
    return container_client

upload_blob_file('hotel_small.json')

Creating container: hotelscontainer
Uploading file: hotelscontainer


In [4]:
# datasource is needed with an indexer

def create_datasource():
    ds_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
    container = SearchIndexerDataContainer(name=container_name)
    data_source_connection = SearchIndexerDataSourceConnection(
        name="hotel-datasource", type="azureblob", connection_string=connection_string, container=container
    )
    data_source = ds_client.create_data_source_connection(data_source_connection)
    return data_source

Index creation

In [5]:
def create_index():
    name = "hotel-index"

    # create an index with listed fields
    
    fields = [
        SimpleField(name="hotelId", type=SearchFieldDataType.String, filterable=True, sortable=True, key=True),
        SearchableField(name="hotelName", type=SearchFieldDataType.String, sortable=True),
        SearchableField(name="description", type=SearchFieldDataType.String, analyzer_name="en.lucene"),
        SearchableField(name="descriptionFr", type=SearchFieldDataType.String, analyzer_name="fr.lucene"),
        SearchableField(name="descriptionDe", type=SearchFieldDataType.String, analyzer_name="de.lucene"),
        SearchableField(name="category", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),
        SearchableField(name="tags", collection=True, type=SearchFieldDataType.String, facetable=True, filterable=True),
        SimpleField(name="parkingIncluded", type=SearchFieldDataType.Boolean, filterable=True),
        SimpleField(name="smokingAllowed", type=SearchFieldDataType.Boolean, filterable=True),
        SimpleField(name="lastRenovationDate", type=SearchFieldDataType.DateTimeOffset, facetable=True, filterable=True, sortable=True),
        SimpleField(name="rating", type=SearchFieldDataType.Int64, facetable=True, filterable=True, sortable=True),
        SimpleField(name="location", type=SearchFieldDataType.GeographyPoint),
    ]
    cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)

    # pass in the name, fields and cors options and create the index
    index = SearchIndex(name=name, fields=fields, cors_options=cors_options)
    index_client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))
    result = index_client.create_index(index)
    return result

Skillset creation for AI translation for searchable fields

In [6]:
# draft skillset to use with an indexer for added AI enrichments

def create_skillset():
    client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))

    in_description = InputFieldMappingEntry(name="text", source="/document/description")
    out_description = OutputFieldMappingEntry(name="translatedText", target_name="descriptionDe")
    translate = TextTranslationSkill(suggested_from="en", default_from_language_code="en", default_to_language_code="de", context="/document",
                                     inputs=[in_description], outputs=[out_description])

    skillset = SearchIndexerSkillset(name="hotels-skill", skills=[translate], description="skillset for hotels")
    result = client.create_skillset(skillset)
    return result

In [7]:
# logged issue with indexer creation with azure-search-documents 11.4.0 :
# https://github.com/Azure/azure-sdk-for-python/pull/33357
# https://github.com/Azure/azure-sdk-for-python/issues/33382
# workaround to pass query_timeout=None with IndexingParametersConfiguration() instantiation


skillset_name = create_skillset().name
print("Skillset is created")

ds_name = create_datasource().name
print("Data source is created")

index_name = create_index().name
print("Index is created")

# pass the data source, skillset and targeted index to build an indexer
configuration = IndexingParametersConfiguration(parsing_mode="jsonArray", query_timeout=None)
parameters = IndexingParameters(configuration=configuration)
indexer = SearchIndexer(
    name="hotels-indexer",
    data_source_name=ds_name,
    target_index_name=index_name,
    skillset_name=skillset_name,
    parameters=parameters,
)

indexer_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
indexer_name=indexer_client.create_indexer(indexer).name  # create the indexer

# get the indexer
result = indexer_client.get_indexer(indexer_name)
print(result)

Skillset is created
Data source is created
Index is created
{'additional_properties': {'@odata.context': 'https://prototypes-search.search.windows.net/$metadata#indexers/$entity'}, 'name': 'hotels-indexer', 'description': None, 'data_source_name': 'hotel-datasource', 'skillset_name': 'hotels-skill', 'target_index_name': 'hotel-index', 'schedule': None, 'parameters': <azure.search.documents.indexes._generated.models._models_py3.IndexingParameters object at 0x000001D6DDDCE330>, 'field_mappings': [], 'output_field_mappings': [], 'is_disabled': False, 'e_tag': '"0x8DC0B847A602878"', 'encryption_key': None}


In [8]:
# run the indexer
indexer_client.run_indexer(indexer_name)

Create a queries client

In [9]:
# instantiate a search client before running queries on the populated index
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

Example queries

In [10]:
# this is a query based on keywords search within the fields defined as searchable when creating the index
# the search results are returned with only the select fields content, presenting results as filtered by rating  >3 and sorted
# filterable and sortable fields are also definied when creating the index

results = search_client.search(search_text="pool", select='hotelId,hotelName,rating,tags', filter='rating gt 3', order_by='rating desc')

for result in results:
    print("{}: {} - {} rating".format(result["hotelId"], result["hotelName"], result["rating"], result["tags"]))

1: Fancy Stay - 5 rating
9: Secret Point Motel - 4 rating


In [11]:
# search by document key, all fields content are returned (no select)

result = search_client.get_document(key="9")

print("Name: {}".format(result["hotelName"]))
print("Rating: {}".format(result["rating"]))
print("Category: {}".format(result["category"]))
print("Tags: {}".format(result["tags"]))
print("LastRenovationDate: {}".format(result["lastRenovationDate"]))
print("Description: {}".format(result["descriptionDe"]))

Name: Secret Point Motel
Rating: 4
Category: Boutique
Tags: ['pool', 'air conditioning', 'concierge']
LastRenovationDate: 1970-01-18T05:00:00Z
Description: None


In [12]:
# global search leveraging the facetable fields

results =  search_client.search(search_text="*", facets=["category"])

facets = results.get_facets()

for facet in facets["category"]:
    print("{}".format(facet))

{'value': 'Budget', 'count': 5}
{'value': 'Boutique', 'count': 1}
{'value': 'Luxury', 'count': 1}
