# Hotels sample search application for Azure AI Search, built with the azure-search-documents and azure-storage-blob Python SDKs

In [53]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer,
    SearchIndex,
    SearchIndexer,
    SimpleField,
    SearchFieldDataType,
    EntityRecognitionSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    SearchIndexerSkillset,
    CorsOptions,
    IndexingSchedule,
    SearchableField,
    IndexingParameters,
    SearchIndexerDataSourceConnection,
    IndexingParametersConfiguration,
)
from azure.search.documents.indexes import SearchIndexerClient, SearchIndexClient
from azure.search.documents import SearchClient
from azure.storage.blob import BlobServiceClient
from dotenv import load_dotenv
import json


In [2]:
# Settings are read from a .env file saved in the same directory as this notebook.
# Please create that file and define the four variables looked up below.
load_dotenv()

service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
key = os.environ.get("AZURE_SEARCH_API_KEY")
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
container_name = os.environ.get("AZURE_STORAGE_CONTAINER_NAME")

In [3]:
def upload_blob_file(name, blob_name=None):
    """Upload a local file to the configured blob container.

    Args:
        name: Path of the local file, relative to the current directory.
        blob_name: Name to store the blob under. Defaults to the base name
            of ``name`` so that each uploaded file gets its own blob
            (previously every file was written to 'hotel_small.json').

    An existing blob with the same name is overwritten.
    """
    if blob_name is None:
        blob_name = os.path.basename(name)

    service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = service_client.get_container_client(container_name)
    blob_client = container_client.get_blob_client(blob_name)
    # Binary mode: the bytes are uploaded exactly as they are on disk.
    with open(file=os.path.join('.', name), mode="rb") as data:
        blob_client.upload_blob(data, overwrite=True)

In [4]:
def create_datasource():
    """Register the blob container as the "hotel-datasource" data source.

    Returns:
        The data source connection object returned by the search service.
    """
    indexer_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
    connection = SearchIndexerDataSourceConnection(
        name="hotel-datasource",
        type="azureblob",
        connection_string=connection_string,
        container=SearchIndexerDataContainer(name=container_name),
    )
    return indexer_client.create_data_source_connection(connection)

In [36]:
def create_index():
    """Create the "hotel-index" search index and return the created index.

    The schema below decides which fields are searchable, filterable,
    sortable and facetable; the queries issued against the index rely on
    those attributes.
    """
    string_type = SearchFieldDataType.String
    boolean_type = SearchFieldDataType.Boolean

    # Schema of the index: hotelId is the document key.
    schema = [
        SimpleField(name="hotelId", type=string_type, filterable=True, sortable=True, key=True),
        SearchableField(name="hotelName", type=string_type, sortable=True),
        SearchableField(name="description", type=string_type, analyzer_name="en.lucene"),
        SearchableField(name="descriptionFr", type=string_type, analyzer_name="fr.lucene"),
        SearchableField(name="category", type=string_type, facetable=True, filterable=True, sortable=True),
        SearchableField(name="tags", collection=True, type=string_type, facetable=True, filterable=True),
        SimpleField(name="parkingIncluded", type=boolean_type, filterable=True),
        SimpleField(name="smokingAllowed", type=boolean_type, filterable=True),
        SimpleField(
            name="lastRenovationDate",
            type=SearchFieldDataType.DateTimeOffset,
            facetable=True,
            filterable=True,
            sortable=True,
        ),
        SimpleField(
            name="rating",
            type=SearchFieldDataType.Int64,
            facetable=True,
            filterable=True,
            sortable=True,
        ),
        SimpleField(name="location", type=SearchFieldDataType.GeographyPoint),
    ]

    # Bundle the name, the fields and the CORS options into the index definition.
    hotel_index = SearchIndex(
        name="hotel-index",
        fields=schema,
        cors_options=CorsOptions(allowed_origins=["*"], max_age_in_seconds=60),
    )

    client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))
    return client.create_index(hotel_index)

In [6]:
# Example skillset that can be attached to an indexer to add AI enrichments

def create_skillset():
    """Create the "hotel-data-skill" skillset and return the service's result.

    The skillset holds a single entity recognition skill that takes
    /document/lastRenovationDate as its "text" input and maps its
    "dateTimes" output to "RenovatedDate".
    """
    text_input = InputFieldMappingEntry(name="text", source="/document/lastRenovationDate")
    dates_output = OutputFieldMappingEntry(name="dateTimes", target_name="RenovatedDate")
    entity_skill = EntityRecognitionSkill(
        name="merge-skill",
        inputs=[text_input],
        outputs=[dates_output],
    )

    hotel_skillset = SearchIndexerSkillset(
        name="hotel-data-skill",
        skills=[entity_skill],
        description="skillset for hotels",
    )
    indexer_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
    return indexer_client.create_skillset(hotel_skillset)

In [7]:
# Push the local sample data file to the blob container
upload_blob_file(name='hotel_small.json')

In [None]:
# DO NOT RUN this cell.
# Indexer creation is affected by a known issue in the Azure SDK for Python:
# https://github.com/Azure/azure-sdk-for-python/pull/33357
# https://github.com/Azure/azure-sdk-for-python/issues/33382
# The code is wrapped in a string literal below so that it is not executed.

"""
# Now that we have a datasource and an index, we can create an indexer.

skillset_name = create_skillset().name
print("Skillset is created")

ds_name = create_datasource().name
print("Data source is created")

index_name = create_index().name
print("Index is created")

# we pass the data source, skillsets and targeted index to build an indexer
configuration = IndexingParametersConfiguration(parsing_mode="json")
parameters = IndexingParameters(configuration=configuration)
indexer = SearchIndexer(
    name="hotel-data-indexer",
    data_source_name=ds_name,
    target_index_name=index_name,
    skillset_name=skillset_name,
    parameters=parameters,
)

indexer_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
indexer_client.create_indexer(indexer)  # create the indexer

# to get an indexer
result = indexer_client.get_indexer("hotel-data-indexer")
print(result)

# To run an indexer, we can use run_indexer()
indexer_client.run_indexer(result.name)

# get the status of an indexer
indexer_client.get_indexer_status(result.name)
"""

In [37]:
# ALTERNATIVE without advanced indexer skills:
# create the index, then push the documents from the local JSON file directly.

index_name = create_index().name
print("Index is created")

# Decode explicitly as UTF-8 instead of the platform default, so non-ASCII text
# in the data file is read the same way on every operating system.
with open(os.path.join('.', 'hotel_small.json'), 'r', encoding='utf-8') as file:
    documents = json.load(file)

search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))
result = search_client.upload_documents(documents)

# upload_documents returns one indexing result per document: count the
# documents the service actually accepted rather than the ones we sent.
uploaded_count = sum(1 for item in result if item.succeeded)
print(f"Uploaded {uploaded_count} documents")

failed_keys = [item.key for item in result if not item.succeeded]
if failed_keys:
    print(f"Failed to upload documents with keys: {failed_keys}")

Index is created
Uploaded 10 documents


Queries

In [52]:
# Keyword search over the fields declared as searchable when the index was created.
# Only the fields listed in `select` are returned; results are filtered to rating > 3
# and sorted by rating, highest first.
# Filterable and sortable fields are also defined when creating the index.

results = search_client.search(search_text="swimming pool", select='hotelId,hotelName,rating,tags', filter='rating gt 3', order_by='rating desc')

for result in results:
    # The original format string had three placeholders but was given four values,
    # so the extra tags value was silently ignored; it is no longer passed.
    print(f"{result['hotelId']}: {result['hotelName']} - {result['rating']} rating")

1: Fancy Stay - 5 rating
9: Secret Point Motel - 4 rating


In [46]:
# Look up one document by its key; without a select, the content of all fields is returned

result = search_client.get_document(key="9")

# (label shown, field read from the document), printed in this order
for label, field in (
    ("Name", "hotelName"),
    ("Rating", "rating"),
    ("Category", "category"),
    ("Tags", "tags"),
    ("LastRenovationDate", "lastRenovationDate"),
    ("Description", "description"),
):
    print(f"{label}: {result[field]}")

Name: Secret Point Motel
Rating: 4
Category: Boutique
Tags: ['pool', 'air conditioning', 'concierge']
LastRenovationDate: 1970-01-18T05:00:00Z
Description: The hotel is ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Time's Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.


In [42]:
# Match everything ("*") and ask for facet information on the facetable "category" field

results = search_client.search(search_text="*", facets=["category"])

facets = results.get_facets()

# One line per entry returned for the "category" facet
for facet in facets["category"]:
    print(f"{facet}")

{'value': 'Budget', 'count': 5}
{'value': 'Boutique', 'count': 1}
{'value': 'Luxury', 'count': 1}
