### Querying and Retrieval


In [None]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient
from openai import AzureOpenAI

import os
from dotenv import load_dotenv


In [None]:
# Load settings from a .env file. NOTE(review): override=True presumably lets
# .env values replace variables already set in the process -- confirm against
# the python-dotenv documentation.
load_dotenv(override=True)

# Azure Search service details, read from the environment.
# os.getenv returns None for any variable that is not set; nothing below
# validates these values.
service_name = os.getenv("AZURE_SEARCH_SERVICE_NAME")
admin_key = os.getenv("AZURE_SEARCH_SERVICE_KEY")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")

# Build the service endpoint and create both clients from the same key
# credential: index_client is bound to the service endpoint only, while
# search_client is additionally bound to the index named above.
# NOTE(review): the variable is called admin_key -- a query-only key may be
# enough for the read operations in this notebook; confirm.
endpoint = f"https://{service_name}.search.windows.net/"
credential = AzureKeyCredential(admin_key)
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

# Echo the resolved endpoint and index name (the key is not printed).
print(f"endpoint: {endpoint}")
print(f"index_name: {index_name}")

In [None]:
from openai import AzureOpenAI

# Azure OpenAI client used by embed_query to embed query text.
# Key, API version and endpoint all come from the AZURE_OPENAI_EMBEDDING_*
# environment variables; os.getenv yields None for any that are unset.
openai_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_EMBEDDING_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_EMBEDDING_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT")
)


def embed_query(query: str) -> list:
    """Return the embedding vector for ``query``.

    The text is sent to the embeddings deployment named by the
    ``AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME`` environment variable, and the
    embedding of the first item in the response is returned.
    """
    deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
    embeddings = openai_client.embeddings.create(input=query, model=deployment)
    first_item = embeddings.data[0]
    return first_item.embedding

In [None]:
from azure.search.documents.models import VectorizedQuery
from azure.search.documents.models import QueryType, QueryCaptionType, QueryAnswerType


In [None]:
def full_text_search(query, top=5):
    """Run a keyword (full-text) query against the index.

    Args:
        query: Text to search for.
        top: Value passed as ``top`` to the search call.

    Returns:
        The hits collected into a list.
    """
    hits = search_client.search(search_text=query, top=top)
    return [*hits]


def vector_search(query):
    """Perform a pure vector search for ``query``.

    The query text is embedded with ``embed_query`` and matched against the
    ``embedding`` field with ``k_nearest_neighbors=3``; no keyword text is
    sent (``search_text=None``).

    Returns:
        A list of hits, selecting only the hotel name, review text, review
        title and city fields.
    """
    nearest = VectorizedQuery(
        vector=embed_query(query),
        k_nearest_neighbors=3,
        fields="embedding",
    )
    selected_fields = ["hotel_name", "review_text", "review_title", "city"]
    hits = search_client.search(
        search_text=None,
        vector_queries=[nearest],
        select=selected_fields,
    )
    return [*hits]


def hybrid_search(query, top=5):
    """Run a hybrid query: keyword text plus a vector query in one request.

    Args:
        query: Text used both as the keyword search text and, after
            embedding with ``embed_query``, as the vector query.
        top: Value passed as ``top`` to the search call.

    Returns:
        A list of hits, selecting the id, review text, review title, hotel
        name and city fields.
    """
    nearest = VectorizedQuery(
        vector=embed_query(query),
        k_nearest_neighbors=3,
        fields="embedding",
    )
    selected_fields = ["id", "review_text", "review_title", "hotel_name", "city"]
    hits = search_client.search(
        search_text=query,
        vector_queries=[nearest],
        select=selected_fields,
        top=top,
    )
    return [*hits]


def semantic_search(query, top=5):
    """Run a hybrid query with semantic ranking, captions and answers.

    The query is sent both as keyword text and as an embedding matched
    against the ``embedding`` field, using ``QueryType.SEMANTIC`` with the
    ``ps-hotels-semantic-config`` semantic configuration. Extractive captions
    and extractive answers are requested.

    Args:
        query: Text to search for.
        top: Value passed as ``top`` to the search call.

    Returns:
        Whatever ``search_client.search`` returns, unmodified. Unlike the
        other search helpers in this file, the result is NOT converted to a
        list. NOTE(review): presumably a single-pass paged iterator -- confirm
        before iterating it more than once.
    """
    wanted_fields = [
        "id",
        "review_text",
        "review_title",
        "hotel_name",
        "city",
        "hotel_state",
    ]
    nearest = VectorizedQuery(
        vector=embed_query(query),
        k_nearest_neighbors=3,
        fields="embedding",
    )
    return search_client.search(
        search_text=query,
        vector_queries=[nearest],
        select=wanted_fields,
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name="ps-hotels-semantic-config",
        query_caption=QueryCaptionType.EXTRACTIVE,
        query_answer=QueryAnswerType.EXTRACTIVE,
        top=top,
    )

In [None]:
def print_semantic_results(results):
    """Print each semantic-search hit with its reranker score and caption.

    For every hit the hotel name, reranker score, review title and review
    text are printed, one per line. If the hit's ``@search.captions`` entry is
    non-empty, the first caption is printed too: its ``highlights`` when that
    is truthy, otherwise its plain ``text``. Each hit ends with ``print("\\n")``.

    Args:
        results: Iterable of hits indexable by field name.
    """
    labelled_fields = (
        ("Hotel Name", "hotel_name"),
        ("Reranker Score", "@search.reranker_score"),
        ("Review Title", "review_title"),
        ("Review", "review_text"),
    )
    for hit in results:
        for label, field in labelled_fields:
            print(f"{label}: {hit[field]}")

        hit_captions = hit["@search.captions"]
        if hit_captions:
            top_caption = hit_captions[0]
            # Prefer the highlighted form; fall back to the plain caption text.
            print(f"Caption: {top_caption.highlights or top_caption.text}")
        print("\n")


def print_results(results):
    """Print hotel name, city, score, review title and review text per hit.

    Each field goes on its own ``Label: value`` line and every hit ends with
    ``print("\\n")``.

    Args:
        results: Iterable of hits indexable by field name.
    """
    labelled_fields = (
        ("Hotel Name", "hotel_name"),
        ("City", "city"),
        ("Score", "@search.score"),
        ("Review Title", "review_title"),
        ("Review", "review_text"),
    )
    for hit in results:
        for label, field in labelled_fields:
            print(f"{label}: {hit[field]}")
        print("\n")

In [None]:
# Try the semantic search helper with a one-word query and print each hit
# together with its reranker score and caption.
query = "walkable"
results = semantic_search(query)

print_semantic_results(results)


In [None]:
def transform_search_results_to_string(results):
    """Render search hits as one plain-text block.

    Each hit becomes six ``Label: value`` lines (hotel name, city, review
    title, review text, score, reranker score), each ending in a newline.
    Hits are joined with an extra newline, so consecutive hits are separated
    by a blank line.

    This function only builds the string; it writes nothing to stdout.

    Args:
        results: Iterable of mapping-like hits supporting ``[]`` and
            ``.get``. It is iterated exactly once.

    Returns:
        The formatted text, or an empty string when there are no hits.

    Raises:
        KeyError: If a hit lacks one of the required fields
            (``hotel_name``, ``city``, ``review_title``, ``review_text``,
            ``@search.score``).
    """
    result_strings = []

    for result in results:
        # The reranker score may be absent, or present but None (presumably
        # for queries without semantic ranking -- TODO confirm); show "N/A"
        # in both cases rather than the literal text "None".
        reranker_score = result.get("@search.reranker_score")
        if reranker_score is None:
            reranker_score = "N/A"

        result_strings.append(
            f"Hotel Name: {result['hotel_name']}\n"
            f"City: {result['city']}\n"
            f"Review Title: {result['review_title']}\n"
            f"Review Text: {result['review_text']}\n"
            f"Score: {result['@search.score']}\n"
            f"Reranker Score: {reranker_score}\n"
        )

    return "\n".join(result_strings)

In [None]:
from openai import AzureOpenAI

# Chat-completion client, separate from the embedding client: it reads the
# AZURE_OPENAI_* variables, falling back to API version "2025-01-01-preview"
# and deployment "gpt-4.1" when those two are unset.
client = AzureOpenAI(
  api_key = os.getenv("AZURE_OPENAI_API_KEY"),  
  api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview"),
  azure_endpoint =os.getenv("AZURE_OPENAI_ENDPOINT"),
  azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "gpt-4.1")
)

# System prompt for the chat request.
SYSTEM_MESSAGE = "You are a helpful assistant."

# Run the semantic search afresh; this rebinds the module-level `query` and
# `results` names.
query = "walkable"
results = semantic_search(query)

# Inputs for the prompt: the raw query and the hits flattened to text.
# transform_search_results_to_string iterates `results` here.
USER_QUERY = query
SEARCH_RESULTS = transform_search_results_to_string(results)

print(SEARCH_RESULTS)

In [None]:
import json

# Prompt for the chat model: the user's query plus the flattened search
# results, with instructions to summarize and re-order them. The schema
# example uses double quotes so that it is itself valid JSON, matching the
# "valid JSON" instruction. Doubled braces are f-string escapes for literal
# { and }.
USER_MESSAGE = f"""
  You are provided a user query, and the search results based on user query. Your task is to summarize the results and put the best order for the results.

USER_QUERY
```
{USER_QUERY}
```

SEARCH_RESULTS
```
{SEARCH_RESULTS}
```

Return a valid JSON with the following information

RESULT_SCHEMA
```
{{
  "summary": "",
  "results": []
}}
```
"""

# Request a JSON-object response; the prompt above mentions JSON explicitly.
completion = client.chat.completions.create(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "gpt-4.1"),
    messages=[
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": USER_MESSAGE},
    ],
    response_format={"type": "json_object"},
)

# `response` ends up bound to the parsed dict, as before; the SDK completion
# object now keeps its own name instead of being overwritten mid-cell.
response = json.loads(completion.choices[0].message.content)

print(json.dumps(response, indent=2))

In [None]:
# Run a term query: simple-syntax keyword search for "wifi", asking the
# service to include the total match count alongside the hits.
results = search_client.search(
    search_text="wifi",
    query_type="simple",
    select="hotel_name,review_title,review_text,city,hotel_state",
    include_total_count=True,
)

print("Total Documents Matching Query:", results.get_count())

# One line per field for every hit, starting with the bare relevance score.
for result in results:
    print(result["@search.score"])
    print(f"Hotel Name: {result['hotel_name']}")
    print(f"Review Title: {result['review_title']}")
    print(f"Review Text: {result['review_text']}")
    print(f"Hotel City: {result['city']}")
    print(f"Hotel State: {result['hotel_state']}")

In [None]:
# Add a filter: search for "hotels", filtered to hotel_state 'CA' and ordered
# by dateAdded descending.
results = search_client.search(
    search_text="hotels",
    filter="hotel_state eq 'CA'",
    order_by="dateAdded desc",
    select="hotel_name,review_title,review_text,city,hotel_state,dateAdded",
)

# Print every selected field per hit, followed by print("\n") as a separator.
for result in results:
    print(f"Hotel Name: {result['hotel_name']}")
    print(f"City: {result['city']}")
    print(f"Review Title: {result['review_title']}")
    print(f"Review Text: {result['review_text']}")
    print(f"Hotel State: {result['hotel_state']}")
    print(f"Date Added: {result['dateAdded']}")
    print("\n")

In [None]:
# Look up a specific document by ID (fetched directly by key, not via a query)
result = search_client.get_document(key="14e42e6a-d6ba-475f-86a5-13e1c82bed94")

print("Details for hotel '14e42e6a-d6ba-475f-86a5-13e1c82bed94' are:")
print(f"Name: {result['hotel_name']}")
print(f"Review Title: {result['review_title']}")
print(f"Review Text: {result['review_text']}")

In [None]:
# Autocomplete a query: ask the "sg" suggester for completions of the partial
# input, using the 'twoTerms' mode.
search_suggestion = "I"
results = search_client.autocomplete(
    search_text=search_suggestion,
    suggester_name="sg",
    mode="twoTerms",
)

print("Autocomplete for:", search_suggestion)
# Each returned item exposes its completion under the 'text' key.
for result in results:
    print(result["text"])