# Evaluating Search Relevance with Azure Al Search
Source: [step by step guide to measuring azure ai search relevance](farzzy.hashnode.dev/step-by-step-guide-to-measuring-azure-ai-search-relevance-the-hello-world-of-information-retrieval)

Dataset:
 - Guidance [train.doj_guidance.jsonl.xz](https://huggingface.co/datasets/pile-of-law/pile-of-law/resolve/main/data/train.doj_guidance.jsonl.xz?download=true)
 - Eurlex [train.eurlex.jsonl.xz](https://huggingface.co/datasets/pile-of-law/pile-of-law/resolve/main/data/train.eurlex.jsonl.xz?download=true)
 - Memos [train.irs_legal_advice_memos.jsonl.xz](https://huggingface.co/datasets/pile-of-law/pile-of-law/resolve/main/data/train.irs_legal_advice_memos.jsonl.xz?download=true)
 - Memos [train.olc_memos.jsonl.xz](https://huggingface.co/datasets/pile-of-law/pile-of-law/resolve/main/data/train.olc_memos.jsonl.xz?download=true)

In [103]:
# httpx==0.27.2 is needed to avoid an inconsistencies in the openai interface
%pip install azure-identity==1.23.0 azure-search-documents==11.5.2 openai==1.43.1 ranx==0.3.20  dotenv tenacity pandas httpx==0.27.2 


Note: you may need to restart the kernel to use updated packages.


# Step 1: Environment and Resources configuration

This step is composed of many substep:

1. Load environment variables

2. Open Al embeddings configuration

3. Azure Al Search confguration

4. Load data and configure dataset

## Step 1.1: Load environment variables

In [2]:
import os

from dotenv import load_dotenv

load_dotenv() # take environment variables from .env file

True

## Step 1.2: Open Al embeddings configuration

In [3]:
# Initialize OpenAI client
from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential, get_bearer_token_provider
from azure.core.credentials import AzureKeyCredential
from openai import AsyncAzureOpenAI
from typing import List
from tenacity import retry, stop_after_attempt, wait_fixed


#async_credential = AsyncDefaultAzureCredential()
openai_api_key = os.getenv("AZURE_OPENAI_EMBEDDINGS_SERVICE_KEY")

openai_embeddings_ada2_config = {
    "name": "ada2",
    "service_name": os.getenv("AZURE_OPENAI_EMBEDDINGS_SERVICE_NAME"),
    "api_version": os.getenv("AZURE_OPENAI_EMBEDDINGS_ADA2_API_VERSION"),
    "deployment_model": os.getenv("AZURE_OPENAI_EMBEDDINGS_ADA2_DEPLOYMENT_MODEL"),
    "vector_dimensions": 1536
}

openai_embeddings_t3small_config = {
    "name": "t3small",
    "service_name": os.getenv("AZURE_OPENAI_EMBEDDINGS_SERVICE_NAME"), 
    "api_version": os.getenv("AZURE_OPENAI_EMBEDDINGS_T3SMALL_API_VERSION"),
    "deployment_model": os.getenv("AZURE_OPENAI_EMBEDDINGS_T3SMALL_DEPLOYMENT_MODEL"),
    "vector_dimensions": 1536
}

class EmbeddingsClient:

    def __init__(self, embeddings_config):
        self.embeddings_config = embeddings_config
        self.model = AsyncAzureOpenAI(
            api_version=self.embeddings_config["api_version"],
            #azure_ad_token_provider=get_bearer_token_provider(
            #    async_credential, "https://cognitiveservices.azure.com/.default"
            #),
            api_key=openai_api_key,
            azure_endpoint=f'https://{self.embeddings_config["service_name"]}.openai.azure.com',
            max_retries=2,
        )


    @retry(
        stop_after_attempt(15), #Retry up to X times
        wait_fixed(10), # Wait X seconds between retries
    )
    async def generate_embeddings(self, model_inputs: List[str], batch_size: int = 20):
        responses = []
        # Generate embeddings in batches
        batch_count = 0
        for i in range(0, len(model_inputs), batch_size):
            j = i + batch_size if i + batch_size <= len(model_inputs) else len(model_inputs)
            batch = model_inputs[i:j]
            #print(f"[Embeddings] [{self.embeddings_config['name']}] Processing batch #{batch_count), Batch: (1) -> {j}")
            try:
                response = await self.model.embeddings.create(
                    model=self.embeddings_config["deployment_model"], 
                    input=batch,
                )
                responses.extend(i.embedding for i in response.data)
                batch_count += 1

            except Exception as e:
                print(f"[Embeddings][{self.embeddings_config['name']}] Error while computing embeddings: {e}. Retrying...")
                raise

        return responses

## Step 1.3: Azure Al Search configuration

In [4]:
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient

# sync_credential = DefaultAzureCredential()
sync_credential = AzureKeyCredential(os.getenv("SEARCH_SERVICE_KEY"))

azure_search_config = {
    "service_name": os.getenv("SEARCH_SERVICE_NAME"),
    "index_name": os.getenv("SEARCH_INDEX_NAME"),
    "api_version": os.getenv("SEARCH_API_VERSION"),
    "service_endpoint": f'https://{os.getenv("SEARCH_SERVICE_NAME")}.search.windows.net'
}

## Step 1.4: Load data and configure dataset

In [6]:
import pandas as pd

# Load dataset
products_number = 10
products_df = pd.read_csv("dataset/products/product.csv", sep="\t", index_col="product_id", keep_default_na=False)
products_df.loc[products_df["product_description"] == "", "product_description"] = products_df["product_name"]
product_names = products_df["product_name"].tolist()[:products_number]
product_descriptions = products_df["product_description"].tolist()[:products_number]

# Load queries and groundtruth
queries_df = pd.read_csv("dataset/products/query.csv", sep="\t", index_col="query_id")
labels_df = pd.read_csv("dataset/products/label.csv", sep="\t")

# Map ground truth labels to scores
relevancy_scores = {"Exact": 10, "Partial": 5, "Irrelevant": 0}
labels_df["score"] = labels_df["label"].map(relevancy_scores)

# Ensure query id and product_id columns are of type string (object)
labels_df["query_id"] = labels_df["query_id"].astype(str)
labels_df["product_id"] = labels_df["product_id"].astype(str)

# Filter by the products number
filtered_labels_df = labels_df.loc[labels_df['product_id'].astype(int) <= products_number]
filtered_queries_df = queries_df[queries_df.index.isin (filtered_labels_df['query_id'].astype(int))]

In [7]:
products_df.head()

Unnamed: 0_level_0,product_name,product_class,category hierarchy,product_description,product_features,rating_count,average_rating,review_count
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,solid wood platform bed,Beds,Furniture / Bedroom Furniture / Beds & Headboa...,"good , deep sleep can be quite difficult to ha...",overallwidth-sidetoside:64.7|dsprimaryproducts...,15.0,4.5,15.0
1,all-clad 7 qt . slow cooker,Slow Cookers,Kitchen & Tabletop / Small Kitchen Appliances ...,"create delicious slow-cooked meals , from tend...",capacityquarts:7|producttype : slow cooker|pro...,100.0,2.0,98.0
2,all-clad electrics 6.5 qt . slow cooker,Slow Cookers,Kitchen & Tabletop / Small Kitchen Appliances ...,prepare home-cooked meals on any schedule with...,features : keep warm setting|capacityquarts:6....,208.0,3.0,181.0
3,all-clad all professional tools pizza cutter,"Slicers, Peelers And Graters",Browse By Brand / All-Clad,this original stainless tool was designed to c...,overallwidth-sidetoside:3.5|warrantylength : l...,69.0,4.5,42.0
4,baldwin prestige alcott passage knob with roun...,Door Knobs,Home Improvement / Doors & Door Hardware / Doo...,the hardware has a rich heritage of delivering...,compatibledoorthickness:1.375 '' |countryofori...,70.0,5.0,42.0


In [8]:
filtered_queries_df.head()

Unnamed: 0_level_0,query,query_class
query_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,smart coffee table,Coffee & Cocktail Tables
9,coffee table fire pit,Outdoor Fireplaces
14,beds that have leds,Beds
18,chrome bathroom 4 light vanity light,Vanity Lighting
24,wood coffee table set by storage,Living Room Table Sets


In [17]:
labels_df

Unnamed: 0,id,query_id,product_id,label,score
0,0,0,25434,Exact,10
1,1,0,12088,Irrelevant,0
2,2,0,42931,Exact,10
3,3,0,2636,Exact,10
4,4,0,42923,Exact,10
...,...,...,...,...,...
233443,234010,478,15439,Partial,5
233444,234011,478,451,Partial,5
233445,234012,478,30764,Irrelevant,0
233446,234013,478,16796,Partial,5


In [9]:
filtered_labels_df.head()

Unnamed: 0,id,query_id,product_id,label,score
8012,8012,62,1,Exact,10
8013,8013,62,2,Partial,5
9635,9635,76,4,Irrelevant,0
9901,9901,78,7,Partial,5
9973,9973,78,5,Partial,5


# Step 2: Prepare the code to run the evaluation

This is composed of many substeps:

1. Generate embeddings

2. Create/update a search index and upload data

3. Set-up code for searching

4. Gather search data (score)

5. Set-up evaluation tool (ranx)

## Step 2.1: Generate embeddings

In [19]:
async def generate_embeddings(embeddings_client, product_names, product_descriptions):
    product_name_embeddings = await embeddings_client.generate_embeddings(product_names)
    product_description_embeddings = await embeddings_client.generate_embeddings(product_descriptions)
    return product_name_embeddings, product_description_embeddings

## Step 2.2: Create/update a search index and upload data

In [20]:
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchAlgorithmMetric,
    VectorSearchProfile
)

def create_or_update_index(
    azure_search_config, index_name, vector_field_type, vector_dimensions
):
    search_index_client = SearchIndexClient(endpoint=azure_search_config["service_endpoint"], credential=sync_credential)
    # Define the search index fields based on your product schema
    fields = [
        SimpleField(name="product_id", type=SearchFieldDataType.String, key=True), 
        SearchField(name="product_name", type=SearchFieldDataType.String, searchable=True, filterable=True),
        SearchField(name="product_description", type=SearchFieldDataType.String, searchable=True),
        SearchField(name="product_name_vector", type=vector_field_type, vector_search_dimensions=vector_dimensions, vector_search_profile_name="my-vector-config"),
        SearchField(name="product_description_vector", type=vector_field_type, vector_search_dimensions=vector_dimensions, vector_search_profile_name="my-vector-config"),
    ]
    
    # Vector search configuration with HNSW algorithm and query vectorizer
    vector_search = VectorSearch(
        profiles=[
            VectorSearchProfile( 
                name="my-vector-config", 
                algorithm_configuration_name="my-hnsw", 
            )
        ],
        algorithms=[
            HnswAlgorithmConfiguration(
                name="my-hnsw", 
                kind=VectorSearchAlgorithmKind.HNSW, 
                parameters=HnswParameters(metric=VectorSearchAlgorithmMetric.COSINE),
            )
        ]
    )

    index= SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
    search_index_client.create_or_update_index(index=index)
    print(f"[SearchIndexClient][{index_name}] Created or updated index.")

In [21]:
from azure.search.documents import SearchIndexingBufferedSender

def upload_embeddings_to_index(service_endpoint, index_name, embeddings_name, embeddings_description, batch_size=100): 
    documents = []
    # Prepare documents with embeddings
    for i, (name_embedding, desc_embedding) in enumerate(zip(embeddings_name, embeddings_description)):
        document = {
            "product_id": str(products_df.index[i]),
            "product_name": products_df["product_name"][i], 
            "product_description": products_df["product_description"][i], 
            "product_name_vector": name_embedding,
            "product_description_vector": desc_embedding,
        } 
        documents.append(document)

        # Initialize SearchIndexingBufferedSender for batch uploads 
    with SearchIndexingBufferedSender(
        endpoint=service_endpoint,
        index_name=index_name,
        credential=sync_credential,
        auto_flush_interval=60, # Automatically flush every 68 seconds
        initial_batch_action_count=batch_size # Batch size for actions
    ) as batch_client:
        # Upload documents in batches
        for doc_batch in [documents[i:i + batch_size] for i in range(0, len(documents), batch_size)]:
            batch_client.upload_documents(documents=doc_batch)

        print(f"[SearchIndexClient][{index_name}] Uploaded {len(documents)} documents using buffered sender.")

    #Ensure all documents are flushed
    batch_client.flush()

## Step 2.3: Set-up code for searching

In [22]:
async def search(search_client, embeddings_client, query_text: str, vector_fields: str, top: int): 
    query_vector = (await embeddings_client.generate_embeddings((query_text)))[0]

    vector_query = {
        "kind": "vector", 
        "vector": query_vector, 
        "fields": vector_fields,
        "k": top, 
    }
    response = search_client.search(search_text=None, vector_queries=[vector_query], top=top)

    return response

## Step 2.4: Gather search data (score)

In [23]:
from collections import defaultdict

async def gather_search_data(search_client, embeddings_client, queries_df, field, top): 
    run_dict = defaultdict(dict)

    for index, row in queries_df.iterrows():
        query_text = row["query"] 
        print(f"[SearchClient][{search_client._index_name}] Searching query {index}. Query: {query_text}")

        # Perform vector search using the Azure AI Search client
        results = await search(search_client, embeddings_client, query_text, vector_fields=field, top=top)
        

        query_id = f"{index}"  # Ensure query id matches what's in qrels
        count = 0
        # Use the actual product id from the search results instead of generating a 'doc 
        for result in results: 
            print(f"[SearchClient][{search_client._index_name}] - Searching query {index}. Result {count}: {result}")
            product_id = result['product_id'] 
            score = result['@search.score']

            # Populate the run dict using product id and score
            run_dict[query_id][str(product_id)] = score
            count += 0
        
        print(f"[SearchClient][{search_client._index_name}] - run_dict[{query_id}]: {run_dict[query_id]}")
    
    return run_dict

## Step 3: Execute the evaluation process

In [24]:
from ranx import Run

async def evaluation_process(azure_search_config, embeddings_config, queries_df, data, k=3):
    # create search index
    index_name = f'{azure_search_config["index_name"]}-{embeddings_config["name"]}'

    create_or_update_index(
        azure_search_config,
        index_name=index_name,
        vector_field_type="Collection(Edm.Single)", #  OpenAI embedding storage format dim: 1536
        vector_dimensions=embeddings_config["vector_dimensions"]
    )
    
    # Generate the embeddings
    embeddings_client = EmbeddingsClient(embeddings_config)
    product_name_embeddings, product_description_embeddings = await generate_embeddings(
        embeddings_client,
        data["product_names"],
        data["product_descriptions"]
    )

    # Upload embeddings to respective indexes
    upload_embeddings_to_index(
        azure_search_config["service_endpoint"],
        index_name,
        product_name_embeddings,
        product_description_embeddings
    )

    # Perform search
    search_client = SearchClient(
        endpoint=azure_search_config["service_endpoint"],
        index_name=index_name,
        credential=sync_credential, 
        api_version=azure_search_config["api_version"]
    )
    
    model_name = embeddings_config["name"]

    name_run_dict = await gather_search_data(search_client, embeddings_client, queries_df, "product_name_vector", top=k)
    description_run_dict = await gather_search_data(search_client, embeddings_client, queries_df, "product_description_vector", top=k)
    combined_run_dict = await gather_search_data(search_client, embeddings_client, queries_df, "product_name_vector, product_description_vector", top=k)

    # create runs for ranx
    name_run = Run(name_run_dict, name=f"{model_name}_product_name")
    description_run = Run(description_run_dict, name=f"{model_name}_product_description")
    combined_run = Run(combined_run_dict, name=f"{model_name}_combined")
    
    return {
        "dict": (name_run_dict, description_run_dict, combined_run_dict),
        "runs": (name_run, description_run, combined_run)
    }

# Step 4: Compare the results

In [25]:
from ranx import compare


def compare_runs(qrels, *runs, k=3):
    # Compare search relevance metrics across different models
    report = compare(
        qrels=qrels,
        runs=[
            *runs
        ],
        metrics=[
            f"precision@{k}", 
            f"recall@{k}", 
            f"mrr@{k}", 
            f"dcg@{k}", 
            f"ndcg@{k}"
        ],
        make_comparable=True # Ensure that qrels and runs have matching query IDs
    )

    # Convert the report to a DataFrame and display it
    results_df = report.to_dataframe()

    # Optionally, export results to a CSV
    results_df.to_csv(f"results/comparison_results_k{k}.csv", index=False)
    return results_df


In [26]:
from ranx import Qrels

# Create qrels from labels after converting dtypes
qrels = Qrels.from_df(filtered_labels_df, q_id_col="query_id", doc_id_col="product_id", score_col="score")

results_dfs = {}
for k in [3, 5, 10]:    # k being the number of top results to retrieve
    data = {
        "product_names": product_names,
        "product_descriptions": product_descriptions
    }

    ada2_results = await evaluation_process(azure_search_config, openai_embeddings_ada2_config, filtered_queries_df, data, k=k)
    t3small_results = await evaluation_process(azure_search_config, openai_embeddings_t3small_config, filtered_queries_df, data, k=k)
    
    # this saves the results to a csv file
    results_dfs[k] = compare_runs(qrels, *ada2_results["runs"], *t3small_results["runs"], k=k)

[SearchIndexClient][ranx-index-ada2] Created or updated index.
[SearchIndexClient][ranx-index-ada2] Uploaded 10 documents using buffered sender.
[SearchClient][ranx-index-ada2] Searching query 1. Query: smart coffee table
[SearchClient][ranx-index-ada2] - run_dict[1]: {}
[SearchClient][ranx-index-ada2] Searching query 9. Query: coffee table fire pit
[SearchClient][ranx-index-ada2] - Searching query 9. Result 0: {'product_name': 'solid wood platform bed', 'product_id': '0', 'product_description': 'good , deep sleep can be quite difficult to have in this busy age . fortunately , there ’ s an antidote to such a problem : a nice , quality bed frame like the acacia kaylin . solidly constructed from acacia wood , this bed frame will stand the test of time and is fit to rest your shoulders on for years and years . its sleek , natural wood grain appearance provides a pleasant aesthetic to adorn any bedroom , acting both as a decorative piece as well as a place to give comfort after a hard day 

# 5. Comparison

In [94]:
results_df = pd.read_csv("results/comparison_results_k3.csv")
results_df

Unnamed: 0,model_names,precision@3,recall@3,mrr@3,dcg@3,ndcg@3
0,ada2_product_name,0.255814,0.465116,0.430233,3.392833,0.438954
1,ada2_product_description,0.24031,0.418605,0.395349,3.160275,0.397763
2,ada2_combined,0.255814,0.465116,0.426357,3.392833,0.437776
3,t3small_product_name,0.255814,0.465116,0.422481,3.362385,0.432864
4,t3small_product_description,0.248062,0.44186,0.418605,3.31947,0.424281
5,t3small_combined,0.248062,0.44186,0.418605,3.334694,0.426148


In [93]:
results_df = pd.read_csv("results/comparison_results_k5.csv")
results_df

Unnamed: 0,model_names,precision@5,recall@5,mrr@5,dcg@5,ndcg@5
0,ada2_product_name,0.153488,0.465116,0.430233,3.392833,0.438954
1,ada2_product_description,0.153488,0.465116,0.406977,3.260433,0.417794
2,ada2_combined,0.153488,0.465116,0.426357,3.392833,0.437776
3,t3small_product_name,0.153488,0.465116,0.422481,3.362385,0.432864
4,t3small_product_description,0.153488,0.465116,0.424419,3.369548,0.434297
5,t3small_combined,0.153488,0.465116,0.426357,3.392833,0.437776


In [92]:
results_df = pd.read_csv("results/comparison_results_k10.csv")
results_df

Unnamed: 0,model_names,precision@10,recall@10,mrr@10,dcg@10,ndcg@10
0,ada2_product_name,0.076744,0.465116,0.430233,3.392833,0.438954
1,ada2_product_description,0.076744,0.465116,0.406977,3.260433,0.417794
2,ada2_combined,0.076744,0.465116,0.426357,3.392833,0.437776
3,t3small_product_name,0.076744,0.465116,0.422481,3.362385,0.432864
4,t3small_product_description,0.076744,0.465116,0.424419,3.369548,0.434297
5,t3small_combined,0.076744,0.465116,0.426357,3.392833,0.437776
