# Evaluating Search Relevance with Azure Al Search
Source: [step by step guide to measuring azure ai search relevance](https://farzzy.hashnode.dev/step-by-step-guide-to-measuring-azure-ai-search-relevance-the-hello-world-of-information-retrieval)

In [1]:
# requirements for running the evaluations
# httpx==0.27.2 is needed to avoid an inconsistencies in the openai interface
%pip install azure-identity==1.23.0 azure-search-documents==11.5.2 openai==1.43.1 ranx==0.3.20 dotenv tenacity pandas httpx==0.27.2 voyageai==0.2.4

^C
Note: you may need to restart the kernel to use updated packages.




# Step 1: Environment and Resources configuration

This step is composed of many substep:

1. Load environment variables

2. Open Al embeddings configuration

3. Azure Al Search confguration

4. Load data and configure dataset

## Step 1.1: Load environment variables

In [1]:
import os

from dotenv import load_dotenv

load_dotenv() # take environment variables from .env file
load_dotenv('.env.iva') # take environment variables from .env.iva file

True

## Step 1.2: Open Al embeddings configuration

In [2]:
from abc import ABC, abstractmethod  
from typing import List   
  
class BaseEmbeddingsClient(ABC):  
    """  
    Abstract base class for an Embeddings Client.  
    Child classes must implement the generate_embeddings method.  
    """  
  
    def __init__(self, embeddings_config):  
        self.embeddings_config = embeddings_config  
        self.model = self.initialize_model()  
  
    @abstractmethod  
    def initialize_model(self):  
        """  
        Abstract method to initialize and return the model instance.  
        Needs to be implemented by child classes.  
        """  
        pass  
    
    @abstractmethod 
    async def generate_embeddings(self, model_inputs: List[str]):  
        """  
        Abstract method to generate embeddings.   
        Needs to be implemented by child classes.  
        """  
        raise NotImplementedError("This method should be implemented by subclasses.")
        

    async def generate_embeddings_batches(self, model_inputs: List[str], batch_size: int = 20):  
        """  
        Abstract method to generate embeddings.   
        Needs to be implemented by child classes.  
        """  
        responses = []
        # Generate embeddings in batches
        batch_count = 0
        for i in range(0, len(model_inputs), batch_size):
            j = i + batch_size if i + batch_size <= len(model_inputs) else len(model_inputs)
            batch = model_inputs[i:j]
            print(f"[Embeddings] [{self.embeddings_config['name']}] Processing batch #{batch_count}, Batch: {1} -> {j}")
            try:
                batch_response = await self.generate_embeddings(
                    model_inputs=batch,
                )
                responses.extend(batch_response)
                batch_count += 1

            except Exception as e:
                print(f"[Embeddings][{self.embeddings_config['name']}] Error while computing embeddings: {e}. Retrying...")
                raise

        return responses

In [3]:
# Initialize OpenAI client
import voyageai
from openai import AsyncAzureOpenAI
from typing import List
from tenacity import retry, stop_after_attempt, wait_fixed



#async_credential = AsyncDefaultAzureCredential()
openai_api_key = os.getenv("AZURE_OPENAI_EMBEDDINGS_SERVICE_KEY")
voyage_api_key = os.getenv("VOYAGE_API_KEY")

openai_embeddings_ada2_config = {
    "name": "ada2",
    "service_name": os.getenv("AZURE_OPENAI_EMBEDDINGS_SERVICE_NAME"),
    "api_version": os.getenv("AZURE_OPENAI_EMBEDDINGS_ADA2_API_VERSION"),
    "deployment_model": os.getenv("AZURE_OPENAI_EMBEDDINGS_ADA2_DEPLOYMENT_MODEL"),
    "vector_dimensions": 1536
}

openai_embeddings_t3small_config = {
    "name": "t3small",
    "service_name": os.getenv("AZURE_OPENAI_EMBEDDINGS_SERVICE_NAME"), 
    "api_version": os.getenv("AZURE_OPENAI_EMBEDDINGS_T3SMALL_API_VERSION"),
    "deployment_model": os.getenv("AZURE_OPENAI_EMBEDDINGS_T3SMALL_DEPLOYMENT_MODEL"),
    "vector_dimensions": 1536
}

voyage_embeddings_law2_config = {
    "name": "voyage-law-2",
    "input_type": "document",
    "vector_dimensions": 1024
}

class AzureEmbeddingsClient(BaseEmbeddingsClient):

    def initialize_model(self):  
        """  
        Initialize and return the Azure-specific AsyncAzureOpenAI model.  
        """  
        return AsyncAzureOpenAI(  
            api_version=self.embeddings_config["api_version"],
            api_key=openai_api_key,
            azure_endpoint=f'https://{self.embeddings_config["service_name"]}.openai.azure.com',  
            max_retries=2,  
        ) 
    
    @retry(
        stop_after_attempt(15), #Retry up to X times
        wait_fixed(10), # Wait X seconds between retries
    )
    async def generate_embeddings(self, model_inputs: List[str]):
        response = await self.model.embeddings.create(
            model=self.embeddings_config["deployment_model"], 
            input=model_inputs,
        )
        return [i.embedding for i in response.data]

class VoyageEmbeddingsClient(BaseEmbeddingsClient):
    def initialize_model(self):  
        """  
        Initialize and return the SyncVoyage model.  
        """  
        return voyageai.Client()
    
    @retry(
        stop_after_attempt(15), #Retry up to X times
        wait_fixed(10), # Wait X seconds between retries
    )
    async def generate_embeddings(self, model_inputs: List[str]):
        return self.model.embed(model_inputs, model=self.embeddings_config["name"], input_type=self.embeddings_config["input_type"]).embeddings
    
class EmbeddingClientFactory:
    
    @staticmethod
    def create(embeddings_config) -> BaseEmbeddingsClient:
        if "input_type" in embeddings_config:
            embeddings_client = VoyageEmbeddingsClient(embeddings_config)
        else:
            embeddings_client = AzureEmbeddingsClient(embeddings_config)
        return embeddings_client

## Step 1.3: Azure Al Search configuration

In [4]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient

sync_credential = AzureKeyCredential(os.getenv("SEARCH_SERVICE_KEY"))

azure_search_config = {
    "service_name": os.getenv("SEARCH_SERVICE_NAME"),
    "index_name": os.getenv("SEARCH_INDEX_NAME"),
    "api_version": os.getenv("SEARCH_API_VERSION"),
    "service_endpoint": f'https://{os.getenv("SEARCH_SERVICE_NAME")}.search.windows.net'
}

## Step 1.4: Load data and configure dataset

In [5]:
import pandas as pd

docs_number = 100000000 # 3200

# Load dataset queries and groundtruth
folder = "legal-docs"
documents_df = pd.read_csv(f"dataset/{folder}/document.csv", sep="\t", index_col="id", keep_default_na=False)
queries_df = pd.read_csv(f"dataset/{folder}/query.csv", sep="\t", index_col="query_id")
labels_df = pd.read_csv(f"dataset/{folder}/label.csv", sep="\t")

# Map ground truth labels to scores
relevancy_scores = {"Relevant": 10, "Irrelevant": 0}
labels_df["score"] = labels_df["label"].map(relevancy_scores)

# Ensure query id and doc_id columns are of type string (object)
labels_df["query_id"] = labels_df["query_id"].astype(str)
labels_df["doc_id"] = labels_df["doc_id"].astype(str)

# Filter by the document number
contents = documents_df["content"].tolist()[:docs_number]
filtered_documents_df = documents_df[:docs_number]
filtered_labels_df = labels_df.loc[labels_df['doc_id'].isin(filtered_documents_df['doc_id'])]
filtered_queries_df = queries_df[queries_df.index.isin(filtered_labels_df['query_id'].astype(int))]

print(f"Number of documents: {len(filtered_documents_df)}")
print(f"Number of queries: {len(filtered_queries_df)}")
print(f"Number of labels: {len(filtered_labels_df)}")

Number of documents: 6516
Number of queries: 2578
Number of labels: 17046


In [6]:
filtered_documents_df.head()

Unnamed: 0_level_0,chunk_id,content,doc_id,category
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
eurlex_0__0,0,Name: Decision (EU) 2019/276 of the European P...,eurlex_0,eurlex
eurlex_0__1,1,(6) In order to allow for the quick use of the...,eurlex_0,eurlex
eurlex_10__0,0,Name: Decision (EU) 2018/515 of the European P...,eurlex_10,eurlex
eurlex_10__1,1,For the general budget of the Union for the fi...,eurlex_10,eurlex
eurlex_11__0,0,Name: Decision (EU) 2018/508 of the European P...,eurlex_11,eurlex


In [7]:
filtered_queries_df.head()

Unnamed: 0_level_0,query
query_id,Unnamed: 1_level_1
0,what is the purpose of the european parliament...
1,what is the daily rate for a consultant
2,when does per unit retains reclassify
3,when does the decision to use the flexibility ...
4,what is required to receive cops award


In [8]:
filtered_labels_df.head()

Unnamed: 0,query_id,id,doc_id,chunk_id,label,score
0,0,eurlex_0__0,eurlex_0,0,Relevant,10
1,1,eurlex_0__0,eurlex_0,0,Irrelevant,0
2,2,eurlex_0__0,eurlex_0,0,Irrelevant,0
3,3,eurlex_0__1,eurlex_0,1,Relevant,10
4,4,eurlex_0__1,eurlex_0,1,Irrelevant,0


# Step 2: Prepare the code to run the evaluation

This is composed of many substeps:

1. Generate embeddings

2. Create/update a search index and upload data

3. Set-up code for searching

4. Gather search data (score)

5. Set-up evaluation tool (ranx)

## Step 2.1: Generate embeddings

In [9]:
async def generate_embeddings(embeddings_client, contents):
    content_embeddings = await embeddings_client.generate_embeddings_batches(contents)
    return content_embeddings

## Step 2.2: Create/update a search index and upload data

In [10]:
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchAlgorithmMetric,
    VectorSearchProfile
)

def create_or_update_index(
    azure_search_config, index_name, vector_field_type, vector_dimensions
):
    search_index_client = SearchIndexClient(endpoint=azure_search_config["service_endpoint"], credential=sync_credential)
    # Define the search index fields based on your product schema
    fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True), 
        SimpleField(name="doc_id", type=SearchFieldDataType.String, searchable=True, filterable=True),
        SearchField(name="category", type=SearchFieldDataType.String, searchable=True, filterable=True),
        SearchField(name="content", type=SearchFieldDataType.String, searchable=True),
        SearchField(name="content_vector", type=vector_field_type, vector_search_dimensions=vector_dimensions, vector_search_profile_name="my-vector-config"),
    ]
    
    # Vector search configuration with HNSW algorithm and query vectorizer
    vector_search = VectorSearch(
        profiles=[
            VectorSearchProfile( 
                name="my-vector-config", 
                algorithm_configuration_name="my-hnsw", 
            )
        ],
        algorithms=[
            HnswAlgorithmConfiguration(
                name="my-hnsw", 
                kind=VectorSearchAlgorithmKind.HNSW, 
                parameters=HnswParameters(metric=VectorSearchAlgorithmMetric.COSINE),
            )
        ]
    )

    index= SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
    search_index_client.create_or_update_index(index=index)
    print(f"[SearchIndexClient][{index_name}] Created or updated index.")

In [11]:
from azure.search.documents import SearchIndexingBufferedSender

def upload_embeddings_to_index(service_endpoint, index_name, documents_df, content_embeddings, batch_size=100): 
    documents = []
    # Prepare documents with embeddings
    for i, content_embedding in enumerate(content_embeddings):
        document = {
            "id": str(documents_df.index[i]),
            "doc_id": str(documents_df.iloc[i]["doc_id"]),
            "category": documents_df.iloc[i]["category"],
            "content": documents_df.iloc[i]["content"], 
            "content_vector": content_embedding,
        } 
        documents.append(document)

        # Initialize SearchIndexingBufferedSender for batch uploads 
    with SearchIndexingBufferedSender(
        endpoint=service_endpoint,
        index_name=index_name,
        credential=sync_credential,
        auto_flush_interval=60,  # Automatically flush every 60 seconds
        initial_batch_action_count=batch_size # Batch size for actions
    ) as batch_client:
        # Upload documents in batches
        for doc_batch in [documents[i:i + batch_size] for i in range(0, len(documents), batch_size)]:
            batch_client.upload_documents(documents=doc_batch)

        print(f"[SearchIndexClient][{index_name}] Uploaded {len(documents)} documents using buffered sender.")

    #Ensure all documents are flushed
    batch_client.flush()

## Step 2.3: Set-up code for searching

In [12]:
async def search(search_client, query_embeddings, vector_fields: str, top: int): 

    vector_query = {
        "kind": "vector", 
        "vector": query_embeddings, 
        "fields": vector_fields,
        "k": top, 
    }
    response = search_client.search(search_text=None, vector_queries=[vector_query], top=top)

    return response

## Step 2.4: Gather search data (score)

In [None]:
from collections import defaultdict

async def gather_search_data(search_client, embeddings_queries, field, top): 
    run_dict = defaultdict(dict)

    for index, query in embeddings_queries.items():
        query_text = query["text"]
        query_embeddings = query["embeddings"]
        #print(f"[SearchClient][{search_client._index_name}] Searching query {index}. Query: {query_text}")

        # Perform vector search using the Azure AI Search client
        results = await search(search_client, query_embeddings, vector_fields=field, top=top)
        

        query_id = f"{index}"  # Ensure query id matches what's in qrels
        # Use the actual product id from the search results instead of generating a 'doc 
        for i, result in enumerate(results): 
            #print(f"[SearchClient][{search_client._index_name}] - Searching query {index}. Result {i}: {result}")
            doc_id = result['doc_id'] 
            score = result['@search.score']

            # Populate the run dict using product id and score
            run_dict[str(query_id)][str(doc_id)] = score
        
        print(f"[SearchClient][{search_client._index_name}] run_dict[{query_id}]: {run_dict[query_id]} -  Query: {query_text}")
    
    return run_dict

## Step 3: Execute the evaluation process

In [23]:
async def populate_index(azure_search_config, embeddings_config, documents_df, data):
    # create search index
    index_name = f'{azure_search_config["index_name"]}-{embeddings_config["name"]}'

    create_or_update_index(
        azure_search_config,
        index_name=index_name,
        vector_field_type="Collection(Edm.Single)",  # OpenAI embedding storage format dim: 1536
        vector_dimensions=embeddings_config["vector_dimensions"]
    )
    
    # Generate the embeddings
    embeddings_client = EmbeddingClientFactory.create(embeddings_config)
 
    content_embeddings = await generate_embeddings(
        embeddings_client,
        data["contents"]
    )

    # Upload embeddings to respective indexes
    upload_embeddings_to_index(
        azure_search_config["service_endpoint"],
        index_name,
        documents_df,
        content_embeddings
    )

async def get_queries_embeddings(embeddings_config, queries_df):
    """ 
    Generate embeddings for the queries in the dataset.
    Returns a dictionary with query ids as keys and their embeddings and text as values.
    """
    embeddings_client = EmbeddingClientFactory.create(embeddings_config)
    
    queries_embeddings = await embeddings_client.generate_embeddings_batches(
        model_inputs=queries_df["query"].tolist()
    )
    
    return {
        str(index): {"text": query, "embeddings": embedding} 
        for index, (query, embedding) in enumerate(zip(queries_df["query"].tolist(), queries_embeddings))
    }

async def evaluation_process(azure_search_config, embeddings_queries, model_name, k=3):
    # create search index
    index_name = f'{azure_search_config["index_name"]}-{model_name}'
    # Perform search
    search_client = SearchClient(
        endpoint=azure_search_config["service_endpoint"],
        index_name=index_name,
        credential=sync_credential, 
        api_version=azure_search_config["api_version"]
    )
    

    content_run_dict = await gather_search_data(search_client, embeddings_queries, "content_vector", top=k)
    
    return content_run_dict

# Step 4: Compare the results

In [18]:
from ranx import compare


def compare_runs(qrels, *runs, result_folder, k=3):
    # Compare search relevance metrics across different models
    
    report = compare(
        qrels=qrels,
        runs=[
            *runs
        ],
        metrics=[
            f"precision@{k}", 
            f"recall@{k}", 
            f"mrr@{k}", 
            f"dcg@{k}", 
            f"ndcg@{k}"
        ],
        make_comparable=True # Ensure that qrels and runs have matching query IDs
    )

    # Convert the report to a DataFrame and display it
    results_df = report.to_dataframe()

    # Optionally, export results to a CSV
    results_df.to_csv(f"results/{result_folder}/comparison_results_k{k}.csv", index=False)
    return results_df


In [None]:
data = {
    "contents": contents
}
await populate_index(azure_search_config, openai_embeddings_ada2_config, filtered_documents_df, data)
await populate_index(azure_search_config, openai_embeddings_t3small_config, filtered_documents_df, data)
await populate_index(azure_search_config, voyage_embeddings_law2_config, filtered_documents_df, data)

In [None]:
import json 
query_embeddings_folder = "resources/data/embeddings"
os.makedirs(query_embeddings_folder, exist_ok=True)

queries_embeddings_ada2 = await get_queries_embeddings(openai_embeddings_ada2_config, filtered_queries_df)
with open(f"{query_embeddings_folder}/queries_embeddings_ada2.json", "w") as file:
    json.dump(queries_embeddings_ada2, file, indent=2)
    
queries_embeddings_t3small = await get_queries_embeddings(openai_embeddings_t3small_config, filtered_queries_df)
with open(f"{query_embeddings_folder}/queries_embeddings_t3small.json", "w") as file:
    json.dump(queries_embeddings_t3small, file, indent=2)
    
queries_embeddings_voyage_law2 = await get_queries_embeddings(voyage_embeddings_law2_config, filtered_queries_df)
with open(f"{query_embeddings_folder}/queries_embeddings_voyage_law2.json", "w") as file:
    json.dump(queries_embeddings_voyage_law2, file, indent=2)

In [20]:
# Read saved embeddings
import json

query_embeddings_folder = "resources/data/embeddings"
with open(f"{query_embeddings_folder}/queries_embeddings_ada2.json", "r") as file:
    queries_embeddings_ada2 = json.load(file)
with open(f"{query_embeddings_folder}/queries_embeddings_t3small.json", "r") as file:
    queries_embeddings_t3small = json.load(file)
with open(f"{query_embeddings_folder}/queries_embeddings_voyage_law2.json", "r") as file:
    queries_embeddings_voyage_law2 = json.load(file)

In [None]:
results_dfs = {}
compare_dfs = {}

In [30]:
k_list = [5, 10]


for k in k_list:    # k being the number of top results to retrieve
    ada2_results = await evaluation_process(azure_search_config, queries_embeddings_ada2, k=k, model_name=openai_embeddings_ada2_config["name"])
    t3small_results = await evaluation_process(azure_search_config, queries_embeddings_t3small, k=k, model_name=openai_embeddings_t3small_config["name"])
    voyage_law2_results = await evaluation_process(azure_search_config, queries_embeddings_voyage_law2, k=k, model_name=voyage_embeddings_law2_config["name"])
    
    results_dfs[k] = {
        "ada2": ada2_results,
        "t3small": t3small_results,
        "voyage_law2": voyage_law2_results
    }


[SearchClient][ranx-index-voyage-law-2] run_dict[1065]: {'guidance_14': 0.664356, 'guidance_91': 0.65949273, 'guidance_92': 0.65026057, 'guidance_76': 0.6536264, 'guidance_69': 0.6432963, 'guidance_93': 0.6385716} -  Query: cops pass program -
[SearchClient][ranx-index-voyage-law-2] run_dict[1066]: {'guidance_92': 0.7176313, 'guidance_55': 0.71594787, 'guidance_12': 0.7151545, 'guidance_30': 0.7147944, 'guidance_36': 0.7137608, 'guidance_38': 0.7133745, 'guidance_29': 0.7127414, 'guidance_14': 0.71247613, 'guidance_74': 0.7116696, 'guidance_40': 0.7081364} -  Query: why should a community policing officer be training -
[SearchClient][ranx-index-voyage-law-2] run_dict[1067]: {'guidance_69': 0.6573737, 'legal-advice_0': 0.59021974, 'guidance_0': 0.59481347, 'memos_42': 0.59851015, 'legal-advice_28': 0.59256756, 'guidance_91': 0.59058} -  Query: who can represent an employer -
[SearchClient][ranx-index-voyage-law-2] run_dict[1068]: {'guidance_92': 0.65318006, 'guidance_39': 0.66414833, 'g

3

In [32]:
with open(f"results/result_k.json", "w") as file:
    json.dump(results_dfs, file, indent=2)

In [34]:
from ranx import Qrels, Run

# Create qrels from labels after converting dtypes
qrels = Qrels.from_df(filtered_labels_df, q_id_col="query_id", doc_id_col="doc_id", score_col="score")

compare_dfs = {}
k_list = [3,5, 10]
for k in k_list: 
    # create runs for ranx
    ada2_run = Run(results_dfs[k]["ada2"] , name=f"ada2_content")
    t3small_run = Run(results_dfs[k]["t3small"] , name=f"t3small_content")
    voyage_law2_run = Run(results_dfs[k]["voyage_law2"] , name=f"voyage_law2_content")
    compare_dfs[k] = compare_runs(qrels, 
                                ada2_run, 
                                t3small_run, 
                                voyage_law2_run, 
                                result_folder=folder, k=k)

In [35]:
compare_dfs[3]

Unnamed: 0,model_names,precision@3,recall@3,mrr@3,dcg@3,ndcg@3
0,ada2_content,0.208042,0.613394,0.528769,5.548893,0.549249
1,t3small_content,0.181277,0.537704,0.463926,4.853129,0.481812
2,voyage_law2_content,0.176881,0.52385,0.430631,4.570788,0.453663


# 5. Comparison

In [38]:
results_df = pd.read_csv(f"results/{folder}/comparison_results_k3.csv")
results_df

Unnamed: 0,model_names,precision@3,recall@3,mrr@3,dcg@3,ndcg@3
0,ada2_content,0.208042,0.613394,0.528769,5.548893,0.549249
1,t3small_content,0.181277,0.537704,0.463926,4.853129,0.481812
2,voyage_law2_content,0.176881,0.52385,0.430631,4.570788,0.453663


In [36]:
results_df = pd.read_csv(f"results/{folder}/comparison_results_k5.csv")
results_df

Unnamed: 0,model_names,precision@5,recall@5,mrr@5,dcg@5,ndcg@5
0,ada2_content,0.142126,0.694844,0.526998,5.750876,0.567523
1,t3small_content,0.122576,0.603562,0.454157,4.951668,0.490773
2,voyage_law2_content,0.121334,0.595656,0.426881,4.733417,0.468361


In [37]:
results_df = pd.read_csv(f"results/{folder}/comparison_results_k10.csv")
results_df

Unnamed: 0,model_names,precision@10,recall@10,mrr@10,dcg@10,ndcg@10
0,ada2_content,0.081963,0.794785,0.478041,5.632844,0.552788
1,t3small_content,0.072033,0.702986,0.432084,5.032378,0.496447
2,voyage_law2_content,0.072033,0.700126,0.397977,4.779659,0.470095
