# Test searching, reranker and answer generation

This notebook demonstrates how to test the search with different options (uppercase/lowercase queries, combinations of vector fields, number of documents retrieved by the search, and number of documents used in the answer generation), re-rank the retrieved documents, and generate and evaluate the answers, creating Excel files for the different combinations.

The tests are defined in the constant TESTS with the following fields:
+ test-name: it will be used as the Excel file name
+ embeddings_fields: list of vector fields to be used in the search
+ uppercase/lowercase: the query will be converted to uppercase or lowercase to execute the search
+ embedding_model: ada or large-3
+ index_name: the name of the index created with the notebook 'create_index_and_index_documents.ipynb'
+ max_retrieve: maximum number of search results
+ max_generate: maximum number of documents (chunks) used to generate the answers

The output is a set of Excel files with the test results.

## Prerequisites

+ An Azure subscription, with [access to Azure OpenAI](https://aka.ms/oai/access).
+ An Azure OpenAI service with the service name and an API key.
+ A deployment of the text-embedding-ada-002 embedding model on the Azure OpenAI Service with the deployment name 'ada'.
+ A deployment of the text-embedding-3-large embedding model on the Azure OpenAI Service with the deployment name 'large-3'.
+ An Azure AI Search service with the end-point, API Key and the index name to create.

We used Python 3.12.3, [Visual Studio Code with the Python extension](https://code.visualstudio.com/docs/python/python-tutorial), and the [Jupyter extension](https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter) to test this example.

### Set up a Python virtual environment in Visual Studio Code

1. Open the Command Palette (Ctrl+Shift+P).
1. Search for **Python: Create Environment**.
1. Select **Venv**.
1. Select a Python interpreter. Choose 3.10 or later.

It can take a minute to set up. If you run into problems, see [Python environments in VS Code](https://code.visualstudio.com/docs/python/environments).

### Install packages

In [None]:
! pip install openai
! pip install azure-search-documents

## Import packages and create AOAI clients

In [1]:
import os
from dotenv import load_dotenv
from openai import AzureOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
import sys
sys.path.append('..')
from pa_utils import call_aoai, semantic_hybrid_search_with_filter, get_filtered_chunks, generate_answer

# Read the settings from the .env file (override=True is passed to load_dotenv)
load_dotenv(override=True)

# ---------------------------------------------------------------------------
# Azure AI Search
# ---------------------------------------------------------------------------
ai_search_endpoint = os.environ["SEARCH_SERVICE_ENDPOINT"]
ai_search_apikey = os.environ["SEARCH_SERVICE_QUERY_KEY"]
# Read here for completeness; execute_test receives its index name from TESTS.
ai_search_index_name = os.environ["SEARCH_INDEX_NAME"]
ai_search_credential = AzureKeyCredential(ai_search_apikey)

# API version shared by every Azure OpenAI client created below
aoai_api_version = '2024-02-15-preview'


def _new_aoai_client(deployment, endpoint, api_key):
    """Return an AzureOpenAI client for one deployment, using the shared API version."""
    return AzureOpenAI(
        azure_deployment=deployment,
        api_version=aoai_api_version,
        azure_endpoint=endpoint,
        api_key=api_key,
    )


# ---------------------------------------------------------------------------
# Azure OpenAI: answer generation
# ---------------------------------------------------------------------------
aoai_answer_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
aoai_answer_apikey = os.environ["AZURE_OPENAI_API_KEY"]
aoai_answer_model_name = os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"]
aoai_answer_client = _new_aoai_client(
    aoai_answer_model_name, aoai_answer_endpoint, aoai_answer_apikey
)

# ---------------------------------------------------------------------------
# Azure OpenAI: re-ranking
# ---------------------------------------------------------------------------
aoai_rerank_endpoint = os.environ["AZURE_OPENAI_RERANK_ENDPOINT"]
azure_openai_rerank_key = os.environ["AZURE_OPENAI_RERANK_API_KEY"]
rerank_model_name = os.environ["AZURE_OPENAI_RERANK_DEPLOYMENT_NAME"]
aoai_rerank_client = _new_aoai_client(
    rerank_model_name, aoai_rerank_endpoint, azure_openai_rerank_key
)

# ---------------------------------------------------------------------------
# Azure OpenAI: embeddings (one endpoint/key, one client per deployment)
# ---------------------------------------------------------------------------
aoai_embedding_endpoint = os.environ["AZURE_OPENAI_EMBEDDING_ENDPOINT"]
azure_openai_embedding_key = os.environ["AZURE_OPENAI_EMBEDDING_API_KEY"]
embedding_model_name_ada = os.environ["AZURE_OPENAI_EMBEDDING_NAME_ADA"]
embedding_model_name_large_3 = os.environ["AZURE_OPENAI_EMBEDDING_NAME_LARGE_3"]
aoai_embedding_client_ada = _new_aoai_client(
    embedding_model_name_ada, aoai_embedding_endpoint, azure_openai_embedding_key
)
aoai_embedding_client_large_3 = _new_aoai_client(
    embedding_model_name_large_3, aoai_embedding_endpoint, azure_openai_embedding_key
)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
SELECT_FIELDS = ["id", "title", "content"]  # Fields returned by the search
QUERY_LANGUAGE = "en-US"                    # Language of the queries

# Test table. Key = test name (also used as the Excel file name).
# Value = (embedding_fields, upper/lower, embedding_model, index_name, max_retrieve, max_generate)
TESTS = {
    "title_content_large3_512_search_upper_20_10": (
        "embeddingTitle, embeddingContent", "upper", "large-3", "project_assurance_large_3", 20, 10,
    ),
    "title_content_large3_512_search_upper_20_20": (
        "embeddingTitle, embeddingContent", "upper", "large-3", "project_assurance_large_3", 20, 20,
    ),
    "title_content_large3_512_search_lower_20_10": (
        "embeddingTitle, embeddingContent", "lower", "large-3", "project_assurance_large_3", 20, 10,
    ),
    "title_content_large3_512_search_lower_20_20": (
        "embeddingTitle, embeddingContent", "lower", "large-3", "project_assurance_large_3", 20, 20,
    ),
}
# Disabled ADA variants of the same tests; move an entry inside TESTS to enable it:
#        "title_content_ada_512_search_upper_20_10": ("embeddingTitle, embeddingContent", "upper", "ada", "find_duplicates_4", 20, 10),
#        "title_content_ada_512_search_upper_20_20": ("embeddingTitle, embeddingContent", "upper", "ada", "find_duplicates_4", 20, 20),
#        "title_content_ada_512_search_lower_20_10": ("embeddingTitle, embeddingContent", "lower", "ada", "find_duplicates_4", 20, 10),
#        "title_content_ada_512_search_lower_20_20": ("embeddingTitle, embeddingContent", "lower", "ada", "find_duplicates_4", 20, 20),

# Input spreadsheet with the question/answer pairs (previously 'QA_with_answers.xlsx')
QA_WITH_ANSWERS_FILENAME = 'qa_pairs.xlsx'

In [21]:
def evaluate_answer(question, correct_answer, answer):
    """Ask the answer-generation model to grade a generated answer against the ground truth.

    Args:
        question: The user's question.
        correct_answer: The expected (ground truth) answer.
        answer: The answer produced by the RAG pipeline.

    Returns:
        Whatever ``call_aoai`` returns for the grading prompt. The prompt asks
        the model to reply with a single score from 0 (unrelated) to 3
        (correct and enriched) and nothing else.
    """
    system_prompt = """You are an AI assistant that helps people validate the accuracy and completeness of a response against a ground truth. Given the user's question, the expected ground truth answer and the current answer generated by a RAG pattern, compare the meaning of both answers and assess if the current answer addresses the user's question and select a number that best describes this assessment considering the following guidelines:
    - 0: The generated answer and the expected answer have completely different meanings, and the generated answer does not address the user's question.
    - 1: The generated answer is very similar in meaning to the expected answer but lacks some crucial information, and it partially addresses the user's question.
    - 2: The generated answer is well-aligned with the expected answer, capturing the main points accurately, and fully addressing the user's question.
    - 3: The generated answer not only aligns with the expected ground truth and answers the user's question but also adds valuable additional details or insights.
    Based on these guidelines, provide only the number that best represents the relationship between the generated answer and the expected ground truth answer. Do not include any explanation, only the number.
    """

    # Both answers are wrapped in a matching pair of double quotes so the model
    # can tell where each one starts and ends.
    user_prompt = (
        f'\nQuestion: {question}'
        f'\nExpected Ground Truth Answer: "{correct_answer}"'
        f'\nGenerated Answer: "{answer}"'
        '\n\nYour evaluation: '
    )

    # The trailing 0.0 and 800 are presumably temperature and max tokens
    # (TODO confirm against pa_utils.call_aoai).
    return call_aoai(aoai_answer_client, aoai_answer_model_name, system_prompt, user_prompt, 0.0, 800)
   

In [11]:
def check_answer_iddoc(text, ids_doc, all=False):
    """Check whether document IDs appear (as substrings) in a text.

    Args:
        text: Text to inspect (for example a generated answer).
        ids_doc: Comma-separated document IDs, e.g. ``"ID1, ID2"``. Whitespace
            around each ID is ignored and empty entries are skipped.
        all: When True every ID must appear in ``text``; when False a single
            matching ID is enough.

    Returns:
        1 when the condition is met, otherwise 0. Also 0 when ``ids_doc``
        contains no IDs at all, since there is nothing to match.
    """
    # Drop empty tokens: an empty string is a substring of every text, so an
    # empty ``ids_doc`` or a trailing separator would otherwise always match.
    doc_ids = [token.strip() for token in ids_doc.split(',') if token.strip()]
    if not doc_ids:
        return 0

    if all:
        # The ``all`` parameter shadows the builtin, so "every ID present" is
        # expressed as "no ID is missing".
        return int(not any(doc_id not in text for doc_id in doc_ids))

    return int(any(doc_id in text for doc_id in doc_ids))

In [22]:
import pandas as pd

def execute_test(test_name, embedding_fields, case, embedding_model, index_name, max_retrieve, max_generate, q_a_filename_in=QA_WITH_ANSWERS_FILENAME, max_rows=7):
    """Run one search / re-rank / answer-generation test and save the results to Excel.

    For each question in the input spreadsheet the function runs the search,
    re-ranks the results, generates two answers (one from the re-ranker's
    'answer' field, one from the chunk 'content' field), grades both with
    ``evaluate_answer`` and finally writes everything to
    ``data_out/<test_name>.xlsx``.

    Args:
        test_name: Name of the test; also used as the Excel file name.
        embedding_fields: Vector fields passed to the search.
        case: 'upper' converts each question to uppercase; any other value
            converts it to lowercase.
        embedding_model: 'ada' selects the ADA embedding client; any other
            value selects the Large-3 client.
        index_name: Azure AI Search index to query.
        max_retrieve: Maximum number of search results.
        max_generate: Maximum number of chunks used to generate the answers.
        q_a_filename_in: Excel file with 'question' and 'answer' columns.
        max_rows: Maximum number of input rows to process; ``None`` processes
            the whole file. The default of 7 matches the number of rows the
            previous hard-coded early exit (``index > 5``) actually processed.
    """
    dir_out = "data_out"
    os.makedirs(dir_out, exist_ok=True)
    data_in = pd.read_excel(q_a_filename_in)
    if max_rows is not None:
        data_in = data_in.head(max_rows)

    print(f'test_name: {test_name}')
    print(f"\t embeddings_fields: {embedding_fields}")
    print(f"\t case: {case}")
    print(f"\t embedding_model: {embedding_model}")
    print(f"\t index_name: {index_name}")
    print(f"\t max_retrieve: {max_retrieve}")
    print(f"\t max_generate: {max_generate}")

    # One list per Excel column; every processed row appends exactly one value
    # to each list. The ID-based checks (check_answer_iddoc) are not recorded
    # in this version.
    data_out = {
        'QUESTION': [],
        'EXPECTED_ANSWER': [],
        'ANSWER_WITH_ANSWERS': [],
        'EVALUATION_GPT_AA': [],
        'ANSWER_WITH_CHUNKS': [],
        'EVALUATION_GPT_AC': [],
    }

    # (answer column, evaluation column, field handed to generate_answer, log label)
    generation_modes = (
        ('ANSWER_WITH_ANSWERS', 'EVALUATION_GPT_AA', 'answer', 'ANSWERS'),
        ('ANSWER_WITH_CHUNKS', 'EVALUATION_GPT_AC', 'content', 'CHUNKS'),
    )

    # Create Azure AI Search client
    ai_search_client = SearchClient(endpoint=ai_search_endpoint, index_name=index_name, credential=ai_search_credential)

    # The embedding client depends only on the test settings, so pick it once.
    if embedding_model == 'ada':
        embedding_client = aoai_embedding_client_ada
    else:
        embedding_client = aoai_embedding_client_large_3

    # For every Q&A row in the input file
    for row_number, (_, row) in enumerate(data_in.iterrows(), start=1):
        print(f"Row {row_number}: =====================================================")

        user_question = row["question"]  # Value of the question column
        if case == 'upper':
            user_question = user_question.upper()
        else:
            user_question = user_question.lower()

        expected_answer = row["answer"]  # Value of the expected-answer column

        data_out['QUESTION'].append(user_question)            # EXCEL COLUMN: QUESTION
        data_out['EXPECTED_ANSWER'].append(expected_answer)   # EXCEL COLUMN: EXPECTED_ANSWER

        # Semantic hybrid search. The case-converted question is sent as-is so
        # that the upper/lower setting of the test really reaches the search.
        results = semantic_hybrid_search_with_filter(ai_search_client, user_question, embedding_client, embedding_model, embedding_fields, max_retrieve, SELECT_FIELDS, QUERY_LANGUAGE)

        # Re-rank the chunks
        data = get_filtered_chunks(aoai_rerank_client, rerank_model_name, results, user_question, max_retrieve)

        # When more documents are retrieved than may be used for generation,
        # keep only the max_generate entries with the highest confidence.
        # Entries without a 'confidence' key sort last.
        if max_retrieve > max_generate and data is not None:
            ranked = sorted(data, key=lambda x: x.get('confidence', float('-inf')), reverse=True)
            data = ranked[:max_generate]

        if data:
            # There is content left to generate the answers from
            for answer_column, evaluation_column, source_field, label in generation_modes:
                answer = generate_answer(aoai_answer_client, aoai_answer_model_name, data, user_question, source_field)
                print(f'RESPONSE WITH {label}: [{answer}]')
                data_out[answer_column].append(answer)
                data_out[evaluation_column].append(evaluate_answer(user_question, expected_answer, answer))
                print('------------------------------------------------------------------')
        else:
            # Nothing survived the re-ranking (empty list or None): record a
            # placeholder answer with the lowest score for both modes.
            answer = 'There is not any content to generate the answer'
            for answer_column, evaluation_column, _, _ in generation_modes:
                data_out[answer_column].append(answer)
                data_out[evaluation_column].append(0)

    # Excel file with the results
    df = pd.DataFrame(data_out)
    filename_out = os.path.join(dir_out, test_name + '.xlsx')
    print(f'Writting file {filename_out}')
    df.to_excel(filename_out, index=False, header=True)


In [None]:
# Execute every configured test; each one writes its own Excel file.
for test_name, test_settings in TESTS.items():
    # Each TESTS value is a 6-item tuple in this exact order
    embedding_fields, case, embedding_model, index_name, max_retrieve, max_generate = test_settings
    execute_test(
        test_name,
        embedding_fields,
        case,
        embedding_model,
        index_name,
        max_retrieve,
        max_generate,
    )