# RAG Evaluation

## Prep

In [1]:
from dotenv import load_dotenv
# Load environment variables before any later cell reads os.environ
# (presumably from a local .env file, python-dotenv's default - confirm).
load_dotenv()

True

In [17]:
import os
from openai import AzureOpenAI

# Model identifiers passed as `model=` to the chat-completions call
# (presumably Azure OpenAI deployment names - confirm against the resource).
MODEL = "gpt-4o"
MODEL_GPT35 = "gpt-35-turbo"
# Shared Azure OpenAI client used by _chat_gpt. The key and endpoint come from
# the environment; a missing variable raises KeyError on this line.
client = AzureOpenAI(
    api_key=os.environ['AZURE_OPENAI_KEY'],
    # Alternative API version, currently disabled:
    #api_version="2024-10-01-preview",
    api_version="2024-08-01-preview",
    azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
)

In [3]:
def _chat_gpt(messages, model=MODEL, temp=0, topp=0.1, max_tokens=2000):
    """Send one chat-completion request and return the first choice's text.

    Args:
        messages: Chat messages forwarded unchanged to
            ``client.chat.completions.create``.
        model: Model identifier for the request; defaults to the
            notebook-level ``MODEL``.
        temp: Value sent as ``temperature``.
        topp: Value sent as ``top_p``.
        max_tokens: Value sent as ``max_tokens``. Previously hard-coded to
            2000; the default keeps existing calls unchanged.

    Returns:
        ``response.choices[0].message.content`` exactly as returned by the
        client (no post-processing is applied here).
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temp,
        max_tokens=max_tokens,
        top_p=topp,
    )

    # Only the first choice is used; the request does not ask for more than one.
    return response.choices[0].message.content

In [4]:
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

# Connection settings handed to SearchClient in the Search section below.
# Variables not used here do not need to be updated in your .env file
endpoint = os.environ["AZSCH_ENDPOINT"]
# Key-based credential; DefaultAzureCredential is imported above but not used in this cell.
credential = AzureKeyCredential(os.environ["AZSCH_KEY"])

index_name = os.environ["AZSCH_INDEX_NAME"]

## Search

In [6]:
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery

# Number of results requested per query; get_context uses it both as
# k_nearest_neighbors and as the `top` limit.
topK = 2
# Pure Vector Search
search_client = SearchClient(endpoint, index_name, credential=credential)

def get_context(text):
    """Run a vector-only search for ``text`` and concatenate the matching chunks.

    The query is issued through the notebook-level ``search_client`` with
    ``search_text=None``, so only the vector query contributes. It targets the
    ``"vector"`` field and requests ``topK`` neighbours with ``exhaustive=True``.

    Args:
        text: Query text wrapped in a ``VectorizableTextQuery``.

    Returns:
        str: Each result's ``"chunk"`` value followed by a blank line
        (``"\\n\\n"``), concatenated in the order the results are returned.
        An empty string when the search yields no results.
    """
    vector_query = VectorizableTextQuery(
        text=text,
        k_nearest_neighbors=topK,
        fields="vector",
        exhaustive=True,
    )
    results = search_client.search(
        search_text=None,
        vector_queries=[vector_query],
        select=["parent_id", "chunk_id", "chunk", "title"],
        top=topK,
    )

    # Same output as appending chunk + "\n\n" in a loop, built in one pass.
    return "".join(result["chunk"] + "\n\n" for result in results)

## Response

In [12]:
# prompt templates
from jinja2 import Template
from common import parse_chat

# Read the raw template text once; get_response renders it with Jinja2 on every call.
with open('./reply_simple.jinja2') as file:
    simple_response_template = file.read()

In [13]:
def get_response(query, chat_history, template, model=MODEL):
    """Answer ``query`` using retrieved context and return the answer with that context.

    Steps: fetch context for the raw ``query`` text via ``get_context``, render
    the Jinja2 ``template`` with ``context``, ``chat_history`` and
    ``user_query``, convert the rendered prompt to chat messages with
    ``parse_chat``, then call ``_chat_gpt``.

    Args:
        query: The user's current question; also used verbatim for retrieval.
        chat_history: Prior turns, passed to the template as ``chat_history``.
        template: Jinja2 template source text.
        model: Model identifier forwarded to ``_chat_gpt``.

    Returns:
        tuple: ``(answer, context)`` - the model reply and the retrieved
        context string that was placed in the prompt.
    """
    retrieved = get_context(query)

    renderer = Template(template, trim_blocks=True, keep_trailing_newline=True)
    rendered_prompt = renderer.render(
        context=retrieved,
        chat_history=chat_history,
        user_query=query,
    )

    reply = _chat_gpt(parse_chat(rendered_prompt), model)
    return reply, retrieved

## Groundedness - GPT-3.5-Turbo

In [18]:
from eval import measure_groundness

In [19]:
# Start a fresh conversation; this turn is answered with MODEL_GPT35.
chat_history = []
question = "What is OpenAI assistant api?"  
answer, context = get_response(question, chat_history, simple_response_template, model=MODEL_GPT35)
# The turn is appended after the call, so the prompt only ever sees earlier turns.
chat_history.append([question, answer])
print("Q>", question, "\nA> ", answer)

# Score the answer against the same context that was retrieved for it.
# measure_groundness comes from the local `eval` module; its scale is not shown here.
score = measure_groundness(question, answer, context)
print("groundness:", score)

Q> What is OpenAI assistant api? 
A>  The OpenAI Assistants API is a powerful tool that allows developers to create custom AI assistants. These assistants can understand and respond to user queries, provide recommendations, automate tasks, and more. The API is built on OpenAI's GPT (Generative Pre-trained Transformer) models, which are trained on a wide range of data to generate human-like responses. With the Assistants API, developers can easily build conversational AI applications without having to manage conversation state or handle complex integrations. The API supports features like persistent threads, code interpretation, and function calling, making it easier to create sophisticated AI assistants.
groundness: <Score: 4>


In [20]:
# chat_history is not reset here: it still holds whatever earlier cells appended,
# so this cell must run after the previous one.
question = "What is Google" 
answer, context = get_response(question, chat_history, simple_response_template, model=MODEL_GPT35)
chat_history.append([question, answer])
print("Q>", question, "\nA> ", answer)

# Score the answer against the context retrieved for this question.
score = measure_groundness(question, answer, context)
print("groundness:", score)

Q> What is Google 
A>  Google is a multinational technology company that specializes in Internet-related services and products. It was founded in 1998 by Larry Page and Sergey Brin while they were Ph.D. students at Stanford University. Google's mission is to organize the world's information and make it universally accessible and useful. The company is best known for its search engine, which is the most widely used search engine in the world. Google also offers a wide range of other products and services, including online advertising technologies, cloud computing, software, hardware, and more.
groundness: <Score: 1>


## Groundedness - GPT-4o

In [21]:
# Fresh conversation; no `model` argument, so get_response falls back to its default (MODEL).
chat_history = []
question = "What is OpenAI assistant api?"  
answer, context = get_response(question, chat_history, simple_response_template)
# Appended after the call, so the prompt only sees earlier turns.
chat_history.append([question, answer])
print("Q>", question, "\nA> ", answer)

# Score the answer against the context retrieved for this question.
score = measure_groundness(question, answer, context)
print("groundness:", score)

Q> What is OpenAI assistant api? 
A>  The OpenAI Assistants API is a feature of the Azure OpenAI Service that allows developers to create sophisticated AI assistants with capabilities similar to copilot experiences. This API simplifies the process of building custom AI assistants by managing conversation states, integrating tools, retrieving documents, and executing code. It supports persistent, automatically managed conversation threads, allowing developers to append new messages without worrying about context window constraints. The Assistants API can access multiple tools in parallel, such as code interpreters and custom functions, to perform various tasks. This makes it easier to develop applications like product recommenders, sales analysts, coding assistants, and more.
groundness: <Score: 5>


In [22]:
# chat_history is not reset here: it carries over from the previous cell,
# so this cell must run after it.
question = "What is Google" 
answer, context = get_response(question, chat_history, simple_response_template)
chat_history.append([question, answer])
print("Q>", question, "\nA> ", answer)

# Score the answer against the context retrieved for this question.
score = measure_groundness(question, answer, context)
print("groundness:", score)

Q> What is Google 
A>  The provided text does not contain information about Google. Please refer to other sources for information on Google.
groundness: <Score: 5>


## Groundedness - Condensed Query

In [23]:
# First turn: seeds chat_history; its answer is neither printed nor scored.
chat_history = []
question = "What is OpenAI assistant api?"  
answer, context = get_response(question, chat_history, simple_response_template)
chat_history.append([question, answer])

# Follow-up turn: get_response retrieves context from this question text alone
# (it calls get_context(query)); chat_history is only passed to the prompt template.
question = "show sample code in python"  
answer, context = get_response(question, chat_history, simple_response_template)
chat_history.append([question, answer])
print("Q>", question, "\nA> ", answer)

# Only the follow-up answer is scored, against the context retrieved for the follow-up.
score = measure_groundness(question, answer, context)
print("\ngroundness:", score)

Q> show sample code in python 
A>  Sure! Could you please specify what kind of sample code you are looking for in Python? For example, are you interested in data analysis, web development, machine learning, or something else? Let me know so I can provide a relevant example.

groundness: <Score: 1>
