# Evaluating Retrieval Quality

## Creating synthetic ground truth

In [None]:
import sys
sys.path.append('..')

from utils import load_from_json, save_to_json, llm

In [40]:
# Read all the parsed data from the civics guide.
# `dataset` is iterated further down; each entry is read there through its
# 'text' and 'uuid' keys.
input_file = '../documents/parsed_civics_guide.json'

dataset = load_from_json(input_file)

Data loaded from ../documents/parsed_civics_guide.json


In [41]:
# Prompt template for synthetic query generation. The `{text}` placeholder is
# filled with str.format() inside generate_questions().
# NOTE(review): the template asks the model for a bare JSON array, while the
# generation loop looks up a 'queries' key on the llm() result -- confirm what
# shape llm() actually returns.
user_prompt = """
Based on the *text* provided below, write 5 realistic queries you might ask to find information that is clearly answerable by the text. 

Guidelines:
- Each query should be natural, specific, and complete.
- Avoid copying exact phrases or sentences from the text whenever possible.
- Do not make up information that is not supported by the text.
- Make the queries diverse (e.g., mix "what", "who", "when", "how", etc.).
- Keep queries concise but clear enough to retrieve the right passage.

text: {text}

Output valid JSON only (no code blocks, no extra text):

["query1", "query2", "query3", "query4", "query5"]
""".strip()

In [42]:
def generate_questions(data):
    """Generate synthetic questions for one passage, to be evaluated later.

    Fills the module-level ``user_prompt`` template with ``data['text']`` and
    sends it to ``llm`` together with a fixed system prompt and the
    module-level ``model``. The value returned by ``llm`` is passed back
    unchanged.
    """
    return llm(
        system_prompt="You are a U.S. immigrant preparing for the civics test.",
        user_prompt=user_prompt.format(text=data['text']),
        model=model,
    )


In [43]:
# Model name; generate_questions() reads this module-level variable.
model = 'gpt-4o-mini'

# Flat list of {'uuid': ..., 'question': ...} records, one per generated query.
golden_questions = []

for data in dataset:
    # Named `doc_uuid` rather than `id` so the builtin id() is not shadowed
    # for the rest of the notebook session.
    doc_uuid = data['uuid']

    # Generate the set of questions for this passage.
    questions = generate_questions(data)

    # The prompt requests a bare JSON array, but the response may also arrive
    # wrapped in an object under the 'queries' key. Accept both shapes and
    # fail loudly on anything else instead of iterating over garbage.
    if isinstance(questions, dict):
        queries = questions['queries']
    elif isinstance(questions, list):
        queries = questions
    else:
        raise TypeError(
            "Expected a list of queries or a dict with a 'queries' key, "
            f"got {type(questions).__name__}"
        )

    # Tag every question with the UUID of the passage it was generated from.
    golden_questions.extend(
        {'uuid': doc_uuid, 'question': question} for question in queries
    )

In [44]:
# Save the golden question records as JSON.
# The return value of save_to_json is bound to `_` so the notebook cell does
# not echo it.
output_file = '../documents/golden_data_questions.json'
_ = save_to_json(output_file,golden_questions)

Data saved to ../documents/golden_data_questions.json


## Evaluating retrieval

### Hit rate