# Génération d'un ensemble de test pour le pipeline end-to-end

1. Les questions ont été générées avec DeepSeek-R1 (50 %) et Gemini 2.5 Pro (50 %)
2. Les réponses sont générées avec Gemini 2.5 Flash en faisant des recherches sur le web
3. Les entités des questions sont extraites
4. Les embeddings des questions sont ajoutés

In [3]:
import pandas as pd
import yaml
import time

from google import genai
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch

In [4]:
import sys
sys.path.append('../../src')

from entity import extract_entities, match_entity
from embeddings import generate_embeddings

## 1. Chargement des questions

In [9]:
# Load the generated questions from the YAML file.
# The file holds natural-language text that can contain non-ASCII characters
# (e.g. typographic apostrophes), so the encoding is pinned to UTF-8 instead
# of relying on the platform default.
with open('questions.yml', 'r', encoding='utf-8') as file:
    data = yaml.safe_load(file)

In [13]:
# Flatten the nested {source: {level: "one question per line"}} mapping into
# one (source, level, question) record per question.
questions = [
    (source, level, line.strip())
    for source, levels in data.items()
    for level, block in levels.items()
    for line in block.splitlines()
    if line.strip()  # skip blank lines so that no empty question is kept
]

df = pd.DataFrame(questions, columns=['source', 'level', 'question'])

## 2. Génération des réponses

Gemini 2.5 Flash fait des recherches sur le web, en mode 'reasoning'
  - Il y a trois niveaux de difficulté de réponse : facile, moyen, difficile

In [None]:
import os

# Gemini client. The API key is read from the environment (None when the
# variable is unset) so that no credential is stored in the notebook.
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

# Identifier of the model used to generate the answers.
model_id = "gemini-2.5-flash-preview-04-17"

In [37]:
# System instruction sent with every answer-generation request.
# The wording is runtime text and is kept verbatim, one entry per prompt line.
system_prompt = "\n".join((
    "You are helping me creating a ground truth for a question-answering task.",
    "Search the web and find the answer to the question.",
    "Be as consice as possible: do not add additional information.",
    "",
    "Example:",
    "Q: What is the capital of France?",
    "A: The capital of France is Paris.",
))

In [None]:
def generate_answer(question):
    """Generate a ground-truth answer for ``question`` with Gemini and Google Search.

    The question is sent to ``model_id`` through the module-level ``client``,
    with the Google Search tool enabled and ``system_prompt`` as the system
    instruction.

    Args:
        question: The question text, passed as the request contents.

    Returns:
        The answer text of the first candidate, with all of its text parts
        concatenated.

    Raises:
        RuntimeError: If the response contains no candidate or no text part.
    """
    google_search_tool = Tool(
        google_search = GoogleSearch()
    )

    response = client.models.generate_content(
        model=model_id,
        contents=question,
        config=GenerateContentConfig(
            tools=[google_search_tool],
            response_modalities=["TEXT"],
            system_instruction=system_prompt,
        )
    )

    # Candidates, content and parts may all be missing on an empty response.
    candidates = response.candidates or []
    content = candidates[0].content if candidates else None
    parts = (content.parts or []) if content is not None else []

    # An answer may be split over several parts: reading only parts[0] would
    # truncate it. Parts flagged as thoughts are not part of the answer.
    answer = "".join(
        part.text
        for part in parts
        if part.text and not getattr(part, "thought", False)
    )
    if not answer:
        raise RuntimeError(f"No text answer returned for question: {question!r}")

    return answer

In [None]:
# Generated answers keyed by DataFrame index; the generation loop skips the
# indices already present here.
answers = dict()

In [None]:
# Answer every question that has no answer yet, so the cell can be re-run
# after an interruption without repeating finished requests.
for idx, question_text in df["question"].items():
    if idx not in answers:
        started_at = time.time()
        answers[idx] = generate_answer(question_text)
        elapsed = time.time() - started_at

        # Avoid rate limits: wait out the remainder of a 7-second slot.
        time.sleep(max(0, 7 - elapsed))

In [None]:
# Turn the {index: answer} mapping into a one-column frame whose index holds
# the mapping keys.
answers_df = pd.DataFrame(
    list(answers.items()),
    columns=['index', 'answer'],
).set_index('index')

In [None]:
# Attach the generated answers to the questions (aligned on the index).
# An 'answer' column left by a previous run of this cell is dropped first:
# joining twice would otherwise fail on the overlapping column name.
df = df.drop(columns=['answer'], errors='ignore').join(answers_df)

## 3. Extraction des entités dans les questions

In [6]:
# Extract the entities of every question. `sleep=7` is forwarded to
# extract_entities by Series.apply (presumably a per-call delay in seconds
# to stay under rate limits; confirm in the entity module).
df["entities"] = df["question"].apply(extract_entities, sleep=7)

2025-05-12T18:18:37.794 [BAML [92mINFO[0m] [35mFunction ExtractEntities[0m:
    [33mClient: GeminiFlash (gemini-2.0-flash) - 715ms. StopReason: STOP. Tokens(in/out): 99/32[0m
    [34m---PROMPT---[0m
    [2m[43muser: [0m[2mExtract all the relevant entities from the user question on the video games and consonles domain.
     - Resolve entities to their canonic form (example: LOTR -> Lord of the Rings).
    
     User question:
     What year was the Nintendo Entertainment System (NES) released in North America?
    
    Answer with a JSON Array using this schema:
    [
      {
        name: string,
        type: 'Game' or 'Console' or 'Publisher',
      }
    ]
    [0m
    [34m---LLM REPLY---[0m
    [2m```json
    [
      {
        "name": "Nintendo Entertainment System",
        "type": "Console"
      }
    ]
    ```[0m
    [34m---Parsed Response (list<class Entity>)---[0m
    [
      {
        "name": "Nintendo Entertainment System",
        "type": "Console"
      

In [9]:
def _match_entities(entities_by_type):
    """Pass every extracted entity name through match_entity, per entity type.

    Entity types whose value is None are left out of the result.
    """
    matched = {}
    for entity_type, names in entities_by_type.items():
        if names is None:
            continue
        matched[entity_type] = [match_entity(name, entity_type) for name in names]
    return matched


df['entities'] = df['entities'].apply(_match_entities)

## 4. Génération des embeddings (pour la recherche sémantique)

In [15]:
# Embed all questions in a single call and store the result as a column.
df["embeddings"] = generate_embeddings(df["question"].to_list())

In [16]:
# Display the final test set: questions, answers, entities and embeddings.
df

Unnamed: 0,source,level,question,answer,entities,embeddings
0,gemini,easy,What year was the Nintendo Entertainment Syste...,The Nintendo Entertainment System (NES) was re...,{'console': ['nintendo entertainment system']},"[0.0004506858, 0.03856677, 0.06810948, 0.08225..."
1,gemini,easy,Who developed the game Pac-Man?,The game Pac-Man was developed by Toru Iwatani...,{'game': ['pac-man']},"[0.012381856, 0.014237181, 0.027101945, 0.0473..."
2,gemini,easy,On which console was the game Sonic the Hedgeh...,The game Sonic the Hedgehog was first released...,"{'game': ['sonic the hedgehog'], 'console': [N...","[-0.010663498, -0.007071837, 0.03953355, 0.039..."
3,gemini,easy,What was the flagship fighting game for the Su...,While there were many fighting games on the Su...,"{'game': [None], 'console': ['super nintendo e...","[-0.0005581793, 0.043345034, 0.05450761, 0.078..."
4,gemini,easy,Which company created the Atari 2600?,The Atari 2600 was developed and produced by A...,"{'console': ['atari 2600'], 'publisher': ['ata...","[-0.022092162, 0.02777733, 0.06295724, 0.02797..."
...,...,...,...,...,...,...
115,deepseek,hard,Analyze the influence of Castlevania: Symphony...,*Castlevania: Symphony of the Night* significa...,{'game': ['castlevania: symphony of the night'...,"[-0.0040893066, 0.02658793, -0.0054087443, 0.0..."
116,deepseek,hard,How did the Sega Dreamcast’s online capabiliti...,The Sega Dreamcast foreshadowed modern gaming ...,{'console': ['sega dreamcast']},"[-0.016330058, 0.025852317, 0.014798954, 0.023..."
117,deepseek,hard,Compare the longevity of Tetris (Game Boy) to ...,Both Tetris and Dr. Mario were released for th...,"{'game': ['tetris', 'dr. mario'], 'console': [...","[0.004156725, 0.016723322, 0.027663492, 0.0203..."
118,deepseek,hard,Why did E.T. the Extra-Terrestrial (Atari 2600...,E.T. the Extra-Terrestrial for the Atari 2600 ...,"{'game': [None], 'console': ['atari 2600']}","[0.034979213, -0.046446722, 0.0764473, 0.04750..."


In [None]:
# Persist the test set as Parquet; the DataFrame index is not written.
df.to_parquet(
    "../../data/qa_testset.parquet",
    index=False,
)