In [41]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
import minsearch
from tqdm.auto import tqdm


In [6]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# API key handed to the chat client below; os.getenv returns None when the variable is unset.
api_key = os.getenv('OPENAI_API_KEY')

In [19]:
from openai import OpenAI

# Chat client configured with the OpenRouter base URL instead of the SDK default.
# NOTE(review): the key comes from OPENAI_API_KEY but is sent to openrouter.ai —
# presumably the variable holds an OpenRouter key; confirm the name is intended.
client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=api_key,
)



In [7]:
# Load the exercise records; each CSV row becomes one dict keyed by column name.
# The path matches the `data/data.csv` location that the evaluation section of
# this notebook reads, so a Restart & Run All uses one consistent data directory.
df = pd.read_csv('data/data.csv')
documents = df.to_dict(orient='records')


In [15]:
# Prompt used to generate ground-truth questions from one exercise record.
# Single-brace fields are filled by str.format from the record's keys; the
# doubled braces around the JSON example are escapes that render as literal { }.
# NOTE(review): "as fewer words" in the prompt text is presumably meant to read
# "as few words" — left untouched here because it changes what the model sees.
prompt_template = """
You emulate a user of our fitness assistant application.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record.

The record:

exercise_name: {exercise_name}
type_of_activity: {type_of_activity}
type_of_equipment: {type_of_equipment}
body_part: {body_part}
type: {type}
muscle_groups_activated: {muscle_groups_activated}
instructions: {instructions}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [16]:
# Render the template with the first record and print it to inspect the final prompt text.
prompt = prompt_template.format(**documents[0])
print(prompt)

You emulate a user of our fitness assistant application.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record.

The record:

exercise_name: Push-Ups
type_of_activity: Strength
type_of_equipment: Bodyweight
body_part: Upper Body
type: Push
muscle_groups_activated: Pectorals, Triceps, Deltoids
instructions: Start in a high plank position with your hands under your shoulders. Lower your body until your chest nearly touches the floor. Push back up to the starting position.

Provide the output in parsable JSON without using code blocks:

{"questions": ["question1", "question2", ..., "question5"]}


In [17]:
def llm(prompt, model="deepseek/deepseek-chat-v3.1:free"):
    """Send one user message to the chat model and return the reply text.

    Args:
        prompt: Text sent as the only message, with role "user".
        model: Model identifier passed to the API. Defaults to the model this
            notebook originally hard-coded, so existing ``llm(prompt)`` calls
            behave exactly as before.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    response = client.chat.completions.create(
        extra_body={},
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [20]:
# Raw reply text for the sample prompt; parsed as JSON in the next cell.
questions = llm(prompt)


In [21]:
import json
# Check that the reply parses as JSON; json.loads raises if it does not.
json.loads(questions)


{'questions': ['How do I position my hands correctly for this exercise?',
  'Which muscles should I feel working during the movement?',
  'What is the proper way to lower my body during the exercise?',
  'Should my chest touch the floor at the bottom of the movement?',
  'What is the starting position I should return to after each repetition?']}

# Retrieval evaluation data generation

In [None]:
def generate_questions(doc):
    """Ask the model for questions about one exercise record.

    Args:
        doc: Mapping that provides every field referenced by ``prompt_template``.

    Returns:
        The model's raw reply text. The prompt asks for JSON, but the reply is
        neither parsed nor validated here.
    """
    # Delegate to the shared `llm` helper instead of repeating the same request code.
    return llm(prompt_template.format(**doc))

In [None]:
from tqdm.auto import tqdm
# Generated questions keyed by document id. The generation loop skips ids that are
# already present, so this dict is what lets that loop be re-run without repeating work.
results = {}


In [None]:
# Request questions only for documents that are not in `results` yet, so the
# cell can be re-run after an interruption without repeating finished calls.
for doc in tqdm(documents):
    doc_id = doc['id']
    if doc_id not in results:
        parsed_reply = json.loads(generate_questions(doc))
        results[doc_id] = parsed_reply['questions']

In [None]:
# Flatten {doc_id: [question, ...]} into one (doc_id, question) tuple per question.
final_results = [
    (doc_id, question)
    for doc_id, doc_questions in results.items()
    for question in doc_questions
]

In [None]:
# One row per (document id, question) pair.
df_results = pd.DataFrame(final_results, columns=['id', 'question'])


In [None]:
df_results.to_csv('../data/ground-truth-retrieval.csv', index=False)
!head ../data/ground-truth-retrieval.csv


## Evaluating the retrieval search

In [38]:
# Load the ground-truth questions and the exercise records; `df` and `documents`
# are re-bound here from `data/data.csv`.
df_ground_truth = pd.read_csv('data/ground-truth-retrieval.csv')
df = pd.read_csv('data/data.csv')
documents = df.to_dict(orient='records')


In [39]:
# List of dicts, one per ground-truth row, keyed by CSV column name.
ground_truth = df_ground_truth.to_dict(orient='records')


In [42]:
# Build the search index: descriptive columns are text fields, `id` is a keyword field.
index = minsearch.Index(
    text_fields=[
        'exercise_name',
        'type_of_activity',
        'type_of_equipment',
        'body_part',
        'type',
        'muscle_groups_activated',
        'instructions',
    ],
    keyword_fields=['id'],
)
index.fit(documents)


<minsearch.Index at 0x11c0125d0>

In [43]:
def minsearch(query):
    """Run an unboosted search against the global ``index``.

    NOTE: defining this function rebinds the name ``minsearch``, which the
    notebook's import cell bound to the module. After this cell runs,
    ``minsearch.Index`` is no longer reachable until the import cell is
    re-run. The name is kept because a later cell calls ``minsearch(...)``.

    Args:
        query: Query text passed to ``index.search``.

    Returns:
        Whatever ``index.search`` returns for at most 10 results, with an
        empty filter and an empty boost dict.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=10,
    )

In [34]:
def evaluate(ground_truth, search_function):
    """Score a search function against ground-truth records.

    Args:
        ground_truth: Iterable of records; each must have an ``'id'`` key and
            is passed unchanged to ``search_function``.
        search_function: Callable taking one record and returning an iterable
            of result dicts that each have an ``'id'`` key.

    Returns:
        Dict with keys ``'hit_rate'`` and ``'mrr'``, computed over one
        relevance list per ground-truth record.
    """
    relevance_total = []

    for record in tqdm(ground_truth):
        expected_id = record['id']
        hits = search_function(record)
        # Position i is True when the i-th result is the expected document.
        relevance_total.append([hit['id'] == expected_id for hit in hits])

    scores = {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }
    return scores


In [35]:
def hit_rate(relevance_total):
    """Fraction of queries whose result list contains the expected document.

    Args:
        relevance_total: One list per query; an entry is True when the result
            at that position is the expected document.

    Returns:
        Share of lists that contain at least one True. Returns 0.0 when
        ``relevance_total`` is empty instead of dividing by zero.
    """
    if len(relevance_total) == 0:
        return 0.0

    hits = sum(1 for line in relevance_total if True in line)
    return hits / len(relevance_total)

def mrr(relevance_total):
    """Mean reciprocal rank of the first relevant result per query.

    Args:
        relevance_total: One list per query; entries are expected to be
            booleans, True when the result at that position is relevant.

    Returns:
        Average over queries of ``1 / rank`` for the first relevant result
        (rank starts at 1); a query with no relevant result contributes 0.
        Returns 0.0 when ``relevance_total`` is empty.
    """
    if len(relevance_total) == 0:
        return 0.0

    total_score = 0.0

    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            if is_relevant:
                total_score += 1 / rank
                # Only the first relevant hit counts; without this, several
                # hits in one list would be summed and the score could exceed 1.
                break

    return total_score / len(relevance_total)

In [50]:
# Show one ground-truth question. Index `ground_truth` explicitly: no notebook-level
# cell binds `q` to a record, so `q['question']` only worked from leftover kernel state.
ground_truth[0]['question']

'What is the starting position for doing push-ups?'

In [44]:
# Baseline: unboosted search scored on every ground-truth record.
evaluate(ground_truth, lambda q:  minsearch(q['question']))


100%|██████████| 1035/1035 [00:01<00:00, 781.56it/s]


{'hit_rate': 0.9478260869565217, 'mrr': 0.822744038033893}

## Finding the best parameters


In [45]:
# First 100 rows are used for tuning the boosts; the remaining rows form `df_test`.
# NOTE(review): this is a positional split with no shuffling. If the CSV keeps the
# order in which questions were generated (grouped by document id), the validation
# rows cover only the first few documents — confirm this is intended.
# NOTE(review): `df_test` is not used by any later cell visible in this notebook.
df_validation = df_ground_truth[:100]
df_test = df_ground_truth[100:]



In [46]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10, seed=None):
    """Random search that maximizes ``objective_function``.

    Args:
        param_ranges: Mapping of parameter name to a ``(min_val, max_val)``
            pair. When both bounds are ints the value is drawn with
            ``randint`` (inclusive); otherwise with ``uniform``.
        objective_function: Callable taking a dict of sampled parameters and
            returning a score; higher is better.
        n_iterations: Number of random samples to evaluate.
        seed: Optional seed. When given, sampling uses a private
            ``random.Random(seed)`` so the search is reproducible and the
            global ``random`` state is left untouched. When None, the global
            ``random`` module is used, as before.

    Returns:
        Tuple ``(best_params, best_score)``. With ``n_iterations=0`` this is
        ``(None, float('-inf'))``.
    """
    rng = random if seed is None else random.Random(seed)

    best_params = None
    best_score = float('-inf')

    for _ in range(n_iterations):
        # Draw one random value per parameter.
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = rng.randint(min_val, max_val)
            else:
                current_params[param] = rng.uniform(min_val, max_val)

        current_score = objective_function(current_params)

        # Maximizing: keep the sample only when it strictly beats the best so far.
        if current_score > best_score:
            best_score = current_score
            best_params = current_params

    return best_params, best_score

In [47]:
# Validation rows as a list of dicts, the record format `evaluate` iterates over.
gt_val = df_validation.to_dict(orient='records')


In [48]:
def minsearch_search(query, boost=None):
    """Search the global ``index`` with optional per-field boosts.

    Args:
        query: Query text passed to ``index.search``.
        boost: Boost dict passed as ``boost_dict``; ``None`` means an empty dict.

    Returns:
        Whatever ``index.search`` returns for at most 10 results with an
        empty filter.
    """
    boost_dict = {} if boost is None else boost
    return index.search(
        query=query,
        filter_dict={},
        boost_dict=boost_dict,
        num_results=10,
    )

In [49]:
# Every text field gets the same boost search range of 0.0 to 3.0.
param_ranges = {
    field: (0.0, 3.0)
    for field in (
        'exercise_name',
        'type_of_activity',
        'type_of_equipment',
        'body_part',
        'type',
        'muscle_groups_activated',
        'instructions',
    )
}

def objective(boost_params):
    """Return the validation MRR obtained with the given boosts.

    Args:
        boost_params: Boost dict forwarded to ``minsearch_search``.

    Returns:
        The ``'mrr'`` value from ``evaluate`` run on ``gt_val``.
    """
    metrics = evaluate(
        gt_val,
        lambda q: minsearch_search(q['question'], boost_params),
    )
    return metrics['mrr']

In [51]:
# Random search over the boost ranges with 20 samples; displays (best boosts, best validation MRR).
simple_optimize(param_ranges, objective, n_iterations=20)


100%|██████████| 100/100 [00:00<00:00, 657.25it/s]
100%|██████████| 100/100 [00:00<00:00, 759.04it/s]
100%|██████████| 100/100 [00:00<00:00, 767.09it/s]
100%|██████████| 100/100 [00:00<00:00, 768.56it/s]
100%|██████████| 100/100 [00:00<00:00, 767.40it/s]
100%|██████████| 100/100 [00:00<00:00, 761.78it/s]
100%|██████████| 100/100 [00:00<00:00, 772.22it/s]
100%|██████████| 100/100 [00:00<00:00, 773.11it/s]
100%|██████████| 100/100 [00:00<00:00, 766.27it/s]
100%|██████████| 100/100 [00:00<00:00, 766.00it/s]
100%|██████████| 100/100 [00:00<00:00, 718.09it/s]
100%|██████████| 100/100 [00:00<00:00, 717.29it/s]
100%|██████████| 100/100 [00:00<00:00, 715.27it/s]
100%|██████████| 100/100 [00:00<00:00, 748.96it/s]
100%|██████████| 100/100 [00:00<00:00, 760.52it/s]
100%|██████████| 100/100 [00:00<00:00, 757.80it/s]
100%|██████████| 100/100 [00:00<00:00, 765.36it/s]
100%|██████████| 100/100 [00:00<00:00, 777.03it/s]
100%|██████████| 100/100 [00:00<00:00, 775.53it/s]
100%|██████████| 100/100 [00:00

({'exercise_name': 2.2079862069745246,
  'type_of_activity': 0.673923082355541,
  'type_of_equipment': 0.3206386885027458,
  'body_part': 0.7272928780794605,
  'type': 1.9285969667153524,
  'muscle_groups_activated': 0.02618059564665598,
  'instructions': 0.3773032031981245},
 0.857)

In [52]:
def minsearch_improved(query):
    """Search the global ``index`` with a fixed set of per-field boosts.

    NOTE(review): these boost values differ from the optimizer output recorded
    in this notebook; presumably they come from a different run — confirm
    which set is intended.

    Args:
        query: Query text to search for.

    Returns:
        The result of ``minsearch_search(query, boost)``: at most 10 results
        with an empty filter.
    """
    boost = {
        'exercise_name': 2.11,
        'type_of_activity': 1.46,
        'type_of_equipment': 0.65,
        'body_part': 2.65,
        'type': 1.31,
        'muscle_groups_activated': 2.54,
        'instructions': 0.74
    }

    # Reuse the shared search helper rather than repeating the index.search call.
    return minsearch_search(query, boost)

# Score the boosted search on the full ground truth.
# NOTE(review): `ground_truth` includes the first 100 rows that `df_validation`
# used for tuning, so this is not a held-out estimate; consider scoring the
# `df_test` records instead.
evaluate(ground_truth, lambda q: minsearch_improved(q['question']))

100%|██████████| 1035/1035 [00:01<00:00, 758.07it/s]


{'hit_rate': 0.9468599033816425, 'mrr': 0.9029733149298367}