In [1]:
import pandas as pd

In [2]:
import minsearch

## Retrival Evaluation ##

In [21]:
df = pd.read_csv('../data/data.csv')
df.shape

(207, 8)

In [22]:
df.head()

Unnamed: 0,id,exercise_name,type_of_activity,type_of_equipment,body_part,type,muscle_groups_activated,instructions
0,0,Push-Ups,Strength,Bodyweight,Upper Body,Push,"Pectorals, Triceps, Deltoids",Start in a high plank position with your hands...
1,1,Squats,Strength,Bodyweight,Lower Body,Push,"Quadriceps, Glutes, Hamstrings",Stand with feet shoulder-width apart. Lower yo...
2,2,Plank,Strength/Mobility,Bodyweight,Core,Hold,"Rectus Abdominis, Transverse Abdominis",Start in a forearm plank position with your el...
3,3,Deadlift,Strength,Barbell,Lower Body,Pull,"Glutes, Hamstrings, Lower Back","Stand with feet hip-width apart, barbell in fr..."
4,4,Bicep Curls,Strength,Dumbbells,Upper Body,Pull,"Biceps, Forearms","Stand with a dumbbell in each hand, arms fully..."


In [23]:
df.columns  

Index(['id', 'exercise_name', 'type_of_activity', 'type_of_equipment',
       'body_part', 'type', 'muscle_groups_activated', 'instructions'],
      dtype='object')

In [24]:
documents = df.to_dict(orient='records')

In [6]:
documents

[{'id': 0,
  'exercise_name': 'Push-Ups',
  'type_of_activity': 'Strength',
  'type_of_equipment': 'Bodyweight',
  'body_part': 'Upper Body',
  'type': 'Push',
  'muscle_groups_activated': 'Pectorals, Triceps, Deltoids',
  'instructions': 'Start in a high plank position with your hands under your shoulders. Lower your body until your chest nearly touches the floor. Push back up to the starting position.'},
 {'id': 1,
  'exercise_name': 'Squats',
  'type_of_activity': 'Strength',
  'type_of_equipment': 'Bodyweight',
  'body_part': 'Lower Body',
  'type': 'Push',
  'muscle_groups_activated': 'Quadriceps, Glutes, Hamstrings',
  'instructions': 'Stand with feet shoulder-width apart. Lower your body as if sitting back into a chair, keeping your chest up. Return to standing.'},
 {'id': 2,
  'exercise_name': 'Plank',
  'type_of_activity': 'Strength/Mobility',
  'type_of_equipment': 'Bodyweight',
  'body_part': 'Core',
  'type': 'Hold',
  'muscle_groups_activated': 'Rectus Abdominis, Transvers

In [27]:
prompt_template = """
You emulate a user of our fitness assistant application.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

exercise_name: {exercise_name}
type_of_activity: {type_of_activity}
type_of_equipment: {type_of_equipment}
body_part: {body_part}
type: {type}
muscle_groups_activated: {muscle_groups_activated}
instructions: {instructions}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [28]:
prompt = prompt_template.format(**documents[0])

In [29]:
print(prompt)

You emulate a user of our fitness assistant application.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

exercise_name: Push-Ups
type_of_activity: Strength
type_of_equipment: Bodyweight
body_part: Upper Body
type: Push
muscle_groups_activated: Pectorals, Triceps, Deltoids
instructions: Start in a high plank position with your hands under your shoulders. Lower your body until your chest nearly touches the floor. Push back up to the starting position.

Provide the output in parsable JSON without using code blocks:

{"questions": ["question1", "question2", ..., "question5"]}


In [9]:
import os

In [11]:
#os.environ['OPENAI_API_KEY'] = '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'

In [12]:
from openai import OpenAI
client = OpenAI()

In [30]:
def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

In [31]:
res = llm(prompt)

In [32]:
print(res)

{"questions": ["What is the correct starting position for doing push-ups?", "Which muscle groups are primarily targeted by push-ups?", "How should I lower my body during a push-up?", "Do I need any equipment to perform push-ups?", "How do I return to the starting position after lowering my body?"]}


In [33]:
import json
json.loads(res)

{'questions': ['What is the correct starting position for doing push-ups?',
  'Which muscle groups are primarily targeted by push-ups?',
  'How should I lower my body during a push-up?',
  'Do I need any equipment to perform push-ups?',
  'How do I return to the starting position after lowering my body?']}

In [35]:
def generate_questions(doc):
    prompt = prompt_template.format(**doc)
    results = llm(prompt)
    return results

In [39]:
from tqdm.auto import tqdm

In [40]:
results = {}
for doc in tqdm(documents):
    if doc['id'] in results:
        continue
    questions_raw = generate_questions(doc)
    questions = json.loads(questions_raw)
    results[doc['id']] = questions

  0%|          | 0/207 [00:00<?, ?it/s]

In [43]:
results

{0: {'questions': ['What is the starting position for push-ups?',
   'Which muscle groups are targeted during push-ups?',
   'Do I need any equipment to perform push-ups?',
   'How do I properly lower my body during a push-up?',
   'What type of exercise are push-ups classified as?']},
 1: {'questions': ['What body position should I maintain during squats?',
   'Which muscle groups are activated when performing squats?',
   'Do I need any equipment to perform squats?',
   'How should my feet be positioned while doing squats?',
   'What is the proper motion for lowering my body during squats?']},
 2: {'questions': ['What is the correct starting position for the Plank exercise?',
   'Which muscle groups are primarily activated during the Plank?',
   'How should my body be aligned while holding a Plank?',
   'What type of exercise is Plank classified as?',
   'Do I need any equipment to perform the Plank?']},
 3: {'questions': ['What is the proper stance for performing a deadlift?',
   'W

In [57]:
final_results = []

In [58]:
for doc_id, questions in results.items():
    for question in questions['questions']:
        final_results.append((doc_id, question))

In [61]:
df_results = pd.DataFrame(final_results, columns=['id', 'question'])

In [62]:
df_results.shape

(1035, 2)

In [63]:
df_results.to_csv('../data/ground-truth-retrieval.csv', index=False)

In [64]:
!head ../data/ground-truth-retrieval.csv

id,question
0,What is the starting position for push-ups?
0,Which muscle groups are targeted during push-ups?
0,Do I need any equipment to perform push-ups?
0,How do I properly lower my body during a push-up?
0,What type of exercise are push-ups classified as?
1,What body position should I maintain during squats?
1,Which muscle groups are activated when performing squats?
1,Do I need any equipment to perform squats?
1,How should my feet be positioned while doing squats?
