In [1]:
# import libraries
import json
from dataclasses import dataclass, field

import minsearch
import pandas as pd
import requests
from langchain_community.chat_models import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage
from openai import OpenAI
from transformers import T5Tokenizer, TFT5ForConditionalGeneration  # all tensorflow based libs




In [42]:
@dataclass
class ModelConfig:
    """
    Configuration plus thin query helpers for the LLM back-ends used here.

    model_name: The name of the model to load for the flan-t5 path
    tokenizer: The tokenizer used to convert text to token ids
               (None until the first flan-t5 query)
    model: The LLM model used for generation
           (None until the first flan-t5 query)
    """
    model_name: str = "google/flan-t5-large"
    tokenizer: T5Tokenizer = field(init=False, default=None)
    model: TFT5ForConditionalGeneration = field(init=False, default=None)

    # Shared cache (class-level), keyed by model_name. It has no type
    # annotation, so @dataclass does not turn it into a per-instance field:
    # every instance reads and writes this same dict.
    _shared_models = {}

    def query_flan_t5_llm(self, text: str, max_new_tokens: int = 256) -> str:
        """
        Query the flan t5 model with the given text.

        Args:
            text: The prompt to send to the model.
            max_new_tokens: Upper bound on the number of generated tokens.
                Passed explicitly because generate() otherwise falls back to
                the library's short default output length, which cuts long
                answers off.

        Returns:
            The decoded first generated sequence, without special tokens.
        """
        # Lazy-load model if not already loaded; later calls (from any
        # instance with the same model_name) reuse the cached pair.
        if self.model_name not in self._shared_models:
            tokenizer = T5Tokenizer.from_pretrained(self.model_name)
            model = TFT5ForConditionalGeneration.from_pretrained(self.model_name)
            self._shared_models[self.model_name] = (tokenizer, model)
            print(f"Loaded model and tokenizer for: {self.model_name}")

        self.tokenizer, self.model = self._shared_models[self.model_name]

        input_ids = self.tokenizer(text, return_tensors="tf").input_ids
        outputs = self.model.generate(input_ids, max_new_tokens=max_new_tokens)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def query_gemma_llm(self, text: str):
        """
        Return a ChatOpenAI client configured for the `gemma3:12b` model
        served at http://localhost:11434/v1.

        Note: `text` is accepted but not used; this method only builds and
        returns the client, it does not send a request.
        """
        llm = ChatOpenAI(
            model="gemma3:12b",
            base_url="http://localhost:11434/v1",
            api_key="ollama",  # placeholder, not validated
        )
        return llm

@dataclass
class promptConfig:
    """
    Prompt templates used to assemble the text sent to the LLM.

    prompt_template: Overall prompt; has `{question}` and `{context}` placeholders.
    entry_template: One context entry; has one placeholder per record field
                    (exercise_name, type_of_activity, type_of_equipment, body_part,
                    type, muscle_groups_activated, instructions, video_link).
    """
    # Both templates are stripped of leading/trailing whitespace at definition time.
    prompt_template:str = """
        You're a fitness instructor. Answer the QUESTION based on the CONTEXT from our exercises database.
        Use only the facts from the CONTEXT when answering the QUESTION.
        Ensure to include multiple exercises, alongside the type of activity, equipment, body part, muscle group activated and instructions
        
        QUESTION: {question}

        CONTEXT: 
        {context}

       Make sure to include the video_link in the context in your response. You will be penalized if you don't do that
        
    """.strip()

    entry_template : str = """
    exercise_name: {exercise_name},
     type_of_activity: {type_of_activity},
     type_of_equipment: {type_of_equipment},
     body_part: {body_part},
     type: {type},
     muscle_groups_activated: {muscle_groups_activated},
     instructions: {instructions},
     video_link: {video_link}
    """.strip()

# Module-level ModelConfig instance built with the default model_name.
model_config = ModelConfig()


In [43]:
def load_data(file_path: str='detailed_exercise_dataset.csv')-> pd.DataFrame:
    """
    Read the CSV file at `file_path` and return its contents as a DataFrame.
    """
    exercises = pd.read_csv(file_path)
    return exercises

def clean_data(df:pd.DataFrame)->pd.DataFrame:
    """
    Drop rows that repeat an 'Exercise Name' (the first occurrence is kept)
    and normalise the column names: lower-case, spaces replaced by underscores.

    The input frame itself is left untouched; a new frame is returned.
    """
    unique_rows = df.drop_duplicates(subset='Exercise Name')
    snake_case_names = unique_rows.columns.str.lower().str.replace(' ', '_')
    return unique_rows.set_axis(snake_case_names, axis="columns")
    
def create_document(df:pd.DataFrame):
    """
    Turn every row of the dataframe into a dict keyed by column name
    and return the rows as a list of such dicts.
    """
    records = df.to_dict("records")
    return records
    
def get_index(text_fields):
    """
    Build a minsearch Index over the given text fields.

    No keyword fields are registered (an empty list is passed).
    """
    index_options = {
        "text_fields": text_fields,
        "keyword_fields": [],
    }
    return minsearch.Index(**index_options)

def open_api(query:str):
    """
    Send `query` as a single user message to the OpenAI chat completions API
    (model 'gpt-4o-mini') and return the text of the first choice.
    """
    user_message = {"role": "user", "content": query}
    completion = OpenAI().chat.completions.create(
        model='gpt-4o-mini',
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def search(query):
    """
    Look up `query` in the module-level `index` and return up to 10 results.

    No filters and no field boosts are applied (both dicts are empty).
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=10,
    )

def flan_t5(input_text:str) -> str:
    """
    Run `input_text` through the flan-t5 model via a default ModelConfig
    and return the generated text.
    """
    config = ModelConfig()
    return config.query_flan_t5_llm(input_text)

def gemma(input_text:str, stream:bool=True) -> str:
    """
    Call gemma model using ollama and return the generated text.

    Args:
        input_text: The prompt sent to the model.
        stream: When True, the server replies with one JSON object per line and
            the "response" fragments are concatenated. When False, a single
            JSON object is expected and its "response" field is returned.

    Returns:
        The generated text ("" if no "response" field was present).

    Raises:
        requests.HTTPError: If the server answers with an error status, instead
            of silently producing an empty answer.
    """
    payload = {
        # 'model': "gemma3:12b",
        'model': "gemma:2b",
        'prompt': input_text,
        'stream': stream,  # Set True for streamed responses
    }

    # stream=stream lets requests hand lines over as they arrive instead of
    # buffering the whole body first; the context manager releases the
    # connection even if parsing fails part-way through.
    with requests.post(
        'http://localhost:11434/api/generate',
        json=payload,
        stream=stream,
    ) as response:
        response.raise_for_status()

        if not stream:
            # Return the text (as the annotation promises), not the raw Response.
            return response.json().get("response", "")

        fragments = []
        for line in response.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                # Best effort: report the bad line and keep collecting the rest.
                print("⚠️ Failed to parse line:", line)
                print(e)
                continue
            fragments.append(data.get("response", ""))

    return "".join(fragments)


In [44]:
# load the CSV, de-duplicate / normalise it, and convert the rows to a list of dicts
data = load_data()
data = clean_data(data)
document = create_document(data)

# example query reused by the cells below
query = "give me leg exercises for back"

# create the index with every dataframe column registered as a text field
index = get_index(list(data.columns))

# fit the index on the exercise records
index.fit(document)

# search; as the last expression of the cell, the result list is displayed
index.search(query, num_results=10)


[{'exercise_name': 'Hanging Leg Raise',
  'type_of_activity': 'Cardio',
  'type_of_equipment': 'Kettlebell',
  'body_part': 'Lower body',
  'type': 'Hold',
  'muscle_groups_activated': 'Hamstrings, Glutes, Lower Back',
  'instructions': 'Lie on your back, lift your legs until they are perpendicular to the floor, then lower slowly.',
  'video_link': 'https://www.youtube.com/watch?v=QOVaHwm-Q6U'},
 {'exercise_name': 'Leg Curl',
  'type_of_activity': 'Mobility',
  'type_of_equipment': 'Barbell',
  'body_part': 'Lower body',
  'type': 'Push',
  'muscle_groups_activated': 'Lower Abdominals, Hip Flexors',
  'instructions': 'Lift the barbell by straightening your hips and knees, keeping your back flat.',
  'video_link': 'https://www.youtube.com/watch?v=9FGilxCbdz8'},
 {'exercise_name': 'Turkish Get-Up',
  'type_of_activity': 'Stretching',
  'type_of_equipment': 'Barbell',
  'body_part': 'Lower body',
  'type': 'Pull',
  'muscle_groups_activated': 'Hamstrings, Glutes, Lower Back',
  'instructi

In [45]:

def build_prompt(query, search_results):
    """
    Assemble the final LLM prompt.

    Each search result is rendered with promptConfig.entry_template and
    followed by a blank line; the joined entries become the {context} of
    promptConfig.prompt_template, with `query` as the {question}.
    The finished prompt is printed and returned.
    """
    entries = [promptConfig.entry_template.format(**doc) for doc in search_results]
    context = "".join(entry + '\n\n' for entry in entries)

    prompt = promptConfig.prompt_template.format(question=query, context=context).strip()
    print(f"prompt :{prompt}")
    return prompt

def rag(query, model_used='gemma', stream=True):
    """
    Carry out RAG.

    Steps:
      1. Get the search results for `query` from the search index.
      2. Build the prompt using the query and the search results.
      3. Call the llm model.

    Args:
        query: The user question.
        model_used: 'gemma' routes the prompt to gemma(); any other value
            routes it to flan_t5().
        stream: Forwarded to gemma(); ignored on the flan_t5 path.

    Returns:
        The response from the llm model.
    """
    search_results = search(query)

    # Debug preview. The second hit is shown when there is one; with fewer
    # than two hits, indexing [1] would raise IndexError, so fall back to
    # printing whatever came back.
    preview = search_results[1] if len(search_results) > 1 else search_results
    print(f"Search results : {preview}")

    prompt = build_prompt(query, search_results)
    print(f"PROMPT :{prompt}")

    if model_used == 'gemma':
        return gemma(prompt, stream=stream)
    return flan_t5(prompt)

def llm(prompt):
    """
    Send `prompt` as a single user message to the OpenAI chat completions API
    (model 'gpt-4o') and return the text of the first choice.

    The client is created locally so the function does not depend on a
    global `client` variable having been defined by some other cell.
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content



In [46]:

# run the RAG pipeline on the example query (default model_used='gemma') and print the answer
print(f"Query : {query}")
answer = rag(query, stream=True)
print(answer)

Query : give me leg exercises for back
Search results : {'exercise_name': 'Leg Curl', 'type_of_activity': 'Mobility', 'type_of_equipment': 'Barbell', 'body_part': 'Lower body', 'type': 'Push', 'muscle_groups_activated': 'Lower Abdominals, Hip Flexors', 'instructions': 'Lift the barbell by straightening your hips and knees, keeping your back flat.', 'video_link': 'https://www.youtube.com/watch?v=9FGilxCbdz8'}
prompt :You're a fitness instructor. Answer the QUESTION based on the CONTEXT from our exercises database.
        Use only the facts from the CONTEXT when answering the QUESTION.
        Ensure to include multiple exercises, alongside the type of activity, equipment, body part, muscle group activated and instructions

        QUESTION: give me leg exercises for back

        CONTEXT: 
        exercise_name: Hanging Leg Raise,
     type_of_activity: Cardio,
     type_of_equipment: Kettlebell,
     body_part: Lower body,
     type: Hold,
     muscle_groups_activated: Hamstrings, Glu