# Explore and generate Embedding on Dataset

## Part 1: Load and Explore the Dataset

In [4]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# 1. Load Dataset
df = pd.read_csv("megaGymDataset_trimmed.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,RatingDesc
0,1,Adductor SMR,Adductor self-myofascial release (SMR) is a se...,Stretching,Adductors,Foam Roll,Beginner,4.0,Average
1,2,Overhead cable curl,"Also known as a crucifix curl, the overhead ca...",Strength,Biceps,Cable,Intermediate,9.0,Average
2,3,Battle ropes,Battle ropes are an explosive training tool th...,Strength,Shoulders,Other,Intermediate,8.6,Average
3,4,Brachialis SMR,Brachialis self-myofascial release (SMR) is a ...,Stretching,Biceps,Foam Roll,Beginner,3.2,Average
4,5,Calf SMR,Calf self-myofascial release (SMR) is a self-a...,Stretching,Calves,Foam Roll,Beginner,6.0,Average


## Part 2: Generate BERT Embedding for Exercise Descriptions

In [5]:
# 2. Generate BERT Embedding
model = SentenceTransformer('all-MiniLM-L6-v2')
df['Desc'] = df['Desc'].fillna("")
df['embeddings'] = df['Desc'].apply(lambda x: model.encode(x))

In [13]:
#DEBUG
print(df.shape)
print(df['embeddings'])
print(df['embeddings'][0])
print(df['embeddings'][0].shape)

(506, 10)
0      [-0.0877463, -0.016791563, -0.011263413, 0.056...
1      [-0.064070694, 0.031366855, -0.048818823, -0.0...
2      [-0.06768111, 0.04791671, -0.0612316, 0.017209...
3      [-0.055915833, -0.011789124, -0.029092168, 0.0...
4      [-0.09619398, -0.012659797, 0.022272453, 0.023...
                             ...                        
501    [-0.0813132, -0.047826353, -0.04091641, 0.0094...
502    [-0.024680601, 0.004833572, -0.025807275, 0.01...
503    [-0.048602447, 0.0057841097, -0.03043114, 0.00...
504    [-0.07165635, -0.023867361, -0.022176031, 0.01...
505    [-0.04580419, -0.026240228, 0.011660796, -0.02...
Name: embeddings, Length: 506, dtype: object
[-8.77462998e-02 -1.67915635e-02 -1.12634134e-02  5.67757413e-02
  4.34585242e-03 -2.48453617e-02  5.95822260e-02  8.56339186e-02
  4.42064926e-02 -2.21412466e-03  6.65282682e-02  9.41809360e-03
 -2.66557792e-03  4.73841615e-02  7.51340482e-03  3.16242799e-02
  3.23010348e-02 -3.95922549e-02  2.21997537e-02 -6.789506

## Part 3: Define Recommender Func and try a query

In [17]:
# 3. Define Recommender Function
def recommend(query, df, model, top_k=5):
    query_vec = model.encode(query)
    all_embeddings = np.vstack(df['embeddings'].values)
    similarities = cosine_similarity([query_vec], all_embeddings)[0]
    
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    print(all_embeddings)
    results = df.iloc[top_indices].copy()
    results['similarity'] = similarities[top_indices]
    return results

# 4. Try a Query
query = "core strength with dumbbells"
recommendations = recommend(query, df, model)
recommendations[['Title', 'Desc', 'similarity']]

[[-0.0877463  -0.01679156 -0.01126341 ... -0.02650267  0.03391294
   0.02897057]
 [-0.06407069  0.03136685 -0.04881882 ... -0.02043694  0.01391077
   0.05760772]
 [-0.06768111  0.04791671 -0.0612316  ... -0.02460235 -0.00984586
   0.05669298]
 ...
 [-0.04860245  0.00578411 -0.03043114 ...  0.03788218  0.05852997
   0.01382147]
 [-0.07165635 -0.02386736 -0.02217603 ...  0.03924169  0.04261698
   0.00120317]
 [-0.04580419 -0.02624023  0.0116608  ... -0.02723262 -0.03201497
   0.09843025]]


Unnamed: 0,Title,Desc,similarity
157,Side Lateral Raise,The dumbbell lateral raise is an upper body is...,0.569576
463,Straight-arm dumbbell pull-over,The straight-arm dumbbell pull-over is an exer...,0.569548
166,Dumbbell step-up,The dumbbell step-up is a great exercise for b...,0.568739
167,Dumbbell step-up,The dumbbell step-up is a great exercise for b...,0.568739
146,Dumbbell Bench Press,The dumbbell bench press is a mainstay of work...,0.556518


## Part X: Streamlit UI for recommendation

In [None]:
# import streamlit as st
# from sklearn.metrics.pairwise import cosine_similarity
# import numpy as np

# st.title("🏋️ Exercise Recommender")

# query = st.text_input("Describe your workout goal or need:")
# if st.button("Get Recommendations") and query:
#     query_vec = model.encode(query)
#     sims = cosine_similarity([query_vec], list(df['embeddings']))[0]
#     top_idx = np.argsort(sims)[-5:][::-1]
    
#     st.subheader("Top Exercise Matches:")
#     for i in top_idx:
#         st.write(f"**{df.iloc[i]['exercise_name']}**")
#         st.write(df.iloc[i]['description'])
