# PRIMARY FILTERING FUNCTIONS

Filters exercises based on experience level, equipment, training modalities, risk level, and pain points. An optimized version of the function is provided further below.

In [None]:
#from itertools import combinations
import pandas as pd

def filter_data(df, user_level, user_equipment, training_modalities, pain_points, age):
    """
    Filters exercises based on user level, equipment, training modalities, pain points, and age.

    The filters are applied one after another; a row is kept only if it passes all of them.
    The input DataFrame and the input lists are left untouched.

    Parameters:
    - df (DataFrame): Pandas DataFrame to be filtered based on user criteria. It must
      provide the columns 'Level', 'Equipment', 'Exercise Purpose', 'Risk level'
      and 'Pain Exclusions'.
    - user_level (str): User's experience level. A row is kept when this value is
      contained in the row's 'Level' cell.
    - user_equipment (list): List of equipment available to the user.
    - training_modalities (list): List of desired training modalities. For a level-0
      user ('0' or 0) the modality 'MP' is additionally accepted.
    - pain_points (list): List of user's pain points. Rows whose 'Pain Exclusions'
      contain any of them are dropped.
    - age (int): User's age. Users older than 50 only get rows with 'Risk level' <= 2.

    Returns:
    - filtered_df (DataFrame): Filtered Pandas DataFrame based on user criteria.

    Side effects:
    - Prints the set of user equipment to stdout.
    """

    # Work on a private copy so the caller's list is never modified.
    modalities = list(training_modalities)

    # Adjust training modalities if user is at level 0. The level is documented
    # as a string, so accept both '0' and the integer 0.
    if user_level in (0, '0') and 'MP' not in modalities:
        modalities.append('MP')

    filtered_df = df.copy()  # Start with the full dataset to avoid overwriting the original

    def keep_rows(frame, column, predicate):
        # On an empty column `apply` returns an object-dtype Series, which pandas
        # does not treat as a boolean mask (it would select zero columns instead
        # of zero rows). Casting to bool keeps the columns intact.
        mask = frame[column].apply(predicate).astype(bool)
        return frame[mask]

    # 1. Filter by Experience Level
    filtered_df = keep_rows(filtered_df, 'Level', lambda levels: user_level in levels)

    # User-provided equipment set
    user_equipment_set = set(user_equipment)
    print(user_equipment_set)

    # 2. Filter by Equipment: each cell holds alternative equipment combinations;
    # keep the row if the user owns every item of at least one non-empty combination.
    filtered_df = keep_rows(
        filtered_df,
        'Equipment',
        lambda subsets: any(
            user_equipment_set.issuperset(subset) for subset in subsets if subset
        ),
    )

    # 3. Filter by Training Modalities
    filtered_df = keep_rows(
        filtered_df,
        'Exercise Purpose',
        lambda purposes: any(modality in purposes for modality in modalities),
    )

    # 4. Filter by Risk Level (if user is older than 50)
    if age > 50:
        filtered_df = filtered_df[filtered_df['Risk level'] <= 2]

    # 5. Exclude Exercises Based on Pain Points
    filtered_df = keep_rows(
        filtered_df,
        'Pain Exclusions',
        lambda exclusions: not any(pain in exclusions for pain in pain_points),
    )

    return filtered_df

In [2]:
# Load the exercise dataset from a JSON file into a DataFrame.
path = 'dataset.json'
df = pd.read_json(path)
# Preview the first rows of the loaded data as the cell output.
df.head()

Unnamed: 0,Exercise,Main muscle(s),Secondary muscles,Movement,Type,Lower bound (lbs/resistance/time),Level,Difficulty,Equipment Type,Equipment,Prerequesite Exercise,Variations,Regression,Progression,Loading type (Asymmetrical / Symmetrical),Risk level,Exercise Purpose,"Force type (Push, Pull, Rotation, Isomatric)","Movement Pattern (Squat, Hinge, Lunge, Push, Pull, Carry, Twist)",Pain Exclusions
0,Wall push up,[Chest],"[Shoulders, triceps]",Push,Compound,0,[1],1,body,[[None]],[],"[kneeling push up, push up, close grip push up...",[],[kneeling push up],0,0,"[H, STR, ME]",[push],[push],"[Shoulder, Elbow]"
1,Kneeling push up,[Chest],"[Shoulders, triceps]",Push,Compound,0,"[1, 2]",1,body,[[None]],[],"[Wall push up, push up, loop band push up, clo...",[Wall push up],[push up],0,0,"[H, STR, ME]",[push],[push],"[Shoulder, Elbow]"
2,Push up,[Chest],"[Shoulders, triceps]",Push,Compound,0,"[2, 3, 4]",2,body,[[None]],[],"[Wall push up, kneeling push up, loop band pus...",[Kneeling push up],[loop band push up],0,1,"[H, STR, ME]",[push],[push],"[Shoulder, Elbow]"
3,Close grip push up,[Chest],"[Shoulders, triceps]",Push,Compound,0,"[2, 3, 4]",2,body,[[None]],[Push up],"[Wall push up, kneeling push up, loop band pus...",[Kneeling push up],[loop band push up],0,0,"[H, STR, ME]",[push],[push],"[Shoulder, Elbow]"
4,Loop band push up,[Chest],"[Shoulders, triceps]",Push,Compound,Light,"[2, 3, 4]",2,band,[[Loop band]],[push up],"[Wall push up, kneeling push up, push up, clos...",[push up],[],0,1,"[H, STR, ME]",[push],[push],"[Shoulder, Elbow]"


In [3]:
# List the distinct Python types stored in the 'Equipment' column.
print(df['Equipment'].apply(type).unique())
# Show the first few 'Equipment' values.
print(df['Equipment'].head())


[<class 'list'>]
0         [[None]]
1         [[None]]
2         [[None]]
3         [[None]]
4    [[Loop band]]
Name: Equipment, dtype: object


In [4]:
# Run the filter for a sample user; every argument is passed by keyword,
# so the call does not depend on the positional parameter order.
result = filter_data(df=df,
    user_level='4',
    user_equipment=['Bench', 'Olympic barbell','None'],
    training_modalities=['H', 'STR', 'ME'],
    age=30,
    pain_points=[]
)

# Show the first rows of the result and how many rows passed the filters.
print("✅ Final Filtered Dataset:")
print(result.head())
print(f"Total rows after filtering: {result.shape[0]}")



{'None', 'Bench', 'Olympic barbell'}
✅ Final Filtered Dataset:
                Exercise Main muscle(s)     Secondary muscles Movement  \
2                Push up        [Chest]  [Shoulders, triceps]     Push   
3     Close grip push up        [Chest]  [Shoulders, triceps]     Push   
5        Deficit push up        [Chest]  [Shoulders, triceps]     Push   
17   Barbell bench press        [Chest]  [Shoulders, triceps]     Push   
18  Barbell larsen press        [Chest]  [Shoulders, triceps]     Push   

        Type Lower bound (lbs/resistance/time)      Level  Difficulty  \
2   Compound                                 0  [2, 3, 4]           2   
3   Compound                                 0  [2, 3, 4]           2   
5   Compound                                 0  [2, 3, 4]           2   
17  Compound                                45  [2, 3, 4]           2   
18  Compound                                45  [2, 3, 4]           2   

   Equipment Type                   Equipment  Prereq

# OPTIMIZED VERSION

In [None]:
def filter_data(df, user_level, user_equipment, training_modalities, age, pain_points):
    """
    Filters exercises based on user level, equipment, training modalities, pain points, and age.

    All criteria are combined into a single boolean mask that is applied once.
    The input DataFrame and the input lists are left untouched.

    Parameters:
    - df (DataFrame): The dataset to filter. It must provide the columns 'Level',
      'Equipment', 'Exercise Purpose' and 'Pain Exclusions', plus 'Risk level'
      when age is above 50.
    - user_level (str): User's experience level. A row is kept when this value is
      contained in the row's 'Level' cell.
    - user_equipment (list): List of equipment available to the user.
    - training_modalities (list): List of desired training modalities. For a level-0
      user ('0' or 0) the modality 'MP' is additionally accepted, matching the
      step-by-step version of this function.
    - age (int): User's age. Users older than 50 only get rows with 'Risk level' <= 2.
    - pain_points (list): List of user's pain points. Rows whose 'Pain Exclusions'
      contain any of them are dropped.

    Returns:
    - filtered_df (DataFrame): Filtered Pandas DataFrame based on user criteria.
    """
    # Ensure user equipment is a set for faster lookups
    user_equipment_set = set(user_equipment)

    # Level-0 users also accept the 'MP' modality; extend a private copy so the
    # caller's list is never modified.
    modalities = list(training_modalities)
    if user_level in (0, '0') and 'MP' not in modalities:
        modalities.append('MP')

    def column_mask(column, predicate):
        # On an empty column `apply` returns an object-dtype Series, which pandas
        # does not treat as a boolean mask; the cast keeps it a proper row mask.
        return df[column].apply(predicate).astype(bool)

    keep = (
        column_mask('Level', lambda levels: user_level in levels)
        # Each 'Equipment' cell holds alternative combinations; the user must own
        # every item of at least one non-empty combination.
        & column_mask(
            'Equipment',
            lambda subsets: any(
                user_equipment_set.issuperset(subset) for subset in subsets if subset
            ),
        )
        & column_mask(
            'Exercise Purpose',
            lambda purposes: any(modality in purposes for modality in modalities),
        )
        & ~column_mask(
            'Pain Exclusions',
            lambda exclusions: any(pain in exclusions for pain in pain_points),
        )
    )

    # The risk cap only applies to users older than 50.
    if age > 50:
        keep = keep & (df['Risk level'] <= 2)

    return df[keep]


In [16]:
# Run the filter for a sample user; every argument is passed by keyword,
# so the call does not depend on the positional parameter order.
result = filter_data(df=df,
    user_level='4',
    user_equipment=['Bench', 'Olympic barbell','None'],
    training_modalities=['H', 'STR', 'ME'],
    age=30,
    pain_points=[]
)

# Show the first rows of the result and how many rows passed the filters.
print("✅ Final Filtered Dataset:")
print(result.head())
print(f"Total rows after filtering: {result.shape[0]}")


✅ Final Filtered Dataset:
                Exercise Main muscle(s)     Secondary muscles Movement  \
2                Push up        [Chest]  [Shoulders, triceps]     Push   
3     Close grip push up        [Chest]  [Shoulders, triceps]     Push   
5        Deficit push up        [Chest]  [Shoulders, triceps]     Push   
17   Barbell bench press        [Chest]  [Shoulders, triceps]     Push   
18  Barbell larsen press        [Chest]  [Shoulders, triceps]     Push   

        Type Lower bound (lbs/resistance/time)      Level  Difficulty  \
2   Compound                                 0  [2, 3, 4]           2   
3   Compound                                 0  [2, 3, 4]           2   
5   Compound                                 0  [2, 3, 4]           2   
17  Compound                                45  [2, 3, 4]           2   
18  Compound                                45  [2, 3, 4]           2   

   Equipment Type                   Equipment  Prerequesite Exercise  \
2            body 

# SECONDARY FILTERING FUNCTIONS

In [None]:
def filter_muscles(df, muscles):
    """
    Filters exercises based on muscle.

    A row is kept when its 'Main muscle(s)' cell contains at least one of the
    requested muscles. The input DataFrame is left untouched.

    Parameters:
    - df (DataFrame): The dataset to filter. It must provide the column 'Main muscle(s)'.
    - muscles (list): Keep exercises that target any of these muscles. A single
      muscle name given as a plain string is treated as a one-element list.

    Returns:
    - filtered_df (DataFrame): Filtered Pandas DataFrame based on user criteria.
    """
    # A bare string would otherwise be iterated character by character and
    # never match a muscle name.
    if isinstance(muscles, str):
        muscles = [muscles]

    # Filter by muscles. On an empty column `apply` returns an object-dtype
    # Series, which pandas does not treat as a boolean mask; the cast keeps it a
    # proper row mask so the columns survive.
    mask = df['Main muscle(s)'].apply(
        lambda targets: any(muscle in targets for muscle in muscles)
    ).astype(bool)

    return df[mask]