# **Upload the Datasets**

In [None]:
# Create a Kaggle folder
!mkdir ~/.kaggle

In [None]:
# Upload kaggle.json (the file later moved into ~/.kaggle) from the local machine
from google.colab import files
files.upload()

In [None]:
# Move kaggle.json to the kaggle folder and restrict its permissions
# to read/write for the owner only (mode 600)
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download dataset 1 from Kaggle: "Fitness Exercises"
!kaggle datasets download -d omarxadel/fitness-exercises-dataset

Dataset URL: https://www.kaggle.com/datasets/omarxadel/fitness-exercises-dataset
License(s): MIT
fitness-exercises-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
# Extract dataset 1 and list the files that came out of the archive
import os
import zipfile

archive_path = "fitness-exercises-dataset.zip"
extract_dir = "fitness_dataset"

with zipfile.ZipFile(archive_path, mode="r") as archive:
    archive.extractall(extract_dir)

os.listdir(extract_dir)

['exercises.csv']

In [None]:
# Upload Dataset2.xlsx from the local machine (it is read with pd.read_excel further down)
from google.colab import files
files.upload()

# **Preprocessing of Dataset 1**

In [None]:
import pandas as pd

# Load dataset 1 from the extracted archive and preview the first rows
df = pd.read_csv("fitness_dataset/exercises.csv")
df.head()

Unnamed: 0,bodyPart,equipment,gifUrl,id,name,target,secondaryMuscles/0,secondaryMuscles/1,instructions/0,instructions/1,...,instructions/5,secondaryMuscles/2,instructions/6,instructions/7,secondaryMuscles/3,instructions/8,secondaryMuscles/4,instructions/9,secondaryMuscles/5,instructions/10
0,waist,body weight,https://v2.exercisedb.io/image/MOnK4iG0MEt9h8,1,3/4 sit-up,abs,hip flexors,lower back,Lie flat on your back with your knees bent and...,Place your hands behind your head with your el...,...,,,,,,,,,,
1,waist,body weight,https://v2.exercisedb.io/image/PERWLDGUxVbpHS,2,45° side bend,abs,obliques,,Stand with your feet shoulder-width apart and ...,Keeping your back straight and your core engag...,...,,,,,,,,,,
2,waist,body weight,https://v2.exercisedb.io/image/PLr4yo3j-f1amp,3,air bike,abs,hip flexors,,Lie flat on your back with your hands placed b...,Lift your legs off the ground and bend your kn...,...,,,,,,,,,,
3,upper legs,body weight,https://v2.exercisedb.io/image/XPQwM7HECjgNFE,1512,all fours squad stretch,quads,hamstrings,glutes,Start on all fours with your hands directly un...,"Extend one leg straight back, keeping your kne...",...,,,,,,,,,,
4,waist,body weight,https://v2.exercisedb.io/image/5nYph4eUGNiEdf,6,alternate heel touchers,abs,obliques,,Lie flat on your back with your knees bent and...,"Extend your arms straight out to the sides, pa...",...,,,,,,,,,,


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1324 entries, 0 to 1323
Data columns (total 23 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   bodyPart            1324 non-null   object
 1   equipment           1324 non-null   object
 2   gifUrl              1324 non-null   object
 3   id                  1324 non-null   int64 
 4   name                1324 non-null   object
 5   target              1324 non-null   object
 6   secondaryMuscles/0  1324 non-null   object
 7   secondaryMuscles/1  986 non-null    object
 8   instructions/0      1324 non-null   object
 9   instructions/1      1324 non-null   object
 10  instructions/2      1324 non-null   object
 11  instructions/3      1324 non-null   object
 12  instructions/4      1242 non-null   object
 13  instructions/5      739 non-null    object
 14  secondaryMuscles/2  233 non-null    object
 15  instructions/6      313 non-null    object
 16  instructions/7      92 n

In [None]:
# Drop the columns that are not kept for the merged dataset: id, gifUrl,
# every instructions/* step (0-10) and the rarely populated secondaryMuscles/2-5
unused_columns = (
    ["id", "gifUrl"]
    + [f"secondaryMuscles/{position}" for position in range(2, 6)]
    + [f"instructions/{step}" for step in range(11)]
)
df = df.drop(columns=unused_columns)

In [None]:
df

Unnamed: 0,bodyPart,equipment,name,target,secondaryMuscles/0,secondaryMuscles/1
0,waist,body weight,3/4 sit-up,abs,hip flexors,lower back
1,waist,body weight,45° side bend,abs,obliques,
2,waist,body weight,air bike,abs,hip flexors,
3,upper legs,body weight,all fours squad stretch,quads,hamstrings,glutes
4,waist,body weight,alternate heel touchers,abs,obliques,
...,...,...,...,...,...,...
1319,chest,body weight,wide-grip chest dip on high parallel bars,pectorals,triceps,shoulders
1320,waist,body weight,wind sprints,abs,quadriceps,hamstrings
1321,upper legs,body weight,world greatest stretch,hamstrings,glutes,quadriceps
1322,lower arms,body weight,wrist circles,forearms,hands,wrists


In [None]:
# Rename bodyPart to body_part, the column name dataset 2 is also given later on
df = df.rename(columns={"bodyPart": "body_part"})

In [None]:
df

Unnamed: 0,body_part,equipment,name,target,secondaryMuscles/0,secondaryMuscles/1
0,waist,body weight,3/4 sit-up,abs,hip flexors,lower back
1,waist,body weight,45° side bend,abs,obliques,
2,waist,body weight,air bike,abs,hip flexors,
3,upper legs,body weight,all fours squad stretch,quads,hamstrings,glutes
4,waist,body weight,alternate heel touchers,abs,obliques,
...,...,...,...,...,...,...
1319,chest,body weight,wide-grip chest dip on high parallel bars,pectorals,triceps,shoulders
1320,waist,body weight,wind sprints,abs,quadriceps,hamstrings
1321,upper legs,body weight,world greatest stretch,hamstrings,glutes,quadriceps
1322,lower arms,body weight,wrist circles,forearms,hands,wrists


In [None]:
# Merge the remaining secondary-muscle columns into one comma-separated column


def _join_non_null(row):
    """Join the non-missing values of one row with ', ' (missing values are skipped)."""
    return ", ".join(str(value) for value in row if pd.notnull(value))


# Columns that get merged and are removed afterwards
sec_cols = [name for name in df.columns if name.startswith("secondaryMuscles/")]

df["secondary_muscles_full"] = df[sec_cols].apply(_join_non_null, axis=1)

# The individual columns are redundant once merged
df = df.drop(columns=sec_cols)

In [None]:
df

Unnamed: 0,body_part,equipment,name,target,secondary_muscles_full
0,waist,body weight,3/4 sit-up,abs,"hip flexors, lower back"
1,waist,body weight,45° side bend,abs,obliques
2,waist,body weight,air bike,abs,hip flexors
3,upper legs,body weight,all fours squad stretch,quads,"hamstrings, glutes"
4,waist,body weight,alternate heel touchers,abs,obliques
...,...,...,...,...,...
1319,chest,body weight,wide-grip chest dip on high parallel bars,pectorals,"triceps, shoulders"
1320,waist,body weight,wind sprints,abs,"quadriceps, hamstrings"
1321,upper legs,body weight,world greatest stretch,hamstrings,"glutes, quadriceps"
1322,lower arms,body weight,wrist circles,forearms,"hands, wrists"


In [None]:
# Add a difficulty column (beginner, intermediate, advanced)
def classify_difficulty(equipment, target):
    """Return "beginner", "intermediate" or "advanced" for one exercise.

    The level is first derived from ``equipment``: the keyword groups below are
    tried in order (beginner, intermediate, advanced) and the first group with
    a keyword that occurs inside ``equipment`` wins. Equipment that matches no
    keyword is treated as "intermediate".

    Only an "intermediate" result is then adjusted by ``target``: easy targets
    lower it to "beginner", hard targets raise it to "advanced"; every other
    target leaves it unchanged. "beginner" and "advanced" results are never
    adjusted.
    """
    equipment_tiers = (
        ("beginner", ('body weight', 'assisted', 'ball')),
        ("intermediate", ('dumbbell', 'band', 'rope', 'roller', 'cable')),
        ("advanced", ('barbell', 'smith machine', 'kettlebell',
                      'trap bar', 'sled machine', 'stepmill machine')),
    )
    easy_targets = ('abs', 'biceps', 'forearms')
    hard_targets = ('pectorals', 'lats', 'traps', 'upper back', 'spine')

    # Classify based on the equipment; unmatched equipment stays "intermediate"
    level = "intermediate"
    for label, keywords in equipment_tiers:
        if any(keyword in equipment for keyword in keywords):
            level = label
            break

    # Adjust based on the target (applies to "intermediate" only)
    if level == "intermediate":
        if target in easy_targets:
            return "beginner"
        if target in hard_targets:
            return "advanced"
    return level

# Apply on df: one difficulty label per exercise, from its equipment and target
df["difficulty"] = [
    classify_difficulty(equipment, target)
    for equipment, target in zip(df["equipment"], df["target"])
]


In [None]:
# See how many beginner, intermediate and advanced exercises we have in dataset 1
df.groupby("difficulty").count()

Unnamed: 0_level_0,body_part,equipment,name,target,secondary_muscles_full
difficulty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
advanced,473,473,473,473,473
beginner,574,574,574,574,574
intermediate,277,277,277,277,277


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1324 entries, 0 to 1323
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               1324 non-null   object
 1   equipment               1324 non-null   object
 2   name                    1324 non-null   object
 3   target                  1324 non-null   object
 4   secondary_muscles_full  1324 non-null   object
 5   difficulty              1324 non-null   object
dtypes: object(6)
memory usage: 62.2+ KB


# **Preprocessing of Dataset 2**

In [None]:
# Load dataset 2 (Dataset2.xlsx) into pandas and preview the first rows
df2 = pd.read_excel("Dataset2.xlsx")
df2.head()

Unnamed: 0.1,Unnamed: 0,Exercise,Short YouTube Demonstration,In-Depth YouTube Explanation,Difficulty Level,Target Muscle Group,Prime Mover Muscle,Secondary Muscle,Tertiary Muscle,Primary Equipment,...,Movement Pattern #2,Movement Pattern #3,Plane Of Motion #1,Plane Of Motion #2,Plane Of Motion #3,Body Region,Force Type,Mechanics,Laterality,Primary Exercise Classification
0,,Stability Ball Dead Bug,Video Demonstration,Video Explanation,Beginner,Abdominals,Rectus Abdominis,Obliques,Rectus Femoris,Stability Ball,...,,,Sagittal Plane,,,Core,Other,Compound,Contralateral,Postural
1,,Bodyweight Glute Bridge,Video Demonstration,Video Explanation,Beginner,Glutes,Gluteus Maximus,Biceps Femoris,Erector Spinae,Bodyweight,...,,,Sagittal Plane,,,Lower Body,Unsorted*,Compound,Bilateral,Bodybuilding
2,,Bodyweight Bird Dog,Video Demonstration,Video Explanation,Beginner,Abdominals,Rectus Abdominis,Gluteus Maximus,Erector Spinae,Bodyweight,...,,,Sagittal Plane,,,Core,Other,Compound,Contralateral,Postural
3,,Stability Ball Russian Twist,Video Demonstration,Video Explanation,Beginner,Abdominals,Obliques,Rectus Abdominis,Iliopsoas,Stability Ball,...,,,Transverse Plane,,,Core,Other,Compound,Bilateral,Unsorted*
4,,Stability Ball Feet Elevated Crunch,Video Demonstration,Video Explanation,Beginner,Abdominals,Rectus Abdominis,Iliopsoas,Obliques,Stability Ball,...,,,Sagittal Plane,,,Core,Other,Isolation,Bilateral,Unsorted*


In [None]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3242 entries, 0 to 3241
Data columns (total 32 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Unnamed: 0                       0 non-null      float64
 1   Exercise                         3242 non-null   object 
 2   Short YouTube Demonstration      2013 non-null   object 
 3   In-Depth YouTube Explanation     950 non-null    object 
 4   Difficulty Level                 3242 non-null   object 
 5   Target Muscle Group              3242 non-null   object 
 6   Prime Mover Muscle               3242 non-null   object 
 7   Secondary Muscle                 2831 non-null   object 
 8   Tertiary Muscle                  2047 non-null   object 
 9   Primary Equipment                3242 non-null   object 
 10  # Primary Items                  3242 non-null   int64  
 11  Secondary Equipment              688 non-null    object 
 12  # Secondary Items   

In [None]:
# See how many difficulty levels we have and how many exercises for each level
df2.groupby("Difficulty Level").count()

Unnamed: 0_level_0,Unnamed: 0,Exercise,Short YouTube Demonstration,In-Depth YouTube Explanation,Target Muscle Group,Prime Mover Muscle,Secondary Muscle,Tertiary Muscle,Primary Equipment,# Primary Items,...,Movement Pattern #2,Movement Pattern #3,Plane Of Motion #1,Plane Of Motion #2,Plane Of Motion #3,Body Region,Force Type,Mechanics,Laterality,Primary Exercise Classification
Difficulty Level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Advanced,0,462,208,93,462,462,396,250,462,462,...,99,9,462,13,1,462,462,461,462,462
Beginner,0,414,360,195,414,414,371,284,414,414,...,15,2,411,10,0,414,414,413,414,414
Expert,0,126,70,39,126,126,91,39,126,126,...,30,4,126,4,0,126,126,126,126,126
Grand Master,0,8,4,1,8,8,4,3,8,8,...,3,0,8,0,0,8,8,8,8,8
Intermediate,0,1092,622,303,1092,1092,961,650,1092,1092,...,195,20,1091,22,0,1092,1092,1087,1092,1092
Legendary,0,3,1,0,3,3,3,3,3,3,...,3,0,3,0,0,3,3,3,3,3
Master,0,32,14,8,32,32,23,6,32,32,...,5,0,32,0,0,32,32,32,32,32
Novice,0,1105,734,311,1105,1105,982,812,1105,1105,...,111,2,1105,4,0,1105,1105,1099,1105,1105


In [None]:
# Display the column names as a plain list, which makes stray trailing spaces in the names visible
print(df2.columns.tolist())

['Unnamed: 0', 'Exercise', 'Short YouTube Demonstration', 'In-Depth YouTube Explanation', 'Difficulty Level', 'Target Muscle Group ', 'Prime Mover Muscle', 'Secondary Muscle', 'Tertiary Muscle', 'Primary Equipment ', '# Primary Items', 'Secondary Equipment', '# Secondary Items', 'Posture', 'Single or Double Arm', 'Continuous or Alternating Arms ', 'Grip', 'Load Position (Ending)', 'Continuous or Alternating Legs ', 'Foot Elevation', 'Combination Exercises', 'Movement Pattern #1', 'Movement Pattern #2', 'Movement Pattern #3', 'Plane Of Motion #1', 'Plane Of Motion #2', 'Plane Of Motion #3', 'Body Region', 'Force Type', 'Mechanics', 'Laterality', 'Primary Exercise Classification']


In [None]:
# Deletion of unnecessary columns.
# Names are matched exactly as they appear in the sheet, including the
# trailing space that some of them carry.
unneeded_columns = [
    "Unnamed: 0",
    "Short YouTube Demonstration",
    "In-Depth YouTube Explanation",
    "# Primary Items",
    "# Secondary Items",
    "Posture",
    "Single or Double Arm",
    "Continuous or Alternating Arms ",
    "Grip",
    "Load Position (Ending)",
    "Continuous or Alternating Legs ",
    "Foot Elevation",
    "Combination Exercises",
    *[f"Movement Pattern #{n}" for n in range(1, 4)],
    *[f"Plane Of Motion #{n}" for n in range(1, 4)],
    "Force Type",
    "Mechanics",
    "Laterality",
    "Primary Exercise Classification",
]
df2 = df2.drop(columns=unneeded_columns)

In [None]:
df2.head()

Unnamed: 0,Exercise,Difficulty Level,Target Muscle Group,Prime Mover Muscle,Secondary Muscle,Tertiary Muscle,Primary Equipment,Secondary Equipment,Body Region
0,Stability Ball Dead Bug,Beginner,Abdominals,Rectus Abdominis,Obliques,Rectus Femoris,Stability Ball,,Core
1,Bodyweight Glute Bridge,Beginner,Glutes,Gluteus Maximus,Biceps Femoris,Erector Spinae,Bodyweight,,Lower Body
2,Bodyweight Bird Dog,Beginner,Abdominals,Rectus Abdominis,Gluteus Maximus,Erector Spinae,Bodyweight,,Core
3,Stability Ball Russian Twist,Beginner,Abdominals,Obliques,Rectus Abdominis,Iliopsoas,Stability Ball,,Core
4,Stability Ball Feet Elevated Crunch,Beginner,Abdominals,Rectus Abdominis,Iliopsoas,Obliques,Stability Ball,,Core


In [None]:
# Normalise the column names: trim surrounding spaces and convert to lower case
df2.columns = [label.strip().lower() for label in df2.columns]
print(df2.columns.tolist())

['exercise', 'difficulty level', 'target muscle group', 'prime mover muscle', 'secondary muscle', 'tertiary muscle', 'primary equipment', 'secondary equipment', 'body region']


In [None]:
# Rename "target muscle group" -> "target" (the column name used by dataset 1)
df2 = df2.rename(columns={"target muscle group": "target"})
df2

Unnamed: 0,exercise,difficulty level,target,prime mover muscle,secondary muscle,tertiary muscle,primary equipment,secondary equipment,body region
0,Stability Ball Dead Bug,Beginner,Abdominals,Rectus Abdominis,Obliques,Rectus Femoris,Stability Ball,,Core
1,Bodyweight Glute Bridge,Beginner,Glutes,Gluteus Maximus,Biceps Femoris,Erector Spinae,Bodyweight,,Lower Body
2,Bodyweight Bird Dog,Beginner,Abdominals,Rectus Abdominis,Gluteus Maximus,Erector Spinae,Bodyweight,,Core
3,Stability Ball Russian Twist,Beginner,Abdominals,Obliques,Rectus Abdominis,Iliopsoas,Stability Ball,,Core
4,Stability Ball Feet Elevated Crunch,Beginner,Abdominals,Rectus Abdominis,Iliopsoas,Obliques,Stability Ball,,Core
...,...,...,...,...,...,...,...,...,...
3237,Slider Sandbag Front Rack Reverse Lunge,Novice,Quadriceps,Quadriceps Femoris,Gluteus Maximus,Adductor Magnus,Sliders,Sandbag,Lower Body
3238,Slider Sandbag Back Rack Reverse Lunge,Novice,Quadriceps,Quadriceps Femoris,Gluteus Maximus,Adductor Magnus,Sliders,Sandbag,Lower Body
3239,Cable Straight Bar Reverse Grip Front Raise,Novice,Shoulders,Anterior Deltoids,Pectoralis Major,,Cable,,Upper Body
3240,EZ Bar Reverse Grip Front Raise,Novice,Shoulders,Anterior Deltoids,Pectoralis Major,,EZ Bar,,Upper Body


In [None]:
# Merge the muscle columns (prime mover, secondary, tertiary) into a single
# comma-separated column; missing entries are skipped
sec_cols = ["prime mover muscle", "secondary muscle", "tertiary muscle"]

df2["secondary_muscles_full"] = [
    ", ".join(str(muscle) for muscle in muscles if pd.notnull(muscle))
    for muscles in df2[sec_cols].itertuples(index=False, name=None)
]

# The individual columns are redundant once merged
df2 = df2.drop(columns=sec_cols)


In [None]:
df2

Unnamed: 0,exercise,difficulty level,target,primary equipment,secondary equipment,body region,secondary_muscles_full
0,Stability Ball Dead Bug,Beginner,Abdominals,Stability Ball,,Core,"Rectus Abdominis, Obliques, Rectus Femoris"
1,Bodyweight Glute Bridge,Beginner,Glutes,Bodyweight,,Lower Body,"Gluteus Maximus, Biceps Femoris, Erector Spinae"
2,Bodyweight Bird Dog,Beginner,Abdominals,Bodyweight,,Core,"Rectus Abdominis, Gluteus Maximus, Erector Spinae"
3,Stability Ball Russian Twist,Beginner,Abdominals,Stability Ball,,Core,"Obliques, Rectus Abdominis, Iliopsoas"
4,Stability Ball Feet Elevated Crunch,Beginner,Abdominals,Stability Ball,,Core,"Rectus Abdominis, Iliopsoas, Obliques"
...,...,...,...,...,...,...,...
3237,Slider Sandbag Front Rack Reverse Lunge,Novice,Quadriceps,Sliders,Sandbag,Lower Body,"Quadriceps Femoris, Gluteus Maximus, Adductor ..."
3238,Slider Sandbag Back Rack Reverse Lunge,Novice,Quadriceps,Sliders,Sandbag,Lower Body,"Quadriceps Femoris, Gluteus Maximus, Adductor ..."
3239,Cable Straight Bar Reverse Grip Front Raise,Novice,Shoulders,Cable,,Upper Body,"Anterior Deltoids, Pectoralis Major"
3240,EZ Bar Reverse Grip Front Raise,Novice,Shoulders,EZ Bar,,Upper Body,"Anterior Deltoids, Pectoralis Major"


In [None]:
# Merge the primary and secondary equipment into a single comma-separated
# "equipment" column; a missing entry is skipped
equ_cols = ["primary equipment", "secondary equipment"]

df2["equipment"] = [
    ", ".join(str(item) for item in (primary, secondary) if pd.notnull(item))
    for primary, secondary in zip(df2[equ_cols[0]], df2[equ_cols[1]])
]

# The individual columns are redundant once merged
df2 = df2.drop(columns=equ_cols)

In [None]:
df2

Unnamed: 0,exercise,difficulty level,target,body region,secondary_muscles_full,equipment
0,Stability Ball Dead Bug,Beginner,Abdominals,Core,"Rectus Abdominis, Obliques, Rectus Femoris",Stability Ball
1,Bodyweight Glute Bridge,Beginner,Glutes,Lower Body,"Gluteus Maximus, Biceps Femoris, Erector Spinae",Bodyweight
2,Bodyweight Bird Dog,Beginner,Abdominals,Core,"Rectus Abdominis, Gluteus Maximus, Erector Spinae",Bodyweight
3,Stability Ball Russian Twist,Beginner,Abdominals,Core,"Obliques, Rectus Abdominis, Iliopsoas",Stability Ball
4,Stability Ball Feet Elevated Crunch,Beginner,Abdominals,Core,"Rectus Abdominis, Iliopsoas, Obliques",Stability Ball
...,...,...,...,...,...,...
3237,Slider Sandbag Front Rack Reverse Lunge,Novice,Quadriceps,Lower Body,"Quadriceps Femoris, Gluteus Maximus, Adductor ...","Sliders, Sandbag"
3238,Slider Sandbag Back Rack Reverse Lunge,Novice,Quadriceps,Lower Body,"Quadriceps Femoris, Gluteus Maximus, Adductor ...","Sliders, Sandbag"
3239,Cable Straight Bar Reverse Grip Front Raise,Novice,Shoulders,Upper Body,"Anterior Deltoids, Pectoralis Major",Cable
3240,EZ Bar Reverse Grip Front Raise,Novice,Shoulders,Upper Body,"Anterior Deltoids, Pectoralis Major",EZ Bar


In [None]:
# Rename the columns to the names used by dataset 1
df2 = df2.rename(
    columns={
        "exercise": "name",
        "difficulty level": "difficulty",
        "body region": "body_part",
    }
)
df2

Unnamed: 0,name,difficulty,target,body_part,secondary_muscles_full,equipment
0,Stability Ball Dead Bug,Beginner,Abdominals,Core,"Rectus Abdominis, Obliques, Rectus Femoris",Stability Ball
1,Bodyweight Glute Bridge,Beginner,Glutes,Lower Body,"Gluteus Maximus, Biceps Femoris, Erector Spinae",Bodyweight
2,Bodyweight Bird Dog,Beginner,Abdominals,Core,"Rectus Abdominis, Gluteus Maximus, Erector Spinae",Bodyweight
3,Stability Ball Russian Twist,Beginner,Abdominals,Core,"Obliques, Rectus Abdominis, Iliopsoas",Stability Ball
4,Stability Ball Feet Elevated Crunch,Beginner,Abdominals,Core,"Rectus Abdominis, Iliopsoas, Obliques",Stability Ball
...,...,...,...,...,...,...
3237,Slider Sandbag Front Rack Reverse Lunge,Novice,Quadriceps,Lower Body,"Quadriceps Femoris, Gluteus Maximus, Adductor ...","Sliders, Sandbag"
3238,Slider Sandbag Back Rack Reverse Lunge,Novice,Quadriceps,Lower Body,"Quadriceps Femoris, Gluteus Maximus, Adductor ...","Sliders, Sandbag"
3239,Cable Straight Bar Reverse Grip Front Raise,Novice,Shoulders,Upper Body,"Anterior Deltoids, Pectoralis Major",Cable
3240,EZ Bar Reverse Grip Front Raise,Novice,Shoulders,Upper Body,"Anterior Deltoids, Pectoralis Major",EZ Bar


In [None]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3242 entries, 0 to 3241
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   name                    3242 non-null   object
 1   difficulty              3242 non-null   object
 2   target                  3242 non-null   object
 3   body_part               3242 non-null   object
 4   secondary_muscles_full  3242 non-null   object
 5   equipment               3242 non-null   object
dtypes: object(6)
memory usage: 152.1+ KB


# **Concatenate the 2 Datasets**

In [None]:
# Concatenate the two datasets.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# frame keeps its own labels and the combined index contains duplicates,
# which makes label-based selection (.loc) ambiguous.
df_all = pd.concat([df, df2], ignore_index=True)
df_all

Unnamed: 0,body_part,equipment,name,target,secondary_muscles_full,difficulty
0,waist,body weight,3/4 sit-up,abs,"hip flexors, lower back",beginner
1,waist,body weight,45° side bend,abs,obliques,beginner
2,waist,body weight,air bike,abs,hip flexors,beginner
3,upper legs,body weight,all fours squad stretch,quads,"hamstrings, glutes",beginner
4,waist,body weight,alternate heel touchers,abs,obliques,beginner
...,...,...,...,...,...,...
3237,Lower Body,"Sliders, Sandbag",Slider Sandbag Front Rack Reverse Lunge,Quadriceps,"Quadriceps Femoris, Gluteus Maximus, Adductor ...",Novice
3238,Lower Body,"Sliders, Sandbag",Slider Sandbag Back Rack Reverse Lunge,Quadriceps,"Quadriceps Femoris, Gluteus Maximus, Adductor ...",Novice
3239,Upper Body,Cable,Cable Straight Bar Reverse Grip Front Raise,Shoulders,"Anterior Deltoids, Pectoralis Major",Novice
3240,Upper Body,EZ Bar,EZ Bar Reverse Grip Front Raise,Shoulders,"Anterior Deltoids, Pectoralis Major",Novice


In [None]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4566 entries, 0 to 3241
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               4566 non-null   object
 1   equipment               4566 non-null   object
 2   name                    4566 non-null   object
 3   target                  4566 non-null   object
 4   secondary_muscles_full  4566 non-null   object
 5   difficulty              4566 non-null   object
dtypes: object(6)
memory usage: 249.7+ KB


In [None]:
# Delete duplicates.
# Names are compared on a trimmed, lower-cased key: dataset 1 names are
# lower case while dataset 2 names are Title Case, so an exact comparison
# would never match the same exercise across the two datasets.
# The first occurrence of each name is kept, as drop_duplicates did.
name_key = df_all["name"].str.strip().str.lower()
is_first_occurrence = ~name_key.duplicated(keep="first")
# Boolean array (not a Series) so that no index alignment is attempted;
# .copy() yields an independent frame that later cells can modify freely.
df_all = df_all.loc[is_first_occurrence.to_numpy()].copy()

In [None]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4558 entries, 0 to 3241
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               4558 non-null   object
 1   equipment               4558 non-null   object
 2   name                    4558 non-null   object
 3   target                  4558 non-null   object
 4   secondary_muscles_full  4558 non-null   object
 5   difficulty              4558 non-null   object
dtypes: object(6)
memory usage: 249.3+ KB


In [None]:
# Make all the info in lower case.
# DataFrame.applymap is deprecated in recent pandas releases, so the
# element-wise function is applied column by column with Series.map, which is
# available in old and new versions alike.


def _lower_if_text(value):
    """Return ``value`` lower-cased when it is a string, unchanged otherwise."""
    return value.lower() if isinstance(value, str) else value


df_all = df_all.copy()
for column_name in df_all.columns:
    df_all[column_name] = df_all[column_name].map(_lower_if_text)
df_all

  df_all = df_all.applymap(lambda x: x.lower() if isinstance(x, str) else x)


Unnamed: 0,body_part,equipment,name,target,secondary_muscles_full,difficulty
0,waist,body weight,3/4 sit-up,abs,"hip flexors, lower back",beginner
1,waist,body weight,45° side bend,abs,obliques,beginner
2,waist,body weight,air bike,abs,hip flexors,beginner
3,upper legs,body weight,all fours squad stretch,quads,"hamstrings, glutes",beginner
4,waist,body weight,alternate heel touchers,abs,obliques,beginner
...,...,...,...,...,...,...
3237,lower body,"sliders, sandbag",slider sandbag front rack reverse lunge,quadriceps,"quadriceps femoris, gluteus maximus, adductor ...",novice
3238,lower body,"sliders, sandbag",slider sandbag back rack reverse lunge,quadriceps,"quadriceps femoris, gluteus maximus, adductor ...",novice
3239,upper body,cable,cable straight bar reverse grip front raise,shoulders,"anterior deltoids, pectoralis major",novice
3240,upper body,ez bar,ez bar reverse grip front raise,shoulders,"anterior deltoids, pectoralis major",novice


In [None]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4558 entries, 0 to 3241
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               4558 non-null   object
 1   equipment               4558 non-null   object
 2   name                    4558 non-null   object
 3   target                  4558 non-null   object
 4   secondary_muscles_full  4558 non-null   object
 5   difficulty              4558 non-null   object
dtypes: object(6)
memory usage: 249.3+ KB


In [None]:
# Make the difficulty levels united: beginner, intermediate and advanced.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on df_all["difficulty"]: that chained in-place form is
# deprecated and leaves df_all unchanged when pandas Copy-on-Write is active.
df_all["difficulty"] = df_all["difficulty"].replace({
    "novice": "beginner",
    "master": "advanced",
    "legendary": "advanced",
    "grand master": "advanced",
    "expert": "advanced"
})


In [None]:
df_all.groupby("difficulty").count()

Unnamed: 0_level_0,body_part,equipment,name,target,secondary_muscles_full
difficulty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
advanced,1100,1100,1100,1100,1100
beginner,2090,2090,2090,2090,2090
intermediate,1368,1368,1368,1368,1368


In [None]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4558 entries, 0 to 3241
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               4558 non-null   object
 1   equipment               4558 non-null   object
 2   name                    4558 non-null   object
 3   target                  4558 non-null   object
 4   secondary_muscles_full  4558 non-null   object
 5   difficulty              4558 non-null   object
dtypes: object(6)
memory usage: 249.3+ KB


In [None]:
# Get the unique values of the column "target"
print(df_all["target"].unique())

['abs' 'quads' 'lats' 'calves' 'pectorals' 'glutes' 'hamstrings'
 'adductors' 'triceps' 'cardiovascular system' 'spine' 'upper back'
 'biceps' 'delts' 'forearms' 'traps' 'serratus anterior' 'abductors'
 'levator scapulae' 'abdominals' 'chest' 'hip flexors' 'shoulders' 'back'
 'quadriceps' 'trapezius' 'shins' 'trapezius ']


In [None]:
# Unify duplicate targets: collapse synonymous labels onto a single spelling
target_synonyms = {
    'abs': 'abdominals',
    'quads': 'quadriceps',
    'traps': 'trapezius',
    'chest': 'pectorals',
    'delts': 'shoulders',
}
df_all['target'] = df_all['target'].replace(target_synonyms)

In [None]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4558 entries, 0 to 3241
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               4558 non-null   object
 1   equipment               4558 non-null   object
 2   name                    4558 non-null   object
 3   target                  4558 non-null   object
 4   secondary_muscles_full  4558 non-null   object
 5   difficulty              4558 non-null   object
dtypes: object(6)
memory usage: 249.3+ KB


In [None]:
df_all.groupby("target").count()

Unnamed: 0_level_0,body_part,equipment,name,secondary_muscles_full,difficulty
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abdominals,601,601,601,601,601
abductors,27,27,27,27,27
adductors,26,26,26,26,26
back,182,182,182,182,182
biceps,264,264,264,264,264
calves,106,106,106,106,106
cardiovascular system,29,29,29,29,29
forearms,65,65,65,65,65
glutes,325,325,325,325,325
hamstrings,68,68,68,68,68


In [None]:
df_all['target'].unique()

array(['abdominals', 'quadriceps', 'lats', 'calves', 'pectorals',
       'glutes', 'hamstrings', 'adductors', 'triceps',
       'cardiovascular system', 'spine', 'upper back', 'biceps',
       'shoulders', 'forearms', 'trapezius', 'serratus anterior',
       'abductors', 'levator scapulae', 'hip flexors', 'back', 'shins',
       'trapezius '], dtype=object)

In [None]:
# trapezius (with trailing space) -> trapezius without space.
# Assigned back to the column rather than replace(inplace=True) on
# df_all["target"]: the chained in-place form is deprecated and leaves df_all
# unchanged when pandas Copy-on-Write is active.
df_all["target"] = df_all["target"].replace({"trapezius ": "trapezius"})

In [None]:
df_all.groupby("target").count()

Unnamed: 0_level_0,body_part,equipment,name,secondary_muscles_full,difficulty
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abdominals,601,601,601,601,601
abductors,27,27,27,27,27
adductors,26,26,26,26,26
back,182,182,182,182,182
biceps,264,264,264,264,264
calves,106,106,106,106,106
cardiovascular system,29,29,29,29,29
forearms,65,65,65,65,65
glutes,325,325,325,325,325
hamstrings,68,68,68,68,68


In [None]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4558 entries, 0 to 3241
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               4558 non-null   object
 1   equipment               4558 non-null   object
 2   name                    4558 non-null   object
 3   target                  4558 non-null   object
 4   secondary_muscles_full  4558 non-null   object
 5   difficulty              4558 non-null   object
dtypes: object(6)
memory usage: 249.3+ KB


# **Add the columns "sets" and "reps"**

In [None]:
# Generate the sets and reps based on the difficulty levels
import random

# (lowest sets, highest sets) and the rep-range choices for each difficulty level
_SETS_REPS_BY_LEVEL = {
    "beginner": ((2, 3), ["10-12", "12-15"]),
    "intermediate": ((3, 4), ["12-15", "15-18"]),
    "advanced": ((4, 5), ["8-10", "10-12"]),
}


def generate_sets_reps(difficulty, rng=random):
    """Draw a random (sets, reps) prescription for a difficulty level.

    Parameters
    ----------
    difficulty : str
        "beginner" or "intermediate"; any other value (including "advanced")
        uses the advanced ranges.
    rng : module or random.Random, optional
        Source of randomness; anything providing ``randint`` and ``choice``.
        Defaults to the ``random`` module, i.e. the shared global generator.
        Pass ``random.Random(seed)`` to get reproducible draws.

    Returns
    -------
    tuple
        ``(sets, reps)`` where ``sets`` is an int and ``reps`` is a range
        string such as "10-12".
    """
    # Unknown labels fall back to the advanced ranges
    (lowest, highest), rep_ranges = _SETS_REPS_BY_LEVEL.get(
        difficulty, _SETS_REPS_BY_LEVEL["advanced"]
    )
    # Draw sets first, then reps, so a seeded generator yields a stable sequence
    sets = rng.randint(lowest, highest)
    reps = rng.choice(rep_ranges)
    return sets, reps

In [None]:
# Apply and add these 2 columns.
# The global generator is seeded first so that re-running the notebook writes
# the same sets/reps into the dataset every time.
SETS_REPS_SEED = 42
random.seed(SETS_REPS_SEED)

# One (sets, reps) draw per row, in row order
sets_and_reps = [generate_sets_reps(level) for level in df_all["difficulty"]]
df_all["sets"] = [n_sets for n_sets, _ in sets_and_reps]
df_all["reps"] = [rep_range for _, rep_range in sets_and_reps]

In [None]:
df_all

Unnamed: 0,body_part,equipment,name,target,secondary_muscles_full,difficulty,sets,reps
0,waist,body weight,3/4 sit-up,abdominals,"hip flexors, lower back",beginner,3,10-12
1,waist,body weight,45° side bend,abdominals,obliques,beginner,3,10-12
2,waist,body weight,air bike,abdominals,hip flexors,beginner,2,10-12
3,upper legs,body weight,all fours squad stretch,quadriceps,"hamstrings, glutes",beginner,3,10-12
4,waist,body weight,alternate heel touchers,abdominals,obliques,beginner,2,12-15
...,...,...,...,...,...,...,...,...
3237,lower body,"sliders, sandbag",slider sandbag front rack reverse lunge,quadriceps,"quadriceps femoris, gluteus maximus, adductor ...",beginner,2,12-15
3238,lower body,"sliders, sandbag",slider sandbag back rack reverse lunge,quadriceps,"quadriceps femoris, gluteus maximus, adductor ...",beginner,3,12-15
3239,upper body,cable,cable straight bar reverse grip front raise,shoulders,"anterior deltoids, pectoralis major",beginner,2,12-15
3240,upper body,ez bar,ez bar reverse grip front raise,shoulders,"anterior deltoids, pectoralis major",beginner,2,10-12


In [None]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4558 entries, 0 to 3241
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   body_part               4558 non-null   object
 1   equipment               4558 non-null   object
 2   name                    4558 non-null   object
 3   target                  4558 non-null   object
 4   secondary_muscles_full  4558 non-null   object
 5   difficulty              4558 non-null   object
 6   sets                    4558 non-null   int64 
 7   reps                    4558 non-null   object
dtypes: int64(1), object(7)
memory usage: 320.5+ KB


# **Save the Resulting Dataset**

In [None]:
# Write the merged dataset to CSV; index=False leaves the row index out of the file
df_all.to_csv("fitgenai_dataset.csv", index=False)