In [3]:
import pandas as pd
import numpy as np

In [4]:
data = pd.read_csv('musclewiki_exercises_with_type.csv')
data.head()

Unnamed: 0,Muscle Group,Exercise Name,Instructions,Exercise URL,Video URL,Detailed How-To,Difficulty,Tags,Exercise Type
0,biceps,Chin Ups,1\nGrab the bar shoulder width apart with a su...,https://musclewiki.com/bodyweight/male/biceps/...,https://media.musclewiki.com/media/uploads/vid...,How To Perform The Chin Up\n\nSetup\n\nGrab th...,Intermediate,"How To Do A Chin Ups, intermediate Bodyweight ...",Muscle Building
1,biceps,Cable Bayesian Curl,1\nUse a handle attachment. The cable should b...,https://musclewiki.com/cables/male/biceps/cabl...,https://media.musclewiki.com/media/uploads/vid...,,Beginner,"Biceps Exercise, Bicep Exercise, Arm Exercise",Muscle Building
2,biceps,Dumbbell Curl,1\nStand up straight with a dumbbell in each h...,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,How To Perform the Dumbbell Bicep Curl\n\nSetu...,Novice,"Biceps Exercise, beginner Dumbbells exercise, ...",Muscle Building
3,biceps,Dumbbell Hammer Curl,1\nHold the dumbbells with a neutral grip (thu...,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,,Novice,"Biceps Exercise, beginner Dumbbells exercise, ...",Muscle Building
4,biceps,Dumbbell Reverse Curl,1\nGrab the dumbbells with a pronated (overhan...,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,,Novice,"How to do Dumbbell Reverse Curl, How To Do A D...",Muscle Building


#MISSING Values

In [5]:
data.isna().sum()

Muscle Group          0
Exercise Name         0
Instructions         28
Exercise URL          0
Video URL            34
Detailed How-To    2486
Difficulty           19
Tags                 49
Exercise Type         0
dtype: int64

In [6]:
missing_data_df = data[data['Instructions'].isna() | data['Difficulty'].isna()]



CSV file 'exercises_missing_instructions_or_difficulty.csv' created successfully.


In [8]:
output_filename = 'musclewiki_exercises_combined_with_filled_data.csv'
data_fixed = pd.read_csv('updated_exercises_with_instructions_and_difficulty.csv')

df_merged = pd.merge(
    data,
    data_fixed[['Exercise Name', 'Instructions', 'Difficulty']],
    on='Exercise Name',
    how='left',
    suffixes=('', '_updates') # Use empty string for left df to keep original names, and _updates for right df
)

# Fill missing values in the original 'Instructions' column using values from 'Instructions_updates'.
df_merged['Instructions'] = df_merged['Instructions'].fillna(df_merged['Instructions_updates'])

# Fill missing values in the original 'Difficulty' column using values from 'Difficulty_updates'.
df_merged['Difficulty'] = df_merged['Difficulty'].fillna(df_merged['Difficulty_updates'])

# Drop the temporary '_updates' columns which are no longer needed after filling.
df_merged = df_merged.drop(columns=['Instructions_updates', 'Difficulty_updates'])

# Drop any columns that originally ended with '_filled' from the DataFrame.
columns_to_drop_filled = [col for col in df_merged.columns if col.endswith('_filled')]
df_final = df_merged.drop(columns=columns_to_drop_filled)




Combined and cleaned data saved to final_combined_exercises_revised.csv


In [9]:
df_final.isna().sum()

Muscle Group          0
Exercise Name         0
Instructions          0
Exercise URL          0
Video URL            46
Detailed How-To    2504
Difficulty            0
Tags                 61
Exercise Type         0
dtype: int64

In [10]:
placeholder_url = 'https://placehold.co/640x360/png?text=No+Video+Available'
df_final['Video URL'] = df_final['Video URL'].fillna(placeholder_url)

In [11]:
df_final.isna().sum()

Muscle Group          0
Exercise Name         0
Instructions          0
Exercise URL          0
Video URL             0
Detailed How-To    2504
Difficulty            0
Tags                 61
Exercise Type         0
dtype: int64

In [12]:
# Ensure the required columns exist
required_columns = ['Instructions', 'Detailed How-To', 'Tags']

# Fill any potential NaN values with empty strings to avoid concatenation errors
df_final['Instructions'] = df_final['Instructions'].fillna('')
df_final['Detailed How-To'] = df_final['Detailed How-To'].fillna('')
df_final['Tags'] = df_final['Tags'].fillna('')

# Combine the columns into a new 'Combined_Text' column
# We'll separate them with a clear delimiter like "\n\n" for readability
df_final['Combined_Text'] = df_final['Instructions'] + "\n\n" + df_final['Detailed How-To'] + "\n\n" + df_final['Tags']

df_fin = df_final

In [13]:
df_fin.drop(columns=['Instructions', 'Detailed How-To', 'Tags'], inplace=True)
df_fin.head()

Unnamed: 0,Muscle Group,Exercise Name,Exercise URL,Video URL,Difficulty,Exercise Type,Combined_Text
0,biceps,Chin Ups,https://musclewiki.com/bodyweight/male/biceps/...,https://media.musclewiki.com/media/uploads/vid...,Intermediate,Muscle Building,1\nGrab the bar shoulder width apart with a su...
1,biceps,Cable Bayesian Curl,https://musclewiki.com/cables/male/biceps/cabl...,https://media.musclewiki.com/media/uploads/vid...,Beginner,Muscle Building,1\nUse a handle attachment. The cable should b...
2,biceps,Dumbbell Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nStand up straight with a dumbbell in each h...
3,biceps,Dumbbell Hammer Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nHold the dumbbells with a neutral grip (thu...
4,biceps,Dumbbell Reverse Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nGrab the dumbbells with a pronated (overhan...


In [14]:
df_fin.isna().sum()

Muscle Group     0
Exercise Name    0
Exercise URL     0
Video URL        0
Difficulty       0
Exercise Type    0
Combined_Text    0
dtype: int64

In [15]:
# Extract the equipment type from the URL and create the new 'Equipment' column
df_fin['Equipment'] = df_fin['Exercise URL'].str.split('/').str[3]
df_fin.head()

Unnamed: 0,Muscle Group,Exercise Name,Exercise URL,Video URL,Difficulty,Exercise Type,Combined_Text,Equipment
0,biceps,Chin Ups,https://musclewiki.com/bodyweight/male/biceps/...,https://media.musclewiki.com/media/uploads/vid...,Intermediate,Muscle Building,1\nGrab the bar shoulder width apart with a su...,bodyweight
1,biceps,Cable Bayesian Curl,https://musclewiki.com/cables/male/biceps/cabl...,https://media.musclewiki.com/media/uploads/vid...,Beginner,Muscle Building,1\nUse a handle attachment. The cable should b...,cables
2,biceps,Dumbbell Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nStand up straight with a dumbbell in each h...,dumbbells
3,biceps,Dumbbell Hammer Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nHold the dumbbells with a neutral grip (thu...,dumbbells
4,biceps,Dumbbell Reverse Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nGrab the dumbbells with a pronated (overhan...,dumbbells


In [16]:
print(df_fin['Equipment'].unique())

['bodyweight' 'cables' 'dumbbells' 'barbell' 'kettlebells' 'machine'
 'stretches' 'band' 'plate' 'trx' 'smith-machine' 'yoga' 'bosu-ball'
 'vitruvian' 'cardio' 'recovery' 'medicine-ball']


In [17]:
equipment_to_empty = ['recovery', 'cardio', 'stretches', 'yoga']

# Set the 'Equipment' cell to NaN for rows where 'Equipment' is in the list
df_fin.loc[df_fin['Equipment'].isin(equipment_to_empty), 'Equipment'] = np.nan

In [18]:
print(df_fin['Equipment'].unique())

['bodyweight' 'cables' 'dumbbells' 'barbell' 'kettlebells' 'machine' nan
 'band' 'plate' 'trx' 'smith-machine' 'bosu-ball' 'vitruvian'
 'medicine-ball']


In [19]:
df_fin.describe()

Unnamed: 0,Muscle Group,Exercise Name,Exercise URL,Video URL,Difficulty,Exercise Type,Combined_Text,Equipment
count,2607,2607,2607,2607,2607,2607,2607,2142
unique,36,1474,1474,1460,4,3,1466,13
top,glutes,Kettlebell Single Arm Curtsy Lunge,https://musclewiki.com/kettlebells/male/glutes...,https://placehold.co/640x360/png?text=No+Video...,Beginner,Muscle Building,1\nStand upright with your feet hip-width apar...,dumbbells
freq,491,16,16,46,887,2258,16,410


In [20]:
df_fin.head()

Unnamed: 0,Muscle Group,Exercise Name,Exercise URL,Video URL,Difficulty,Exercise Type,Combined_Text,Equipment
0,biceps,Chin Ups,https://musclewiki.com/bodyweight/male/biceps/...,https://media.musclewiki.com/media/uploads/vid...,Intermediate,Muscle Building,1\nGrab the bar shoulder width apart with a su...,bodyweight
1,biceps,Cable Bayesian Curl,https://musclewiki.com/cables/male/biceps/cabl...,https://media.musclewiki.com/media/uploads/vid...,Beginner,Muscle Building,1\nUse a handle attachment. The cable should b...,cables
2,biceps,Dumbbell Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nStand up straight with a dumbbell in each h...,dumbbells
3,biceps,Dumbbell Hammer Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nHold the dumbbells with a neutral grip (thu...,dumbbells
4,biceps,Dumbbell Reverse Curl,https://musclewiki.com/dumbbells/male/biceps/d...,https://media.musclewiki.com/media/uploads/vid...,Novice,Muscle Building,1\nGrab the dumbbells with a pronated (overhan...,dumbbells


In [21]:
# Group by 'Exercise Name' and aggregate 'Muscle Group' and 'Equipment'
# into lists of unique values, then convert to comma-separated strings.
df_combined = df_fin.groupby('Exercise Name').agg(
    Muscle_Group=('Muscle Group', lambda x: ', '.join(x.dropna().unique())),
    Exercise_URL=('Exercise URL', 'first'),
    Video_URL=('Video URL', 'first'),
    Difficulty=('Difficulty', 'first'),
    Exercise_Type=('Exercise Type', 'first'),
    Combined_Text=('Combined_Text', 'first'),
    Equipment=('Equipment', lambda x: ', '.join(x.dropna().unique()))
).reset_index()
# Reorder columns to ensure 'Exercise Name' is first and other columns are in a logical order
# Rename the aggregated columns to their original names if desired,
# or keep them as Muscle_Group and Equipment
df_combined.rename(columns={'Muscle_Group': 'Muscle Group'}, inplace=True)


# Reorder columns to ensure 'Exercise Name' is first and other columns are in a logical order
column_order = [
    'Exercise Name',
    'Muscle Group',
    'Equipment',
    'Difficulty',
    'Exercise_Type',
    'Combined_Text',
    'Exercise_URL',
    'Video_URL'
]
df_final_unique = df_combined[column_order]

df_final_unique.head()

Unnamed: 0,Exercise Name,Muscle Group,Equipment,Difficulty,Exercise_Type,Combined_Text,Exercise_URL,Video_URL
0,Abdominals Stretch Variation Four,abdominals,,Beginner,Stretching,1\nLay on a ball or a Bosu ball with your feet...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
1,Abdominals Stretch Variation One,abdominals,,Novice,Stretching,1\nLay on your stomach on the floor with your ...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
2,Abdominals Stretch Variation Three,"abdominals, obliques",,Novice,Stretching,1\nStand upright.\n2\nAfter completing the des...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
3,Abdominals Stretch Variation Two,"abdominals, obliques",,Novice,Stretching,1\nStand upright.\n2\nReach with both hands up...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
4,Abductor Leg Raise Side Lying,"glutes, gluteus-medius",,Beginner,Muscle Building,1\nStabilise yourself in the side lying positi...,https://musclewiki.com/recovery/male/glutes/ab...,https://media.musclewiki.com/media/uploads/vid...


In [22]:
display(df_final_unique[df_final_unique['Exercise Name']=='Kettlebell Single Arm Curtsy Lunge'])

Unnamed: 0,Exercise Name,Muscle Group,Equipment,Difficulty,Exercise_Type,Combined_Text,Exercise_URL,Video_URL
925,Kettlebell Single Arm Curtsy Lunge,"quads, glutes, gluteus-medius, gluteus-maximus",kettlebells,Intermediate,Muscle Building,1\nStand upright with your feet hip-width apar...,https://musclewiki.com/kettlebells/male/glutes...,https://placehold.co/640x360/png?text=No+Video...


In [23]:
df_final_unique.describe()

Unnamed: 0,Exercise Name,Muscle Group,Equipment,Difficulty,Exercise_Type,Combined_Text,Exercise_URL,Video_URL
count,1474,1474,1474.0,1474,1474,1474,1474,1474
unique,1474,142,14.0,4,3,1450,1474,1455
top,Abdominals Stretch Variation Four,"quads, glutes",,Beginner,Muscle Building,1\nAdjust weight for assistance. Stand on plat...,https://musclewiki.com/stretches/male/abdomina...,https://placehold.co/640x360/png?text=No+Video...
freq,1,182,306.0,553,1252,4,1,13


In [24]:
df_temp = df_final_unique[df_final_unique['Equipment']=='']


In [25]:
df_temp.head()

Unnamed: 0,Exercise Name,Muscle Group,Equipment,Difficulty,Exercise_Type,Combined_Text,Exercise_URL,Video_URL
0,Abdominals Stretch Variation Four,abdominals,,Beginner,Stretching,1\nLay on a ball or a Bosu ball with your feet...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
1,Abdominals Stretch Variation One,abdominals,,Novice,Stretching,1\nLay on your stomach on the floor with your ...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
2,Abdominals Stretch Variation Three,"abdominals, obliques",,Novice,Stretching,1\nStand upright.\n2\nAfter completing the des...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
3,Abdominals Stretch Variation Two,"abdominals, obliques",,Novice,Stretching,1\nStand upright.\n2\nReach with both hands up...,https://musclewiki.com/stretches/male/abdomina...,https://media.musclewiki.com/media/uploads/vid...
4,Abductor Leg Raise Side Lying,"glutes, gluteus-medius",,Beginner,Muscle Building,1\nStabilise yourself in the side lying positi...,https://musclewiki.com/recovery/male/glutes/ab...,https://media.musclewiki.com/media/uploads/vid...


In [None]:
df_with_equipment = pd.read_csv('exercise_with_equipment_fixed.csv')
df_with_equipment.describe()

Unnamed: 0,Exercise Name,Muscle Group,Equipment,Difficulty,Exercise_Type,Combined_Text,Exercise_URL,Video_URL
count,306,306,306,306,306,306,306,306
unique,306,59,7,4,3,301,306,304
top,Abdominals Stretch Variation Four,calves,unknown,Beginner,Stretching,"1\nKneel in front of a bench, placing your elb...",https://musclewiki.com/stretches/male/abdomina...,https://placehold.co/640x360/png?text=No+Video...
freq,1,27,240,187,151,3,1,3


In [None]:
df_no_equipment = df_final_unique


df_mergedd = df_no_equipment.merge(
    df_with_equipment[['Exercise Name', 'Equipment']],
    on='Exercise Name',
    how='left',
    suffixes=('', '_new')
)

df_mergedd['Equipment'].replace('', np.nan, inplace=True)

# Now fill NaNs with values from 'Equipment_new'
df_mergedd['Equipment'] = df_mergedd['Equipment'].fillna(df_mergedd['Equipment_new'])

# Drop the temporary column
df_mergedd.drop(columns=['Equipment_new'], inplace=True)

# Save the updated DataFrame to a new CSV file
#df_merged.to_csv('exercise_no_equip_updated.csv', index=False)
#df_final.to_csv(output_filename, index=False)

#print(f"Combined and cleaned data saved to {output_filename}")


In [28]:
df_mergedd.describe()

Unnamed: 0,Exercise Name,Muscle Group,Equipment,Difficulty,Exercise_Type,Combined_Text,Exercise_URL,Video_URL
count,1474,1474,1474,1474,1474,1474,1474,1474
unique,1474,142,14,4,3,1450,1474,1455
top,Abdominals Stretch Variation Four,"quads, glutes",unknown,Beginner,Muscle Building,1\nAdjust weight for assistance. Stand on plat...,https://musclewiki.com/stretches/male/abdomina...,https://placehold.co/640x360/png?text=No+Video...
freq,1,182,240,553,1252,4,1,13


In [29]:
df_mergedd['Equipment'] = df_mergedd['Equipment'].replace('unknown','None')
df_mergedd['Difficulty'] = df_mergedd['Difficulty'].replace('Novice','Beginner')
df_mergedd.describe()



Unnamed: 0,Exercise Name,Muscle Group,Equipment,Difficulty,Exercise_Type,Combined_Text,Exercise_URL,Video_URL
count,1474,1474,1474.0,1474,1474,1474,1474,1474
unique,1474,142,14.0,3,3,1450,1474,1455
top,Abdominals Stretch Variation Four,"quads, glutes",,Beginner,Muscle Building,1\nAdjust weight for assistance. Stand on plat...,https://musclewiki.com/stretches/male/abdomina...,https://placehold.co/640x360/png?text=No+Video...
freq,1,182,240.0,922,1252,4,1,13


In [30]:
# Load the dataset
df = df_mergedd

# Add a unique 'id' column. You can use the DataFrame's index or a sequential counter.
df['id'] = df.index # or use range(len(df)) for a simple sequential ID

# Create the 'description' column including 'Difficulty' and 'Combined_Text'
df['description'] = df.apply(lambda row: f"The '{row['Exercise Name']}' is a '{row['Difficulty']}' level '{row['Exercise_Type']}' exercise that targets the '{row['Muscle Group']}' and requires '{row['Equipment']}'. Instructions and tips: {row['Combined_Text']}", axis=1)

# Reorder columns to have 'id' at the beginning (optional, for better readability)
df = df[['id', 'Exercise Name', 'Muscle Group', 'Equipment', 'Difficulty', 'Exercise_Type', 'Combined_Text', 'Exercise_URL', 'Video_URL', 'description']]



In [31]:

df['description'] = df['description'].str.replace(r'[\n\r]+', '.', regex=True)



In [32]:
df['description'].head()

0    The 'Abdominals Stretch Variation Four' is a '...
1    The 'Abdominals Stretch Variation One' is a 'B...
2    The 'Abdominals Stretch Variation Three' is a ...
3    The 'Abdominals Stretch Variation Two' is a 'B...
4    The 'Abductor Leg Raise Side Lying' is a 'Begi...
Name: description, dtype: object

In [33]:
output_filename_fin = 'exercises_data_final.csv'
df.to_csv(output_filename_fin, index=False)