# Gym Personal Training Plan Recommender

## Exploratory Data Analysis (EDA) & Preprocessing

### Imports

In [44]:
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.neighbors import NearestNeighbors

### Load Datasets

In [30]:
program_summary_path = "../data/program_summary.csv"
exercise_path = "../data/programs_detailed_boostcamp_kaggle.csv"

if not os.path.exists(program_summary_path):
    raise FileNotFoundError(f"Error: The file '{program_summary_path}' was not found.")

if not os.path.exists(exercise_path):
    raise FileNotFoundError(f"Error: The file '{exercise_path}' was not found.")

program_summary_df = pd.read_csv(program_summary_path)
exercise_df = pd.read_csv(exercise_path)

### Data Analysis

In [31]:
# Explore Program Dataset
print("\n********** Program Summary Dataset **********")

print("\nShape:")
print(program_summary_df.shape)

print("\nColumns:")
print(program_summary_df.columns.to_list())

print("\nInfo:")
print(program_summary_df.info())

print("\nDescribe:")
display(program_summary_df.describe().T)

print("\nCheck missing values:")
print(program_summary_df.isna().sum())

print("\nSample rows:")
display(program_summary_df.head())
display(program_summary_df.tail())


********** Program Summary Dataset **********

Shape:
(2598, 10)

Columns:
['title', 'description', 'level', 'goal', 'equipment', 'program_length', 'time_per_workout', 'total_exercises', 'created', 'last_edit']

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2598 entries, 0 to 2597
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   title             2598 non-null   object 
 1   description       2594 non-null   object 
 2   level             2598 non-null   object 
 3   goal              2598 non-null   object 
 4   equipment         2597 non-null   object 
 5   program_length    2597 non-null   float64
 6   time_per_workout  2598 non-null   float64
 7   total_exercises   2598 non-null   int64  
 8   created           2597 non-null   object 
 9   last_edit         2596 non-null   object 
dtypes: float64(2), int64(1), object(7)
memory usage: 203.1+ KB
None

Describe:


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
program_length,2597.0,8.812476,4.185403,1.0,5.0,8.0,12.0,18.0
time_per_workout,2598.0,69.035412,24.394798,10.0,60.0,60.0,90.0,180.0
total_exercises,2598.0,232.884142,208.123873,1.0,108.0,192.5,307.75,5040.0



Check missing values:
title               0
description         4
level               0
goal                0
equipment           1
program_length      1
time_per_workout    0
total_exercises     0
created             1
last_edit           2
dtype: int64

Sample rows:


Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises,created,last_edit
0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,['Intermediate'],"['Muscle & Sculpting', 'Bodyweight Fitness']",Full Gym,12.0,90.0,384,2024-01-20 10:23:00,2025-06-29 12:39:00
1,(NOT MY PROGRAM)SHJ Jotaro,Build strength and size,"['Advanced', 'Intermediate']",['Bodybuilding'],Full Gym,8.0,60.0,224,2024-07-08 02:28:00,2025-06-18 09:15:00
2,1 PowerLift Per Day Powerbuilding 5 Day Bro Split,Based off of Andy Baker's KCS (Kingwood Streng...,"['Beginner', 'Novice', 'Intermediate']","['Athletics', 'Powerlifting', 'Powerbuilding']",Full Gym,6.0,90.0,237,2025-04-23 09:21:00,2025-06-18 11:55:00
3,10 Week Mass Building Program,This workout is designed to increase your musc...,"['Intermediate', 'Advanced']",['Powerbuilding'],Garage Gym,10.0,70.0,280,2024-09-07 03:44:00,2025-06-18 08:01:00
4,10 week deadlift focus,Increase deadlift,"['Intermediate', 'Advanced']","['Powerbuilding', 'Powerlifting', 'Bodybuildin...",Full Gym,10.0,80.0,356,2024-12-23 03:13:00,2025-06-18 12:19:00


Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises,created,last_edit
2593,🎧,Lihaskasvu,"['Intermediate', 'Advanced']",['Bodybuilding'],Garage Gym,12.0,90.0,228,2024-10-10 04:20:00,2025-06-18 11:32:00
2594,👾Reza's Routine👾,This is a beginner friendly routine made for m...,"['Beginner', 'Intermediate']",['Muscle & Sculpting'],Dumbbell Only,1.0,60.0,60,2024-09-15 08:45:00,2025-06-18 07:48:00
2595,"🔥 ""Upper Body Dominance: 3-Day Growth System"" 🔥","""Upper Body Dominance: A science-based 3-day w...","['Intermediate', 'Novice']",['Muscle & Sculpting'],Full Gym,6.0,60.0,96,2025-02-15 08:18:00,2025-06-18 07:48:00
2596,🙈🙉🙊🐵,Muscle Memory Training,['Intermediate'],['Bodybuilding'],Full Gym,8.0,90.0,211,2024-12-08 01:04:00,2025-06-18 11:35:00
2597,🥷🥷🥷,To become stronger without becoming “bulky”,"['Intermediate', 'Novice']","['Bodybuilding', 'Powerbuilding']",Garage Gym,9.0,100.0,216,2025-05-15 10:44:00,2025-06-18 12:08:00


### Preprocessing

##### Drop unnecessary columns

In [32]:
program_summary_df = program_summary_df.drop(columns=['created','last_edit'])
print("\nProgram summary sample rows:")
display(program_summary_df.head())


Program summary sample rows:


Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises
0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,['Intermediate'],"['Muscle & Sculpting', 'Bodyweight Fitness']",Full Gym,12.0,90.0,384
1,(NOT MY PROGRAM)SHJ Jotaro,Build strength and size,"['Advanced', 'Intermediate']",['Bodybuilding'],Full Gym,8.0,60.0,224
2,1 PowerLift Per Day Powerbuilding 5 Day Bro Split,Based off of Andy Baker's KCS (Kingwood Streng...,"['Beginner', 'Novice', 'Intermediate']","['Athletics', 'Powerlifting', 'Powerbuilding']",Full Gym,6.0,90.0,237
3,10 Week Mass Building Program,This workout is designed to increase your musc...,"['Intermediate', 'Advanced']",['Powerbuilding'],Garage Gym,10.0,70.0,280
4,10 week deadlift focus,Increase deadlift,"['Intermediate', 'Advanced']","['Powerbuilding', 'Powerlifting', 'Bodybuildin...",Full Gym,10.0,80.0,356


##### Convert string representation of lists into Python lists

In [33]:
import ast

def normalize_words(words_list):
    if isinstance(words_list, str):
        words_list = ast.literal_eval(words_list)
    return [g.lower().strip() for g in words_list]

program_summary_df['level_list'] = program_summary_df['level'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
program_summary_df['goal_list'] = program_summary_df['goal'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

##### Handle missing values

In [34]:
numeric_cols = ['program_length']
imputer = SimpleImputer(strategy='median')
program_summary_df[numeric_cols] = imputer.fit_transform(program_summary_df[numeric_cols])

categorical_cols = ['description', 'equipment']
imputer = SimpleImputer(strategy='most_frequent')
program_summary_df[categorical_cols] = imputer.fit_transform(program_summary_df[categorical_cols])

print("\nCheck missing values:")
print(program_summary_df.isna().sum())


Check missing values:
title               0
description         0
level               0
goal                0
equipment           0
program_length      0
time_per_workout    0
total_exercises     0
level_list          0
goal_list           0
dtype: int64


In [35]:
# Get rows where the 'level' column contains empty lists
empty_level_rows = program_summary_df[program_summary_df['level_list'].apply(lambda x: isinstance(x, list) and len(x) == 0)]
display(empty_level_rows)

# Get rows where the 'goal' column contains empty lists
empty_goal_rows = program_summary_df[program_summary_df['goal_list'].apply(lambda x: isinstance(x, list) and len(x) == 0)]
display(empty_goal_rows)

rows_to_drop = program_summary_df[
    program_summary_df['level_list'].apply(lambda x: isinstance(x, list) and len(x) == 0) | 
    program_summary_df['goal_list'].apply(lambda x: isinstance(x, list) and len(x) == 0)
].index
print("\nRows to drop:")
print(rows_to_drop)

# Drop rows
program_summary_df = program_summary_df.drop(rows_to_drop)
program_summary_df = program_summary_df.drop(columns=['level_list', 'goal_list'])
print("\nPrograms dataset shape:")
print(program_summary_df.shape)

Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises,level_list,goal_list
1394,Lyle McDonald Routine (Strength/Hypertrophy Vers),IMPORTANT: THIS IS NOT THE ORIGINAL LYLE MCDON...,[],[],Full Gym,12.0,90.0,1152,[],[]
1423,Mania (Upper/Lower),Mania is a 4 day per week training program tha...,[],[],Full Gym,12.0,90.0,288,[],[]
1520,Monster Bench,Increase your 1RM on barbell bench.,[],[],Full Gym,4.0,60.0,104,[],[]
1915,Reese Training Program,This is Reese's premium Whaley gym Training pr...,[],[],Dumbbell Only,16.0,50.0,144,[],[]
1952,Rugby forward off season training,The purpose of this program is to build a soli...,[],[],Full Gym,12.0,90.0,388,[],[]
2172,Swole 5 (Advanced),Same as intermediate but with more revocery in...,[],[],Garage Gym,12.0,80.0,144,[],[]
2321,Treino do Josu,Hipertrofia,[],[],Full Gym,1.0,60.0,26,[],[]
2423,Viking strong,Gain functional strength and hypertrophy,[],[],Full Gym,12.0,60.0,208,[],[]


Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises,level_list,goal_list
1394,Lyle McDonald Routine (Strength/Hypertrophy Vers),IMPORTANT: THIS IS NOT THE ORIGINAL LYLE MCDON...,[],[],Full Gym,12.0,90.0,1152,[],[]
1423,Mania (Upper/Lower),Mania is a 4 day per week training program tha...,[],[],Full Gym,12.0,90.0,288,[],[]
1520,Monster Bench,Increase your 1RM on barbell bench.,[],[],Full Gym,4.0,60.0,104,[],[]
1915,Reese Training Program,This is Reese's premium Whaley gym Training pr...,[],[],Dumbbell Only,16.0,50.0,144,[],[]
1952,Rugby forward off season training,The purpose of this program is to build a soli...,[],[],Full Gym,12.0,90.0,388,[],[]
2172,Swole 5 (Advanced),Same as intermediate but with more revocery in...,[],[],Garage Gym,12.0,80.0,144,[],[]
2321,Treino do Josu,Hipertrofia,[],[],Full Gym,1.0,60.0,26,[],[]
2423,Viking strong,Gain functional strength and hypertrophy,[],[],Full Gym,12.0,60.0,208,[],[]



Rows to drop:
Index([1394, 1423, 1520, 1915, 1952, 2172, 2321, 2423], dtype='int64')

Programs dataset shape:
(2590, 8)


##### Clean titles and descriptions with specific characters

In [36]:
import re

CJK_PATTERN = r'[\u4e00-\u9fff\uac00-\ud7a3]' 
EXTRA_SYMBOLS_PATTERN = r'[∩^ω^⊂⊃]+' 
FILTER_PATTERN = CJK_PATTERN + '|' + EXTRA_SYMBOLS_PATTERN

def should_be_removed(text):
    return bool(re.search(FILTER_PATTERN, text))

should_be_removed_titles_df = program_summary_df[program_summary_df['title'].apply(should_be_removed)]
display(should_be_removed_titles_df)
# Drop titles with specific characters
program_summary_df = program_summary_df[~program_summary_df['title'].apply(should_be_removed)]

should_be_removed_desc_df = program_summary_df[program_summary_df['description'].apply(should_be_removed)]
display(should_be_removed_desc_df)
# Drop descriptions with specific characters
program_summary_df = program_summary_df[~program_summary_df['description'].apply(should_be_removed)]

print("\nShape:")
print(program_summary_df.shape)

Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises
1234,Japanese Muscle 最強,Perfect Muscle,"['Beginner', 'Novice', 'Intermediate']","['Powerbuilding', 'Bodybuilding']",Full Gym,12.0,60.0,48
1656,PHUL루틴 변형,로니콜먼,['Intermediate'],['Muscle & Sculpting'],Full Gym,1.0,80.0,23
2586,∩^ω^∩,Intermediate program : lower in volume than ot...,['Beginner'],"['Muscle & Sculpting', 'Athletics']",Full Gym,4.0,70.0,100
2587,상체하체,근육빵빵 무고민 마이크 서머필드 무작정 따라하기,"['Advanced', 'Intermediate', 'Novice']","['Bodybuilding', 'Muscle & Sculpting']",Full Gym,12.0,90.0,450
2588,시험기간 헬스장,3대10000,"['Novice', 'Advanced', 'Intermediate']","['Athletics', 'Muscle & Sculpting']",Garage Gym,1.0,80.0,39
2589,자기화 프로그램,powerbuilding,"['Novice', 'Intermediate']","['Bodybuilding', 'Powerlifting', 'Powerbuilding']",Full Gym,5.0,60.0,205
2590,찬중,This program is designed for beginners to buil...,['Intermediate'],"['Muscle & Sculpting', 'Bodybuilding']",Full Gym,3.0,60.0,54


Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises
1281,K.BUM Y3T (Back),커지거나 뒤지거나,['Novice'],"['Athletics', 'Powerlifting']",Full Gym,3.0,100.0,108



Shape:
(2582, 8)


##### Categorical columns encoding

In [37]:
mlb_goal = MultiLabelBinarizer()
mlb_level = MultiLabelBinarizer()

# Multi-hot encoding for goal list: ['Muscle & Sculpting', 'Bodyweight Fitness'] -> muscle & sculpting = 1, bodyweight fitness = 1, other = 0
program_summary_df['goal'] = program_summary_df['goal'].apply(normalize_words)
goal_encoded = mlb_goal.fit_transform(program_summary_df['goal'])
goal_df = pd.DataFrame(goal_encoded, columns=[f"goal_{c}" for c in mlb_goal.classes_])
program_summary_df = pd.concat([program_summary_df, goal_df], axis=1)
program_summary_df = program_summary_df.dropna()

# Multi-hot encoding for goal list: ['Muscle & Sculpting', 'Bodyweight Fitness'] -> muscle & sculpting = 1, bodyweight fitness = 1, other = 0
program_summary_df['level'] = program_summary_df['level'].apply(normalize_words)
level_encoded = mlb_level.fit_transform(program_summary_df['level'])
level_df = pd.DataFrame(level_encoded, columns=[f"level{c}" for c in mlb_level.classes_])
program_summary_df = pd.concat([program_summary_df, level_df], axis=1)
program_summary_df = program_summary_df.dropna()

# One-hot encoding for equipment
equipment_dummies = pd.get_dummies(program_summary_df['equipment'], prefix='equip')
program_summary_df = pd.concat([program_summary_df, equipment_dummies], axis=1)

# Program title
le = LabelEncoder()
y = program_summary_df["title"]
y_encoded = le.fit_transform(y)

##### Numerical columns

In [38]:
num_cols = ['program_length', 'time_per_workout']
scaler = StandardScaler()

program_summary_df['program_length_orig'] = program_summary_df['program_length']
program_summary_df['time_per_workout_orig'] = program_summary_df['time_per_workout']

program_summary_df[num_cols] = scaler.fit_transform(program_summary_df[num_cols])

### Model training

##### Feature/Target separation

In [39]:
display(program_summary_df)

feature_cols = list(goal_df.columns) + list(level_df.columns) + list(equipment_dummies.columns) + num_cols
print(feature_cols)
X = program_summary_df[feature_cols]
y = y_encoded

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Unnamed: 0,title,description,level,goal,equipment,program_length,time_per_workout,total_exercises,goal_athletics,goal_bodybuilding,...,leveladvanced,levelbeginner,levelintermediate,levelnovice,equip_At Home,equip_Dumbbell Only,equip_Full Gym,equip_Garage Gym,program_length_orig,time_per_workout_orig
0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,[intermediate],"[muscle & sculpting, bodyweight fitness]",Full Gym,0.762893,0.857384,384.0,0.0,0.0,...,0.0,0.0,1.0,0.0,False,False,True,False,12.0,90.0
1,(NOT MY PROGRAM)SHJ Jotaro,Build strength and size,"[advanced, intermediate]",[bodybuilding],Full Gym,-0.195285,-0.367769,224.0,0.0,1.0,...,1.0,0.0,1.0,0.0,False,False,True,False,8.0,60.0
2,1 PowerLift Per Day Powerbuilding 5 Day Bro Split,Based off of Andy Baker's KCS (Kingwood Streng...,"[beginner, novice, intermediate]","[athletics, powerlifting, powerbuilding]",Full Gym,-0.674374,0.857384,237.0,1.0,0.0,...,0.0,1.0,1.0,1.0,False,False,True,False,6.0,90.0
3,10 Week Mass Building Program,This workout is designed to increase your musc...,"[intermediate, advanced]",[powerbuilding],Garage Gym,0.283804,0.040615,280.0,0.0,0.0,...,1.0,0.0,1.0,0.0,False,False,False,True,10.0,70.0
4,10 week deadlift focus,Increase deadlift,"[intermediate, advanced]","[powerbuilding, powerlifting, bodybuilding, mu...",Full Gym,0.283804,0.449000,356.0,0.0,1.0,...,1.0,0.0,1.0,0.0,False,False,True,False,10.0,80.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2566,strongman basebuilding,"week 1-5: basebuilding, volume acclimation\nwe...","[intermediate, novice]","[powerlifting, powerbuilding]",Full Gym,0.762893,0.857384,200.0,0.0,1.0,...,1.0,0.0,1.0,1.0,False,False,True,False,12.0,90.0
2567,strongman bigloz starter,get strong. do strongman stuff. \nthis program...,[intermediate],[muscle & sculpting],At Home,0.762893,0.449000,280.0,0.0,1.0,...,1.0,0.0,1.0,0.0,True,False,False,False,12.0,80.0
2568,subeen,for me,"[beginner, novice, intermediate, advanced]",[bodybuilding],Full Gym,-0.195285,0.040615,208.0,0.0,0.0,...,1.0,0.0,0.0,0.0,False,False,True,False,8.0,70.0
2569,superfart,poot,[intermediate],"[bodyweight fitness, bodybuilding, athletics]",At Home,1.721072,-0.776154,304.0,0.0,1.0,...,0.0,0.0,1.0,0.0,True,False,False,False,16.0,50.0


['goal_athletics', 'goal_bodybuilding', 'goal_bodyweight fitness', 'goal_muscle & sculpting', 'goal_olympic weightlifting', 'goal_powerbuilding', 'goal_powerlifting', 'leveladvanced', 'levelbeginner', 'levelintermediate', 'levelnovice', 'equip_At Home', 'equip_Dumbbell Only', 'equip_Full Gym', 'equip_Garage Gym', 'program_length', 'time_per_workout']


##### Training

In [40]:


knn = NearestNeighbors(n_neighbors=3, metric='cosine')
knn.fit(X_train) 

0,1,2
,n_neighbors,3
,radius,1.0
,algorithm,'auto'
,leaf_size,30
,metric,'cosine'
,p,2
,metric_params,
,n_jobs,


### Evaluation

In [41]:
user_level_input = ["beginner", "novice"]
user_goal_input = ["muscle & sculpting", "bodyweight fitness"]
user_equipment_input = ["Dumbbell Only"]
user_numeric_input = np.array([[8, 45]])  # program_length, time_per_workout

user_level = mlb_level.transform([user_level_input])
user_goal = mlb_goal.transform([user_goal_input])
user_equipment = pd.get_dummies(pd.Series(user_equipment_input), prefix='equip').reindex(columns=equipment_dummies.columns, fill_value=0)
user_numeric_df = pd.DataFrame(user_numeric_input, columns=['program_length', 'time_per_workout'])
user_numeric_scaled = scaler.transform(user_numeric_df)

user_X = np.hstack([user_goal, user_level, user_equipment.values, user_numeric_scaled])
user_X_df = pd.DataFrame(user_X, columns=X_train.columns) 

distances, indices = knn.kneighbors(user_X_df)

top_programs = le.inverse_transform(y_train[indices[0]])
print("Top 3 suggested programs:", top_programs)



def overlap_score(user_list, program_list):
    if not user_list or not program_list:
        return 0
    return len(set(user_list) & set(program_list)) / len(set(user_list) | set(program_list))

def numeric_similarity(user_value, program_value, scale=1.0):
    return 1 / (1 + abs(user_value - program_value) / scale)
    
# Score
WEIGHTS = {
    'knn': 1.0,
    'goal': 1.0,
    'level': 1.0,
    'equipment': 0.8,
    'program_length': 0.6,
    'time_per_workout': 0.6
}

scored_programs = []
for i, prog_name in enumerate(top_programs):
    prog_row = program_summary_df[program_summary_df['title'] == prog_name].iloc[0]
    score_goal = overlap_score(user_goal_input, normalize_words(prog_row['goal']))
    score_level = overlap_score(user_level_input, normalize_words(prog_row['level']))
    score_equipment = overlap_score(user_equipment_input, prog_row['equipment'])

    user_length, user_time = user_numeric_input[0]
    prog_length = prog_row['program_length_orig']
    prog_time = prog_row['time_per_workout_orig']  
    
    score_length = numeric_similarity(user_length, prog_length)
    score_time = numeric_similarity(user_time, prog_time)

    knn_score = 1 / (1 + distances[0][i])  # lower dist -> higher skor
    
    total_score = (
        WEIGHTS['knn'] * knn_score +
        WEIGHTS['goal'] * score_goal +
        WEIGHTS['level'] * score_level +
        WEIGHTS['equipment'] * score_equipment +
        WEIGHTS['program_length'] * score_length +
        WEIGHTS['time_per_workout'] * score_time
    )
    scored_programs.append((prog_name, total_score, knn_score, score_goal, score_level, score_equipment, score_length, score_time))

for prog_name, total_score, score_goal, score_level, knn_score, score_equipment, score_length, score_time in scored_programs:
    print(f"\nProgram: {prog_name}")
    print(f"""  Total score: {total_score:.3f} 
          (KNN: {knn_score:.3f}, 
           Goal overlap: {score_goal:.3f}, 
           Level overlap: {score_level:.3f}, 
           Equipment overlap: {score_equipment:.3f}, 
           Program length overlap: {score_length:.3f}, 
           Time per workout overlap: {score_time:.3f})""")

best_program = max(scored_programs, key=lambda x: x[1])
best_prog_name = best_program[0]
print(f"\nBest program: {best_prog_name}")

Top 3 suggested programs: ['Full Body at Home' 'Dumbbell Full Body Block' 'GZCL General Gainz']

Program: Full Body at Home
  Total score: 2.542 
          (KNN: 0.500, 
           Goal overlap: 0.842, 
           Level overlap: 0.500, 
           Equipment overlap: 0.000, 
           Program length overlap: 1.000, 
           Time per workout overlap: 0.167)

Program: Dumbbell Full Body Block
  Total score: 2.217 
          (KNN: 0.500, 
           Goal overlap: 0.830, 
           Level overlap: 0.667, 
           Equipment overlap: 0.000, 
           Program length overlap: 0.200, 
           Time per workout overlap: 0.167)

Program: GZCL General Gainz
  Total score: 3.464 
          (KNN: 1.000, 
           Goal overlap: 0.826, 
           Level overlap: 1.000, 
           Equipment overlap: 0.000, 
           Program length overlap: 1.000, 
           Time per workout overlap: 0.062)

Best program: GZCL General Gainz


### Output example

In [42]:
def show_program_with_exercises(program_name, program_summary_df, detailed_df):
    program_info = program_summary_df[program_summary_df['title'] == program_name].iloc[0]
    print(f"\n=== Program: {program_info['title']} ===")
    print(f"Level: {program_info['level']}")
    print(f"Goal: {program_info['goal']}")
    print(f"Equipment: {program_info['equipment']}")
    print(f"Program length: {program_info['program_length_orig']} weeks")
    print(f"Description: {program_info['description']}")
    print(f"Time per workout: {program_info['time_per_workout_orig']} minutes")

    exercises = detailed_df[detailed_df['title'] == program_name]
    exercises_sorted = exercises.sort_values(by=['week', 'day'])

    print("\nExercises in this plan:")
    for _, row in exercises_sorted.iterrows():
        print(f"  - Week {row['week']}, Day {row['day']}: {row['exercise_name']} (Sets: {row['sets']}, Reps: {row['reps']})")

for prog in top_programs:
    show_program_with_exercises(prog, program_summary_df, exercise_df)



=== Program: Full Body at Home ===
Level: ['beginner']
Goal: ['bodyweight fitness']
Equipment: Dumbbell Only
Program length: 8.0 weeks
Description: Unleash your potential with the **Full Body at Home** program, a comprehensive 8-week journey designed to sculpt and strengthen your entire body using just dumbbells. With 24 sessions spread across the weeks, each workout lasts approximately 50 minutes, combining bodyweight exercises and dumbbell movements to build muscle and enhance endurance. Tailored for all fitness levels, this program emphasizes bodybuilding and muscle sculpting, making it perfect for novices and intermediates alike. Get ready to transform your physique and elevate your fitness game from the comfort of your home!
Time per workout: 50.0 minutes

Exercises in this plan:
  - Week 1.0, Day 1.0: Pull-Up (Bodyweight) (Sets: 5.0, Reps: 8.0)
  - Week 1.0, Day 1.0: Front Squat (Dumbbell) (Sets: 3.0, Reps: 8.0)
  - Week 1.0, Day 1.0: Standing Shoulder Press (Dumbbell) (Sets: 3.

### Serialize and export model with preprocessing objects 

In [45]:
SAVE_DIR = "../backend/trained_models/"
os.makedirs(SAVE_DIR, exist_ok=True)

# KNN model
joblib.dump(knn, os.path.join(SAVE_DIR, "knn_model.pkl"))

# MultiLabelBinarizers
joblib.dump(mlb_goal, os.path.join(SAVE_DIR, "mlb_goal.pkl"))
joblib.dump(mlb_level, os.path.join(SAVE_DIR, "mlb_level.pkl"))

# LabelEncoder
joblib.dump(le, os.path.join(SAVE_DIR, "label_encoder.pkl"))

# StandardScaler
joblib.dump(scaler, os.path.join(SAVE_DIR, "scaler.pkl"))

print("Model and preprocessing objects are saved in '{SAVE_DIR}' folder")

Model and preprocessing objects are saved in '{SAVE_DIR}' folder
