In [36]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

In [37]:
df = pd.read_csv("user_train.csv")
df

Unnamed: 0,Name,Age,Gender,Height_cm,Weight_kg,Fitness_Goals,Fitness_Level,Days_Available,Time_per_Session_mins,Exercise_Preferences,Gym_Equipment,plan
0,David,36,Male,179,75,Maintain overall health and well-being,Intermediate,3 days a week,64,"Varied, including strength and cardio",Full gym access,2
1,John,25,Female,160,62,Build muscle and increase strength,Advanced,3 days a week,43,"Free weights, compound exercises","Bench press, squat rack",1
2,Sarah,25,Female,160,55,Build muscle and increase strength,Intermediate,3 days a week,30,Cardio and bodyweight exercises,"Treadmill, resistance bands",2
3,Michael,37,Male,184,90,Gain endurance and boost cardiovascular health,Advanced,5 days a week,90,"Cardio, running, cycling","Stationary bike, treadmill",2
4,Michael,31,Male,173,70,Lose weight and improve overall fitness,Intermediate,3 days a week,56,"Cardio, running, cycling","Stationary bike, treadmill",2
...,...,...,...,...,...,...,...,...,...,...,...,...
995,David,40,Male,185,90,Maintain overall health and well-being,Intermediate,5 days a week,90,"Varied, including strength and cardio",Full gym access,2
996,Michael,39,Male,185,90,Gain endurance and boost cardiovascular health,Advanced,5 days a week,90,"Cardio, running, cycling","Stationary bike, treadmill",1
997,David,36,Male,177,75,Maintain overall health and well-being,Advanced,5 days a week,62,"Varied, including strength and cardio",Full gym access,1
998,Michael,35,Male,180,85,Lose weight and improve overall fitness,Advanced,3 days a week,81,"Cardio, running, cycling","Treadmill, resistance bands",2


In [38]:
df.drop(columns=["Name"],inplace=True)

features = df.drop(columns="plan").columns

# categorical and numerical featuers
num_features = [col for col in features if df[col].dtype != "object"]

cat_features = [col for col in features if df[col].dtype=="object"]

print(f"numerical features: {num_features}")
print(f"categorical features: {cat_features}")

numerical features: ['Age', 'Height_cm', 'Weight_kg', 'Time_per_Session_mins']
categorical features: ['Gender', 'Fitness_Goals', 'Fitness_Level', 'Days_Available', 'Exercise_Preferences', 'Gym_Equipment']


In [39]:
X = df.drop(columns=["plan"])
y = df["plan"]

In [40]:
X

Unnamed: 0,Age,Gender,Height_cm,Weight_kg,Fitness_Goals,Fitness_Level,Days_Available,Time_per_Session_mins,Exercise_Preferences,Gym_Equipment
0,36,Male,179,75,Maintain overall health and well-being,Intermediate,3 days a week,64,"Varied, including strength and cardio",Full gym access
1,25,Female,160,62,Build muscle and increase strength,Advanced,3 days a week,43,"Free weights, compound exercises","Bench press, squat rack"
2,25,Female,160,55,Build muscle and increase strength,Intermediate,3 days a week,30,Cardio and bodyweight exercises,"Treadmill, resistance bands"
3,37,Male,184,90,Gain endurance and boost cardiovascular health,Advanced,5 days a week,90,"Cardio, running, cycling","Stationary bike, treadmill"
4,31,Male,173,70,Lose weight and improve overall fitness,Intermediate,3 days a week,56,"Cardio, running, cycling","Stationary bike, treadmill"
...,...,...,...,...,...,...,...,...,...,...
995,40,Male,185,90,Maintain overall health and well-being,Intermediate,5 days a week,90,"Varied, including strength and cardio",Full gym access
996,39,Male,185,90,Gain endurance and boost cardiovascular health,Advanced,5 days a week,90,"Cardio, running, cycling","Stationary bike, treadmill"
997,36,Male,177,75,Maintain overall health and well-being,Advanced,5 days a week,62,"Varied, including strength and cardio",Full gym access
998,35,Male,180,85,Lose weight and improve overall fitness,Advanced,3 days a week,81,"Cardio, running, cycling","Treadmill, resistance bands"


In [41]:
y.values

array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1,
       1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1,
       2, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1,
       1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2,
       2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1,
       2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2,
       1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2,
       2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2,
       1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
       1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2,
       1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2,

In [42]:
# transforming the data part
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', num_features),  # pass through numeric features
        ('cat', OneHotEncoder(), cat_features)  # apply OneHotEncoder to categorical features
    ])

# visualizing the transformed data
print(preprocessor.fit_transform(X,y))  

# pipelining the transformed data with the clasifier model into a single model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier())
])

[[ 36. 179.  75. ...   0.   0.   0.]
 [ 25. 160.  62. ...   0.   0.   0.]
 [ 25. 160.  55. ...   0.   1.   0.]
 ...
 [ 36. 177.  75. ...   0.   0.   0.]
 [ 35. 180.  85. ...   0.   1.   0.]
 [ 30. 171.  71. ...   1.   0.   0.]]


In [43]:
model.fit(X,y)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', 'passthrough',
                                                  ['Age', 'Height_cm',
                                                   'Weight_kg',
                                                   'Time_per_Session_mins']),
                                                 ('cat', OneHotEncoder(),
                                                  ['Gender', 'Fitness_Goals',
                                                   'Fitness_Level',
                                                   'Days_Available',
                                                   'Exercise_Preferences',
                                                   'Gym_Equipment'])])),
                ('classifier', RandomForestClassifier())])

In [44]:
y_pred = model.predict(X)
y_pred

array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1,
       1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1,
       2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1,
       1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2,
       2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2,
       1, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1,
       2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2,
       1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2,
       2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2,
       1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1,
       2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
       1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2,
       1, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2,

In [45]:
from sklearn.metrics import accuracy_score
score = accuracy_score(y,y_pred)
print(score)

0.969


In [46]:
import pickle
with open("classifier.pkl","wb") as pickle_out:
    pickle.dump(model,pickle_out)
    pickle_out.close()

In [47]:
test_user = [{
'Age': 22,
'Gender': 'Male',
'Height_cm': 190,
'Weight_kg': 85,
'Fitness_Goals': 'Maintain overall health and well-being',
'Fitness_Level': 'Beginner',
'Days_Available':'5 days a week' ,
'Time_per_Session_mins': 50,
'Exercise_Preferences': 'Varied, including strength and cardio',
'Gym_Equipment':'Full gym access'
}]
prediction = model.predict(pd.DataFrame(test_user))
print(f'I recommend you play plan number {prediction[0]}')

""""
{
  "Age": 24,
  "Gender": "Male",
  "Height_cm": 180,
  "Weight_kg": 85,
  "Fitness_Goals": "Maintain overall health and well-being",
  "Fitness_Level": "Beginner",
  "Days_Available": "5 days a week",
  "Time_per_Session_mins": 50,
  "Exercise_Preferences": "Varied, including strength and cardio",
  "Gym_Equipment": "Full gym access"
}
"""

I recommend you play plan number 2
