In [341]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

In [342]:
df = pd.read_csv("user.csv")
df

Unnamed: 0,name,muscle_group,weight_kg,height_cm,level,gender,workout
0,Abdallah,leg,80,185,advanced,male,1
1,Ezzat,leg,70,180,beginner,male,1
2,gom3a,leg,70,190,beginner,male,2
3,Saad,leg,80,185,intermediate,male,2
4,Ahmed,leg,75,170,beginner,male,1
5,Sara,leg,60,150,beginner,female,2


In [343]:
df.drop(columns=["name","muscle_group"],inplace=True)

features = df.drop(columns="workout").columns

# categorical and numerical featuers
num_features = [col for col in features if df[col].dtype != "object"]

cat_features = [col for col in features if df[col].dtype=="object"]

print(f"numerical features: {num_features}")
print(f"categorical features: {cat_features}")

numerical features: ['weight_kg', 'height_cm']
categorical features: ['level', 'gender']


In [344]:
X = df.drop(columns=["workout"])
y = df["workout"]

In [345]:
X

Unnamed: 0,weight_kg,height_cm,level,gender
0,80,185,advanced,male
1,70,180,beginner,male
2,70,190,beginner,male
3,80,185,intermediate,male
4,75,170,beginner,male
5,60,150,beginner,female


In [346]:
y.values

array([1, 1, 2, 2, 1, 2], dtype=int64)

In [347]:
# transforming the data part
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', num_features),  # pass through numeric features
        ('cat', OneHotEncoder(), cat_features)  # apply OneHotEncoder to categorical features
    ])

# visualizing the transformed data
print(preprocessor.fit_transform(X,y))  

# pipelining the transformed data with the clasifier model into a single model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier())
])

[[ 80. 185.   1.   0.   0.   0.   1.]
 [ 70. 180.   0.   1.   0.   0.   1.]
 [ 70. 190.   0.   1.   0.   0.   1.]
 [ 80. 185.   0.   0.   1.   0.   1.]
 [ 75. 170.   0.   1.   0.   0.   1.]
 [ 60. 150.   0.   1.   0.   1.   0.]]


In [348]:
model.fit(X,y)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', 'passthrough',
                                                  ['weight_kg', 'height_cm']),
                                                 ('cat', OneHotEncoder(),
                                                  ['level', 'gender'])])),
                ('classifier', RandomForestClassifier())])

In [349]:
y_pred = model.predict(X)
y_pred

array([1, 1, 2, 2, 1, 2], dtype=int64)

In [350]:
from sklearn.metrics import accuracy_score
score = accuracy_score(y,y_pred)
print(score)

1.0


In [351]:
import pickle
with open("classifier.pkl","wb") as pickle_out:
    pickle.dump(model,pickle_out)
    pickle_out.close()

In [352]:
test_user = [{'weight_kg': 80,'height_cm':180, 'level':'beginner','gender':'male'}]
prediction = model.predict(pd.DataFrame(test_user))
print(f'I recommend you play workout number {prediction[0]}')

I recommend you play workout number 1
