In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

In [None]:
# Read the gym-members exercise-tracking CSV (path is relative to the working
# directory) and show the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="data/gym_members_exercise_tracking.csv")
df.head(n=5)

In [None]:
# Count missing values per column (`isna` is the canonical name of `isnull`).
df.isna().sum()

In [None]:
# Encode the two categorical columns so that they become numeric features.
# Both steps are guarded so this cell can be re-run on an already-encoded `df`:
# unguarded, a second run raises KeyError in get_dummies (the column is gone),
# and re-applying the Gender map to the 0/1 codes would turn them into NaN.
if "Workout_Type" in df.columns:
    # One-hot encode: one indicator column per workout type.
    df = pd.get_dummies(df, columns=["Workout_Type"])

if not pd.api.types.is_numeric_dtype(df['Gender']):
    gender_codes = df['Gender'].map({'Male': 0, 'Female': 1})
    # Series.map yields NaN for labels that are not keys of the dict; fail
    # loudly rather than silently injecting NaN into the feature matrix.
    # Values that were already missing before the mapping are left as they are.
    unmapped = df.loc[gender_codes.isna() & df['Gender'].notna(), 'Gender'].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unexpected Gender labels: {list(unmapped)}")
    df['Gender'] = gender_codes

df.head()

In [None]:
# Target: the Fat_Percentage column. Features: every remaining column.
Y = df["Fat_Percentage"]
X = df.drop(columns="Fat_Percentage")

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Baseline: random forest with default hyperparameters and a fixed seed.
# `fit` returns the estimator itself, so construction and training are chained;
# the bare name on the last line keeps the estimator as the cell output.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
model

In [None]:
# Root-mean-squared error of the baseline model on the held-out test set.
y_pred = model.predict(X_test)
baseline_mse = mean_squared_error(y_test, y_pred)
np.sqrt(baseline_mse)

In [None]:
# Hyperparameter search space for the random forest
# (3 * 4 * 3 * 3 * 3 = 324 candidate combinations).
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    # 1.0 (use all features) replaces the string 'auto', which scikit-learn
    # deprecated in 1.1 and removed in 1.3; for regressors 'auto' meant
    # "all features", so the searched configurations are unchanged.
    'max_features': [1.0, 'sqrt', 'log2'],
}

In [None]:
# Exhaustive 5-fold cross-validated search over `param_grid`, run on all
# available cores. Candidates are scored by negated MSE (higher is better).
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [None]:
# Root-mean-squared error on the test set for the best estimator found by the search.
model_tuned = grid_search.best_estimator_
y_pred_tuned = model_tuned.predict(X_test)
tuned_mse = mean_squared_error(y_test, y_pred_tuned)
np.sqrt(tuned_mse)

In [None]:
# Actual vs. predicted scatter for the tuned model; points lying on the dashed
# diagonal are perfect predictions.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred_tuned, color='blue', alpha=0.6, label='Predicted vs Actual')

# The diagonal spans the observed range of the true target values.
target_range = [y_test.min(), y_test.max()]
ax.plot(target_range, target_range, color='red', linestyle='--', label='Perfect Prediction Line')

ax.set_title('Actual vs Predicted Fat Percentage', fontsize=14)
ax.set_xlabel('Actual Fat Percentage', fontsize=12)
ax.set_ylabel('Predicted Fat Percentage', fontsize=12)
ax.grid(True)
ax.legend()
plt.show()