In [2]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Load the raw fitness log into `df`.
# The path is relative to the notebook's working directory, matching the
# "../models/..." paths this notebook uses when saving artifacts, so the
# notebook is not tied to one user's machine.
# NOTE(review): assumes the notebook runs from a sibling folder of `data/`
# inside the project root — confirm against the repository layout.
df = pd.read_csv("../data/fitness_data_realistic.csv")
df.head()

Unnamed: 0,Date,Workout Type,Duration (mins),Calories Burned,Intensity (RPE),Avg Heart Rate,Weight,Age,Sex
0,01-01-2024,Strength,33,201,7,142,72,22,M
1,02-01-2024,Rest,0,0,0,63,72,22,M
2,03-01-2024,Cardio,48,389,8,151,72,22,M
3,04-01-2024,HIIT,45,411,8,149,72,22,M
4,05-01-2024,Mobility,40,117,2,94,72,22,M


In [4]:
# Build the feature matrix `X_final` and the target `y` from `df`.

# Encode 'Sex' as binary (M -> 1, F -> 0).
# The guard keeps this cell safe to re-run: once the column already holds
# 0/1, mapping it again would turn every value into NaN, because neither
# 0 nor 1 is a key of the mapping.
sex_codes = {'M': 1, 'F': 0}
if df['Sex'].isin(list(sex_codes)).any():
    df['Sex'] = df['Sex'].map(sex_codes)

# Define features and target
feature_columns = [
    "Workout Type", "Duration (mins)", "Intensity (RPE)",
    "Avg Heart Rate", "Weight", "Age", "Sex"
]
X = df[feature_columns]
y = df["Calories Burned"]

# One-hot encode workout type.
# scikit-learn renamed `sparse` to `sparse_output` in 1.2 and removed the old
# keyword in 1.4; try the current name first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
workout_encoded = encoder.fit_transform(X[["Workout Type"]])
workout_df = pd.DataFrame(
    workout_encoded,
    columns=encoder.get_feature_names_out(["Workout Type"]),
)

# Combine numerical and encoded features.
# Both sides get a fresh 0..n-1 index so the column-wise concat pairs rows by
# position rather than by whatever index `df` carried.
numeric_part = X.drop(columns=["Workout Type"]).reset_index(drop=True)
X_final = pd.concat([numeric_part, workout_df.reset_index(drop=True)], axis=1)
X_final.head()




Unnamed: 0,Duration (mins),Intensity (RPE),Avg Heart Rate,Weight,Age,Sex,Workout Type_Cardio,Workout Type_HIIT,Workout Type_Mobility,Workout Type_Rest,Workout Type_Strength,Workout Type_Yoga
0,33,7,142,72,22,1,0.0,0.0,0.0,0.0,1.0,0.0
1,0,0,63,72,22,1,0.0,0.0,0.0,1.0,0.0,0.0
2,48,8,151,72,22,1,1.0,0.0,0.0,0.0,0.0,0.0
3,45,8,149,72,22,1,0.0,1.0,0.0,0.0,0.0,0.0
4,40,2,94,72,22,1,0.0,0.0,1.0,0.0,0.0,0.0


In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion (fit returns the
# estimator itself, so `model` is the fitted regressor).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and report the mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"MAE: {mae:.2f} kcal")


MAE: 25.97 kcal


In [6]:
# Persist the fitted regressor and the fitted workout-type encoder.
# joblib.dump writes to the given path as-is and fails if the parent folder
# is missing, so create "../models" first (no-op when it already exists).
os.makedirs("../models", exist_ok=True)
joblib.dump(model, "../models/calorie_rf.pkl")
joblib.dump(encoder, "../models/workout_encoder.pkl")  # Save encoder too


['../models/workout_encoder.pkl']

In [7]:
# Summarise hold-out performance using the predictions from the training cell:
# mean absolute error, root-mean-squared error, and R².
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)  # back to kcal units

r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"✅ MAE: {mae:.2f} kcal")
print(f"✅ RMSE: {rmse:.2f} kcal")
print(f"✅ R² Score: {r2:.2f}")

✅ MAE: 25.97 kcal
✅ RMSE: 35.85 kcal
✅ R² Score: 0.95
