In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
# Location of the raw exercise dataset on GitHub.
EXERCISE_DATA_URL = (
    "https://raw.githubusercontent.com/haobo-yuan/IDS706-FinalProject/refs/heads/main/exercise_dataset.csv"
)

# Access tokens must never be committed with the notebook. If the file needs
# one, export it as GITHUB_RAW_TOKEN before starting the kernel; when the
# variable is unset (or empty) the plain URL is used.
github_raw_token = os.environ.get("GITHUB_RAW_TOKEN")
exercise_data_source = (
    f"{EXERCISE_DATA_URL}?token={github_raw_token}"
    if github_raw_token
    else EXERCISE_DATA_URL
)

exercise_df = pd.read_csv(exercise_data_source)

exercise_df.head()

Unnamed: 0,ID,Exercise,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions,Exercise Intensity
0,1,Exercise 2,286.959851,91.892531,96.301115,45,Male,37,170,29.426275,Rainy,5
1,2,Exercise 7,343.453036,64.165097,61.104668,25,Male,43,142,21.286346,Rainy,5
2,3,Exercise 4,261.223465,70.846224,71.766724,20,Male,20,148,27.899592,Cloudy,4
3,4,Exercise 5,127.183858,79.477008,82.984456,33,Male,39,170,33.729552,Sunny,10
4,5,Exercise 10,416.318374,89.960226,85.643174,29,Female,34,118,23.286113,Cloudy,3


In [3]:
# Drop the columns that are not used downstream. Using a non-in-place drop with
# errors="ignore" keeps this cell re-runnable: a second execution no longer
# raises KeyError because the columns are already gone.
exercise_df = exercise_df.drop(
    columns=["ID", "Exercise", "Weather Conditions"], errors="ignore"
)

# Data manipulation: recover height from BMI and weight.
# BMI = weight / height**2  =>  height = sqrt(weight / BMI)
exercise_df["Height"] = np.sqrt(
    exercise_df["Actual Weight"] / exercise_df["BMI"]
)

# Back out the MET value implied by each row from
#   calories = MET * weight * duration * 0.0175
# Computed column-wise instead of with a row-by-row apply(); the arithmetic and
# its order are unchanged, so the resulting values are the same.
exercise_df["Estimated MET"] = exercise_df["Calories Burn"] / (
    exercise_df["Actual Weight"] * exercise_df["Duration"] * 0.0175
)

# Discard rows with any missing value before modelling.
exercise_df = exercise_df.dropna()
exercise_df

Unnamed: 0,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Exercise Intensity,Height,Estimated MET
0,286.959851,91.892531,96.301115,45,Male,37,170,29.426275,5,1.809039,4.602036
1,343.453036,64.165097,61.104668,25,Male,43,142,21.286346,5,1.694286,7.469413
2,261.223465,70.846224,71.766724,20,Male,20,148,27.899592,4,1.603846,10.399705
3,127.183858,79.477008,82.984456,33,Male,39,170,33.729552,10,1.568531,2.245601
4,416.318374,89.960226,85.643174,29,Female,34,118,23.286113,3,1.917776,8.169883
...,...,...,...,...,...,...,...,...,...,...,...
3859,154.113144,98.147291,96.745133,20,Female,22,139,32.447764,7,1.726721,4.137613
3860,486.392768,97.598957,92.700573,21,Female,49,160,26.602475,5,1.866724,6.118861
3861,264.307731,94.946612,96.778936,57,Male,56,167,31.435348,9,1.754613,2.786782
3862,185.951870,64.743906,68.662889,58,Female,60,128,19.774614,1,1.863404,2.579225


In [4]:
def total_calories_to_burn(actual_weight, dream_weight, calories_per_kg=7700):
    """Return the energy deficit needed to go from the actual to the dream weight.

    Parameters
    ----------
    actual_weight : float
        Current body weight in kilograms.
    dream_weight : float
        Goal body weight in kilograms.
    calories_per_kg : float, optional
        Energy assumed to correspond to one kilogram of body fat, in kcal.
        Defaults to 7700, the value this notebook has always used.

    Returns
    -------
    float or str
        ``(actual_weight - dream_weight) * calories_per_kg`` when the actual
        weight is above the dream weight. Otherwise the message string
        ``"No need to lose weight!"`` is returned, so callers must check the
        type of the result before doing arithmetic or numeric formatting.
    """
    weight_difference = actual_weight - dream_weight
    if weight_difference <= 0:
        # Already at or below the goal: nothing to burn.
        return "No need to lose weight!"
    return weight_difference * calories_per_kg


# Example usage: a 10 kg gap between the current and the goal weight.
actual_weight, dream_weight = 80, 70  # both in kg
total_calories = total_calories_to_burn(
    actual_weight=actual_weight,
    dream_weight=dream_weight,
)
print(f"Total Calories Needed to Burn: {total_calories:.2f} kcal")

Total Calories Needed to Burn: 77000.00 kcal


In [5]:
# One-hot encode Gender; with drop_first=True only the Gender_Male indicator
# column remains. The guard makes the cell safe to re-run: once Gender has been
# encoded the column no longer exists and get_dummies would raise a KeyError.
if "Gender" in exercise_df.columns:
    exercise_df = pd.get_dummies(
        exercise_df, columns=["Gender"], drop_first=True
    )
print(exercise_df.columns)

# Predictors and target for the MET regression.
X = exercise_df[
    ["Height", "Age", "Exercise Intensity", "Gender_Male", "Actual Weight"]
]
y = exercise_df["Estimated MET"]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on the held-out rows.
y_pred = model.predict(X_test)

Index(['Calories Burn', 'Dream Weight', 'Actual Weight', 'Age', 'Duration',
       'Heart Rate', 'BMI', 'Exercise Intensity', 'Height', 'Estimated MET',
       'Gender_Male'],
      dtype='object')


In [6]:
# Fitted parameters of the linear model.
intercept = model.intercept_
coefficients = model.coef_

# Report the raw (unrounded) values, one predictor per line.
print("Intercept:", intercept)
for feature, coef in zip(X.columns, coefficients):
    print(f"Coefficient for {feature}: {coef}")

# Build a readable one-line equation with every number rounded to 4 decimals.
formula_terms = [
    f" + ({coef:.4f} * {feature})"
    for feature, coef in zip(X.columns, coefficients)
]
lr_formula = f"Estimated MET = {intercept:.4f}" + "".join(formula_terms)

print("\nLinear Regression Formula:")
print(lr_formula)

Intercept: 13.779529818527086
Coefficient for Height: -0.04898703607665184
Coefficient for Age: -5.529158271672108e-05
Coefficient for Exercise Intensity: -0.022959345564065634
Coefficient for Gender_Male: 0.0020696764326581974
Coefficient for Actual Weight: -0.09384683185171883

Linear Regression Formula:
Estimated MET = 13.7795 + (-0.0490 * Height) + (-0.0001 * Age) + (-0.0230 * Exercise Intensity) + (0.0021 * Gender_Male) + (-0.0938 * Actual Weight)


In [7]:
def calculate_exercise_duration(
    target_calories, height, age, exercise_intensity, gender, actual_weight
):
    """Estimate how long a session must last to burn ``target_calories``.

    The MET value is predicted with the linear regression fitted earlier in
    this notebook (coefficients copied here rounded to four decimals), turned
    into a burn rate with ``MET * weight * 0.0175`` and divided into the
    calorie target.

    Parameters
    ----------
    target_calories : float
        Calories (kcal) to burn in one session.
    height : float
        Height in meters.
    age : float
        Age in years.
    exercise_intensity : float
        Exercise intensity level.
    gender : str
        ``"male"`` (any letter case, surrounding whitespace ignored) sets the
        Gender_Male indicator to 1; every other value is treated as 0.
    actual_weight : float
        Current body weight in kilograms; must be positive.

    Returns
    -------
    tuple of (float, float)
        ``(duration, estimated_met)`` where ``duration`` is in minutes.

    Raises
    ------
    ValueError
        If ``actual_weight`` is not positive, or if the predicted MET is not
        positive for the given inputs.
    """
    # A zero weight would make the burn rate zero (division by zero below) and
    # a negative one would yield a negative duration, so reject both with the
    # exception type callers already handle.
    if actual_weight <= 0:
        raise ValueError("Actual weight must be a positive number.")

    # Coefficients from the linear regression formula
    intercept = 13.7795
    coef_height = -0.0490
    coef_age = -0.0001
    coef_intensity = -0.0230
    coef_gender = 0.0021
    coef_weight = -0.0938

    # Factor that converts MET * weight into a burn rate in kcal per minute.
    kcal_per_met_kg_minute = 0.0175

    # Gender_Male: 1 represents Male, 0 represents Female
    gender_male = 1 if gender.strip().lower() == "male" else 0

    # Calculate the Estimated MET
    estimated_met = (
        intercept
        + coef_height * height
        + coef_age * age
        + coef_intensity * exercise_intensity
        + coef_gender * gender_male
        + coef_weight * actual_weight
    )

    # The weight coefficient is negative, so a large enough weight pushes the
    # prediction to zero or below, where a duration cannot be derived.
    if estimated_met <= 0:
        raise ValueError(
            "Estimated MET value is invalid. Please check your input parameters."
        )

    # Calculate calorie burn rate (kcal/min)
    calorie_burn_rate = estimated_met * actual_weight * kcal_per_met_kg_minute

    # Calculate the required exercise duration (minutes)
    duration = target_calories / calorie_burn_rate

    return duration, estimated_met

In [8]:
# Example input
height = 1.68  # Height in meters
actual_weight = 60  # Current weight (kg)
dream_weight = 50  # Goal weight (kg)
age = 27  # Age in years
exercise_intensity = 5  # Exercise intensity level (1-10)
gender = "Female"  # Gender ('Male' or 'Female')
total_time = 12  # Length of the plan in weeks
frequency_per_week = 5  # Exercise sessions per week

# Step 1: total energy deficit for the whole plan. The helper returns a
# message string instead of a number when no weight loss is needed.
total_calories = total_calories_to_burn(actual_weight, dream_weight)

if not isinstance(total_calories, str):
    print(f"Total calories to burn: {total_calories:.2f} kcal")

    # Step 2: spread the deficit evenly over every planned session
    target_calories = total_calories / (total_time * frequency_per_week)
    print(f"Target calories per session: {target_calories:.2f} kcal")

    # Step 3: session length needed to hit the per-session target
    try:
        duration, estimated_met = calculate_exercise_duration(
            target_calories=target_calories,
            height=height,
            age=age,
            exercise_intensity=exercise_intensity,
            gender=gender,
            actual_weight=actual_weight,
        )
    except ValueError as err:
        # Raised when the inputs do not yield a usable MET estimate.
        print(err)
    else:
        print(f"Estimated MET: {estimated_met:.2f}")
        print(
            f"You need to exercise for approximately {duration:.2f} minutes per session to burn {target_calories:.2f} kcal."
        )
else:
    # Nothing to lose: show the helper's message as-is.
    print(total_calories)

Total calories to burn: 77000.00 kcal
Target calories per session: 1283.33 kcal
Estimated MET: 7.95
You need to exercise for approximately 153.71 minutes per session to burn 1283.33 kcal.
