In [104]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score



# Load the raw dataset from the notebook's working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="calories.csv")

In [109]:
# Show the first rows (head() defaults to five) as a quick sanity check of the load.
print("Dataset preview:", df.head(), sep="\n")

# Preprocessing:
# Count missing entries per column (isna is the pandas alias of isnull).
print("\nMissing Values:", df.isna().sum(), sep="\n")

# Select features and target.
# Calories is the target and User_ID is excluded from the features.
X = df.drop(columns=["Calories", "User_ID"])
y = df["Calories"]

# Categorical feature (one-hot encoded below)
cat_features = ["Gender"]

# Numerical features (handed to the regressor unchanged)
num_features = ["Age", "Weight", "Height", "Duration", "Heart_Rate", "Body_Temp"]

# Preprocess: one-hot encode Gender and pass the numeric columns through as-is.
# Both column lists are wired in explicitly and remainder="drop" is set, so a
# column that is not declared above can never slip into the model unnoticed.
preprocess = ColumnTransformer(
    transformers=[
        # handle_unknown="ignore": a category not seen during fit is encoded as
        # all zeros instead of raising at predict time.
        ("cat", OneHotEncoder(sparse_output=False, handle_unknown="ignore"), cat_features),
        ("num", "passthrough", num_features),
    ],
    remainder="drop",
)


# Train-test split: hold out 20% of the rows; the fixed seed keeps it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Model building: preprocessing followed by a linear regression, as one estimator.
model = Pipeline(
    steps=[
        ("prep", preprocess),
        ("reg", LinearRegression()),
    ]
)

# Fit on the training portion only.
model.fit(X_train, y_train)

# Evaluation on the held-out rows.
y_pred = model.predict(X_test)

print("\nModel Evaluation Metrics:")
print(
    "R2 Score:",
    r2_score(y_test, y_pred),
)
print(
    "RMSE:",
    np.sqrt(mean_squared_error(y_test, y_pred)),  # root of the MSE
)

# Prediction for a single hand-written record.
# The frame is built from one record dict; each key becomes a column.
input_data = pd.DataFrame(
    [
        {
            "Age": 25,
            "Weight": 70,
            "Height": 175,
            "Duration": 60,
            "Heart_Rate": 130,
            "Body_Temp": 40.0,
            "Gender": "male",
        }
    ]
)

# predict() returns one value per input row; this frame has exactly one row.
pred_calories = model.predict(input_data)

print("\nPredicted Calories Burned for:", input_data, sep="\n")
print("==> Calories Burned:", round(pred_calories[0], 2), "kcal")

Dataset preview:
    User_ID  Gender  Age  Height  Weight  Duration  Heart_Rate  Body_Temp  \
0  14733363    male   68   190.0    94.0      29.0       105.0       40.8   
1  14861698  female   20   166.0    60.0      14.0        94.0       40.3   
2  11179863    male   69   179.0    79.0       5.0        88.0       38.7   
3  16180408  female   34   179.0    71.0      13.0       100.0       40.5   
4  17771927  female   27   154.0    58.0      10.0        81.0       39.8   

   Calories  
0     231.0  
1      66.0  
2      26.0  
3      71.0  
4      35.0  

Missing Values:
User_ID       0
Gender        0
Age           0
Height        0
Weight        0
Duration      0
Heart_Rate    0
Body_Temp     0
Calories      0
dtype: int64

Model Evaluation Metrics:
R2 Score: 0.9672937151257293
RMSE: 11.488940149152931

Predicted Calories Burned for:
   Age  Weight  Height  Duration  Heart_Rate  Body_Temp Gender
0   25      70     175        60         130       40.0   male
==> Calories Burned: 44