In [2]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Train a linear-regression calorie model from the exercise/calories CSVs and
# persist it with joblib. Paths are relative to the current working directory.

# 1) Load datasets
calories = pd.read_csv("../data/calories.csv")
exercise = pd.read_csv("../data/exercise.csv")

# 2) Clean column names: trim whitespace and lower-case the headers so the
#    column names referenced below ("user_id", FEATURES, TARGET) match.
calories.columns = calories.columns.str.strip().str.lower()
exercise.columns = exercise.columns.str.strip().str.lower()

# 3) Merge datasets on user_id (pd.merge's default inner join keeps only ids
#    present in both frames).
df = pd.merge(exercise, calories, on="user_id")

print(df.head())

# 4) Features and target
FEATURES = ["age", "height", "weight", "duration", "heart_rate", "body_temp"]
TARGET = "calories"

X = df[FEATURES]
y = df[TARGET]

# 5) Train/test split: 20% held out, fixed seed for a reproducible split.
#    NOTE: X_test / y_test are not used in this cell; they are kept as
#    module-level names in case evaluation happens elsewhere.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 6) Train model
model = LinearRegression()
model.fit(X_train, y_train)

# 7) Save trained model
MODEL_PATH = "../models/calorie_model.pkl"
# joblib.dump does not create missing directories; make sure the target
# folder exists so a fresh checkout does not fail after training succeeds.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, MODEL_PATH)

print(f"✅ Model trained and saved to {MODEL_PATH}")


    user_id  gender  age  height  weight  duration  heart_rate  body_temp  \
0  14733363    male   68   190.0    94.0      29.0       105.0       40.8   
1  14861698  female   20   166.0    60.0      14.0        94.0       40.3   
2  11179863    male   69   179.0    79.0       5.0        88.0       38.7   
3  16180408  female   34   179.0    71.0      13.0       100.0       40.5   
4  17771927  female   27   154.0    58.0      10.0        81.0       39.8   

   calories  
0     231.0  
1      66.0  
2      26.0  
3      71.0  
4      35.0  
✅ Model trained and saved to ../models/calorie_model.pkl
