In [13]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
import time
import itertools
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

In [26]:
df = pd.read_csv('calories.csv')
df['Gender'] = df['Gender'].map({'male': 0, 'female': 1})
df.drop(['User_ID'], axis=1, inplace=True)
df

Unnamed: 0,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,0,68,190.0,94.0,29.0,105.0,40.8,231.0
1,1,20,166.0,60.0,14.0,94.0,40.3,66.0
2,0,69,179.0,79.0,5.0,88.0,38.7,26.0
3,1,34,179.0,71.0,13.0,100.0,40.5,71.0
4,1,27,154.0,58.0,10.0,81.0,39.8,35.0
...,...,...,...,...,...,...,...,...
14995,1,20,193.0,86.0,11.0,92.0,40.4,45.0
14996,1,27,165.0,65.0,6.0,85.0,39.2,23.0
14997,1,43,159.0,58.0,16.0,90.0,40.1,75.0
14998,0,78,193.0,97.0,2.0,84.0,38.3,11.0


In [15]:
X = df.drop(['Calories'], axis=1)
y = df['Calories']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
bagging_model = RandomForestRegressor(n_estimators=500, random_state=42, oob_score=True)
bagging_model.fit(X_train, y_train)

y_pred_bagging = bagging_model.predict(X_test)
test_mse_bagging = mean_squared_error(y_test, y_pred_bagging)

print(f"Test MSE for Bagging (Random Forest): {test_mse_bagging}")

# Feature Importance
importances = bagging_model.feature_importances_
feature_importances = pd.Series(importances, index=X_train.columns)
feature_importances = feature_importances.sort_values(ascending=False)
print("\nFeature Importances:")
feature_importances

Test MSE for Bagging (Random Forest): 6.916138568

Feature Importances:


Duration      0.914048
Heart_Rate    0.048105
Age           0.026350
Gender        0.006776
Weight        0.002985
Height        0.001248
Body_Temp     0.000489
dtype: float64

In [17]:
# R-squared
# Test Accuracy via MSE
# Training Accuracy via MSE
# AUC?
# F1?


In [18]:
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.metrics import accuracy_score

In [19]:
#Preductions on training data
y_train_pred = bagging_model.predict(X_train)
test_mse_boost = mean_squared_error(y_test, y_pred_bagging)

#MSE
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_pred_bagging)

#MAE
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_test, y_pred_bagging)

# R^2
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_pred_bagging)

print(f"Training MSE: {train_mse}")
print(f"Test MSE: {test_mse}")
print(f"Training MAE: {train_mae}")
print(f"Test MAE: {test_mae}")
print(f"Training R-squared: {train_r2}")
print(f"Test R-squared: {test_r2}")

Training MSE: 1.0929336293333332
Test MSE: 6.916138568
Training MAE: 0.637364
Test MAE: 1.6695346666666664
Training R-squared: 0.9997172838314758
Test R-squared: 0.9982862993276923


In [20]:
from xgboost import XGBRegressor

In [None]:
#loading the model
boost_model = XGBRegressor(n_estimators=500)
#training the model with X_train
boost_model.fit(X_train,y_train)

In [24]:
y_pred_boost = boost_model.predict(X_test)
test_mse_boost = mean_squared_error(y_test, y_pred_boost)

#MSE
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_pred_boost)

#MAE
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_test, y_pred_boost)

# R^2
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_pred_boost)

print(f"Training MSE: {train_mse}")
print(f"Test MSE: {test_mse}")
print(f"Training MAE: {train_mae}")
print(f"Test MAE: {test_mae}")
print(f"Training R-squared: {train_r2}")
print(f"Test R-squared: {test_r2}")

Training MSE: 1.0929336293333332
Test MSE: 3.4537828599802753
Training MAE: 0.637364
Test MAE: 1.253790535847346
Training R-squared: 0.9997172838314758
Test R-squared: 0.9991442117662971


In [25]:
pd.read_csv('exercise_dataset.csv')

Unnamed: 0,ID,Exercise,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions,Exercise Intensity
0,1,Exercise 2,286.959851,91.892531,96.301115,45,Male,37,170,29.426275,Rainy,5
1,2,Exercise 7,343.453036,64.165097,61.104668,25,Male,43,142,21.286346,Rainy,5
2,3,Exercise 4,261.223465,70.846224,71.766724,20,Male,20,148,27.899592,Cloudy,4
3,4,Exercise 5,127.183858,79.477008,82.984456,33,Male,39,170,33.729552,Sunny,10
4,5,Exercise 10,416.318374,89.960226,85.643174,29,Female,34,118,23.286113,Cloudy,3
...,...,...,...,...,...,...,...,...,...,...,...,...
3859,3860,Exercise 5,154.113144,98.147291,96.745133,20,Female,22,139,32.447764,Cloudy,7
3860,3861,Exercise 4,486.392768,97.598957,92.700573,21,Female,49,160,26.602475,Rainy,5
3861,3862,Exercise 4,264.307731,94.946612,96.778936,57,Male,56,167,31.435348,Rainy,9
3862,3863,Exercise 9,185.951870,64.743906,68.662889,58,Female,60,128,19.774614,Rainy,1


In [27]:
pd.read_csv('activity_data_heartrate.csv')

Unnamed: 0,Id,Date,TotalSteps,TotalDistance,TotalActiveMinutes,Heart_rate,Calories
0,4558609924,2016-04-12,5135,3.39,318,76.639377,1909
1,4558609924,2016-04-14,6799,4.49,279,81.556556,1922
2,4558609924,2016-04-20,4803,3.17,222,76.923272,1788
3,4558609924,2016-04-24,8563,5.66,395,83.969233,2173
4,4558609924,2016-05-01,3428,2.27,190,70.841904,1692
...,...,...,...,...,...,...,...
329,6962181067,2016-04-17,10145,6.71,313,76.664489,2027
330,6962181067,2016-05-04,10147,6.71,335,79.474591,2086
331,6962181067,2016-04-16,13217,8.74,333,82.040182,2173
332,6775888955,2016-04-19,0,0.00,0,99.461538,1841
