# 🏋️‍♂️ Personal Fitness Tracker - Data Analysis & Model Training
This notebook explores the fitness tracker dataset, performs data preprocessing, and trains models for calorie prediction.

In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [None]:
# Read both source tables; the relative paths resolve against the current
# working directory.
calories_data = pd.read_csv('calories.csv')
exercise_data = pd.read_csv('exercise.csv')

# Join exercise rows (left) with calorie rows (right) on the shared
# 'User_ID' column. No `how` is given, so pandas' default join type applies.
data = exercise_data.merge(calories_data, on='User_ID')

# Last expression of the cell: shows the first rows of the merged frame.
data.head()

## 📊 Data Visualization

In [None]:
# Scatter of 'Duration' (x) against 'Calories_Burned' (y), with the marker
# colour driven by 'BMI'.
# NOTE(review): assumes the merged frame exposes columns named 'Duration',
# 'Calories_Burned' and 'BMI' -- confirm against the CSV headers.
fig = px.scatter(
    data,
    x='Duration',
    y='Calories_Burned',
    color='BMI',
    title='Duration vs Calories Burned',
)
fig.show()

## 🏗️ Model Training & Evaluation

In [None]:
# Predictor columns and regression target taken from the merged frame.
# NOTE(review): assumes all five columns exist in `data` and contain no
# values the estimator would reject -- confirm against the CSV contents.
X = data[['Age', 'BMI', 'Duration', 'Heart_Rate']]
y = data['Calories_Burned']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a seeded 100-tree random forest and fit it on the training portion
# (`fit` returns the estimator itself, so `model` is the fitted forest).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out rows and score the predictions against the truth.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics rounded to two decimal places.
print(f'Mean Absolute Error: {mae:.2f}')
print(f'R-squared Score: {r2:.2f}')