### Workflow from data loading to model training

In [17]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer

In [6]:
df = pd.read_csv('data/clean/clean_data.csv')

In [7]:
df.head()

Unnamed: 0,age,gender,height_cm,weight_kg,activity_type,duration_minutes,intensity,calories_burned,avg_heart_rate,hours_sleep,stress_level,daily_steps,hydration_level,bmi,resting_heart_rate,blood_pressure_systolic,blood_pressure_diastolic,smoking_status,fitness_level
0,56,F,165.3,53.7,Dancing,41,Low,3.3,103,6.6,3,7128,1.5,19.6,69.5,110.7,72.9,Never,0.04
1,56,F,165.3,53.9,Swimming,28,Low,2.9,102,8.1,7,7925,1.8,19.6,69.5,110.7,72.9,Never,0.07
2,56,F,165.3,54.2,Swimming,21,Medium,2.6,126,6.2,7,7557,2.7,19.6,69.5,110.7,72.9,Never,0.09
3,56,F,165.3,54.4,Weight Training,99,Medium,10.7,141,7.2,8,11120,2.6,19.6,69.5,110.7,72.9,Never,0.21
4,56,F,165.3,54.7,Swimming,100,Medium,12.7,112,7.1,1,5406,1.5,19.6,69.5,110.7,72.9,Never,0.33


In [10]:
train_data, test_data = train_test_split(df, test_size=0.3, random_state=42)

In [11]:
train_data.shape, test_data.shape

((481390, 19), (206311, 19))

In [12]:
X_train, X_test, y_train, y_test = (
    train_data.drop('fitness_level', axis=1),
    test_data.drop('fitness_level', axis=1),
    train_data['fitness_level'],
    test_data['fitness_level']
)

In [22]:
numerical_columns = X_train.select_dtypes('number').columns
categorical_columns = X_train.select_dtypes('object').columns

In [26]:
numerical_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('scaling', StandardScaler())
])

categorical_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encoding', OneHotEncoder(sparse_output=False)),
    ('scaling', StandardScaler())
])

In [27]:
preprocessor = ColumnTransformer([
    ('numerical_trf', numerical_pipeline, numerical_columns),
    ('categorical_trf', categorical_pipeline, categorical_columns)
])

In [28]:
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

In [41]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import R2Score

In [35]:
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),

    Dense(64, activation='relu'),
    Dropout(0.3),

    Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [42]:
model.compile(loss=MeanSquaredError(), optimizer=Adam(), metrics=[R2Score()])

In [43]:
model.summary()

In [44]:
es_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [45]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, callbacks=[es_callback])

Epoch 1/10
[1m15044/15044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3ms/step - loss: 1.5568 - r2_score: 0.9487 - val_loss: 1.2009 - val_r2_score: 0.9604
Epoch 2/10
[1m15044/15044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3ms/step - loss: 1.0808 - r2_score: 0.9642 - val_loss: 1.3758 - val_r2_score: 0.9546
Epoch 3/10
[1m15044/15044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 3ms/step - loss: 1.0030 - r2_score: 0.9669 - val_loss: 1.2217 - val_r2_score: 0.9597
Epoch 4/10
[1m15044/15044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3ms/step - loss: 0.9675 - r2_score: 0.9680 - val_loss: 1.2423 - val_r2_score: 0.9590
Epoch 5/10
[1m15044/15044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 3ms/step - loss: 0.9537 - r2_score: 0.9684 - val_loss: 1.3235 - val_r2_score: 0.9564
Epoch 6/10
[1m15044/15044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 5ms/step - loss: 0.9565 - r2_score: 0.9684 - val_loss: 1.1196 - val_r2_score: 0.96

<keras.src.callbacks.history.History at 0x23fa76f47c0>