# Lifestyle Models Training: 2A, 2B, 2C

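- Model 2A: Lifestyle Classifier — classifies `lifestyle_category` (active / athletic / sedentary)
- Model 2B: Activity Predictor — regression on `next_day_calories_burned`
- Model 2C: Health Risk Scorer — regression on `health_risk_score`
- Framework: MindSpore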


## Import Libraries


In [None]:
import json
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import (mean_squared_error, r2_score, mean_absolute_error,
                             accuracy_score, precision_recall_fscore_support,
                             confusion_matrix, classification_report)

# Installed before MindSpore is imported, as in the original cell order.
warnings.filterwarnings('ignore')

import mindspore
from mindspore import nn, ops, Tensor, context
from mindspore import save_checkpoint
import mindspore.dataset as ds

# Fix the global seed so that weight initialisation and dropout masks are
# repeatable across "Restart Kernel -> Run All" executions.
mindspore.set_seed(42)

# PyNative (eager) execution on CPU.
context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")



MindSpore version: 2.6.0
Libraries imported successfully!
Execution mode: PyNative (dynamic graph)
Device: CPU


## Load Data


In [None]:
# Read the preprocessed lifestyle table and preview the three target columns:
# class counts for the classification target, summary statistics for the two
# regression targets.
df = pd.read_csv('../data/processed/lifestyle_processed.csv')
print(f"Shape: {df.shape}")
for target_column, summarize in (
    ('lifestyle_category', pd.Series.value_counts),
    ('next_day_calories_burned', pd.Series.describe),
    ('health_risk_score', pd.Series.describe),
):
    print(summarize(df[target_column]))


Dataset shape: (20168, 121)

Columns: 121 total
Samples: 20168

TARGET VARIABLES

1. lifestyle_category (Model 2A - Classification):
lifestyle_category
active       10846
sedentary     8303
athletic      1019
Name: count, dtype: int64

2. next_day_calories_burned (Model 2B - Regression):
count    20168.000000
mean      1280.912615
std        502.600894
min        323.990000
25%        910.800000
50%       1232.115000
75%       1554.072500
max       2890.820000
Name: next_day_calories_burned, dtype: float64

3. health_risk_score (Model 2C - Regression):
count    20168.000000
mean        27.041303
std         18.124198
min          0.000000
25%         14.000000
50%         24.000000
75%         38.000000
max         76.000000
Name: health_risk_score, dtype: float64

Data loaded successfully!


## Prepare Features


In [None]:
# Candidate input columns shared by all three models (2A, 2B, 2C).
feature_candidates = [
    'Age_scaled', 'Weight (kg)_scaled', 'Height (m)_scaled', 'BMI_scaled',
    'Fat_Percentage_scaled', 'lean_body_mass_scaled', 'fat_mass_scaled',
    'Max_BPM_scaled', 'Avg_BPM_scaled', 'Resting_BPM_scaled',
    'Session_Duration (hours)_scaled', 'Calories_Burned_scaled',
    'Workout_Frequency (days/week)_scaled', 'workout_intensity',
    'Carbs_scaled', 'Proteins_scaled', 'Fats_scaled', 'Calories_scaled',
    'Water_Intake (liters)_scaled', 'protein_per_kg_scaled',
    'heart_rate_reserve_scaled', 'training_intensity_scaled',
    'calories_per_hour_scaled', 'water_per_kg_scaled',
    'caloric_balance_scaled', 'weekly_activity_load_scaled',
    'Experience_Level', 'Daily meals frequency', 'Physical exercise'
]

# Keep only the candidates present in the CSV (candidate order is preserved).
# Absent columns are reported instead of being dropped silently, because they
# change `input_dim` and therefore the shape of every saved checkpoint.
available_features = [col for col in feature_candidates if col in df.columns]
missing_features = [col for col in feature_candidates if col not in df.columns]
if missing_features:
    print(f"Warning: {len(missing_features)} candidate feature(s) not found and skipped: {missing_features}")
if not available_features:
    raise ValueError("None of the candidate feature columns are present in the dataset.")

# Missing values are replaced with 0 before casting to float32 for MindSpore.
X = df[available_features].fillna(0).values.astype(np.float32)
input_dim = X.shape[1]
print(f"Features: {len(available_features)}, Input dim: {input_dim}")


Available features: 29
Features (first 10): ['Age_scaled', 'Weight (kg)_scaled', 'Height (m)_scaled', 'BMI_scaled', 'Fat_Percentage_scaled', 'lean_body_mass_scaled', 'fat_mass_scaled', 'Max_BPM_scaled', 'Avg_BPM_scaled', 'Resting_BPM_scaled']

FEATURE MATRIX PREPARATION
Feature matrix shape: (20168, 29)
Number of features: 29
Number of samples: 20168

Input dimension for all models: 29


# Model 2A: Lifestyle Classifier


## Prepare Data


In [None]:
# Integer-encoded target, as required by the sparse softmax cross-entropy loss.
y_2a = df['lifestyle_category_encoded'].values.astype(np.int32)

# Recover the class names in the order of their integer codes directly from
# the data, rather than assuming the encoding follows alphabetical order.
# These names are later used to label the classification report and are
# written to the model metadata.
label_table = (
    df[['lifestyle_category_encoded', 'lifestyle_category']]
    .drop_duplicates()
    .sort_values('lifestyle_category_encoded')
)
class_codes = label_table['lifestyle_category_encoded'].tolist()
if (class_codes != list(range(len(class_codes)))
        or not label_table['lifestyle_category'].is_unique):
    raise ValueError(
        "lifestyle_category_encoded must map one-to-one onto codes 0..K-1; "
        f"found code/name pairs: {label_table.values.tolist()}"
    )
lifestyle_classes = label_table['lifestyle_category'].tolist()
num_classes_2a = len(lifestyle_classes)

# Stratified 80/20 split keeps the class proportions equal in both subsets.
X_2a_train, X_2a_test, y_2a_train, y_2a_test = train_test_split(
    X, y_2a, test_size=0.2, random_state=42, stratify=y_2a
)
print(f"Train: {X_2a_train.shape[0]}, Test: {X_2a_test.shape[0]}, Classes: {num_classes_2a}")

MODEL 2A: LIFESTYLE CLASSIFICATION
Target shape: (20168,)
Number of classes: 3
Class names: ['active', 'athletic', 'sedentary']

Class distribution:
  active: 10846 (53.8%)
  athletic: 1019 (5.1%)
  sedentary: 8303 (41.2%)

Train: 16134 | Test: 4034


## Train Model 2A


In [None]:
class LifestyleClassifier(nn.Cell):
    """Fully connected classifier: input_dim -> 128 -> 64 -> 32 -> num_classes.

    Each hidden layer is followed by ReLU and dropout. The output layer
    returns raw logits (no softmax), which is what the sparse softmax
    cross-entropy loss used for training expects.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, input_dim, num_classes):
        super(LifestyleClassifier, self).__init__()
        # Attribute names fc1..fc4 determine the parameter names stored in the
        # checkpoint, so they must not be renamed.
        self.fc1 = nn.Dense(input_dim, 128)
        self.fc2 = nn.Dense(128, 64)
        self.fc3 = nn.Dense(64, 32)
        self.fc4 = nn.Dense(32, num_classes)
        self.relu = nn.ReLU()
        # `p` is the drop probability (p = 1 - keep_prob). The `keep_prob`
        # argument is deprecated in MindSpore 2.x, so the equivalent `p`
        # values are used: keep 0.7 -> p 0.3, keep 0.8 -> p 0.2.
        self.dropout1 = nn.Dropout(p=0.3)
        self.dropout2 = nn.Dropout(p=0.3)
        self.dropout3 = nn.Dropout(p=0.2)

    def construct(self, x):
        """Return class logits for a batch of feature vectors `x`."""
        x = self.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.relu(self.fc3(x))
        x = self.dropout3(x)
        return self.fc4(x)

# Network, hyper-parameters, loss and optimiser for Model 2A.
model_2a = LifestyleClassifier(input_dim, num_classes_2a)
lr_2a = 0.001
epochs_2a = 80
batch_size_2a = 64
loss_fn_2a = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
optimizer_2a = nn.Adam(model_2a.trainable_params(), learning_rate=lr_2a)


def forward_2a(data, label):
    """Return the mean cross-entropy loss of Model 2A on one batch."""
    logits = model_2a(data)
    return loss_fn_2a(logits, label)


# Differentiate the loss with respect to the optimiser's parameters only
# (grad_position=None: no gradients with respect to the inputs).
grad_fn_2a = ops.value_and_grad(
    forward_2a, grad_position=None, weights=optimizer_2a.parameters
)


def train_step_2a(data, label):
    """Run one optimisation step on a batch and return the batch loss."""
    batch_loss, param_grads = grad_fn_2a(data, label)
    optimizer_2a(param_grads)
    return batch_loss

# Per-epoch history of mean batch loss and training accuracy (in percent).
train_losses_2a = []
train_accs_2a = []
model_2a.set_train(True)

# Dedicated, seeded generator so the per-epoch shuffle is reproducible.
shuffle_rng_2a = np.random.default_rng(42)
n_train_2a = len(X_2a_train)

for epoch in range(epochs_2a):
    epoch_loss = 0.0
    epoch_correct = 0
    epoch_total = 0
    num_batches = 0

    # Re-shuffle the sample order every epoch; without this every epoch would
    # see exactly the same mini-batches in exactly the same order.
    order = shuffle_rng_2a.permutation(n_train_2a)

    for i in range(0, n_train_2a, batch_size_2a):
        batch_idx = order[i:i+batch_size_2a]
        batch_X = Tensor(X_2a_train[batch_idx], mindspore.float32)
        batch_y = Tensor(y_2a_train[batch_idx], mindspore.int32)
        loss = train_step_2a(batch_X, batch_y)
        # Store plain Python floats rather than 0-d NumPy arrays.
        epoch_loss += float(loss.asnumpy())

        # Training accuracy is measured in inference mode on the batch that
        # was just used for the update, then training mode is restored.
        model_2a.set_train(False)
        preds = model_2a(batch_X)
        pred_classes = ops.argmax(preds, 1).asnumpy()
        epoch_correct += int((pred_classes == batch_y.asnumpy()).sum())
        epoch_total += len(batch_y)
        model_2a.set_train(True)
        num_batches += 1

    avg_loss = epoch_loss / num_batches
    avg_acc = epoch_correct / epoch_total * 100
    train_losses_2a.append(avg_loss)
    train_accs_2a.append(avg_acc)

    # Log the first epoch and then every tenth epoch.
    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch [{epoch+1:2d}/{epochs_2a}] Loss: {avg_loss:.4f} Acc: {avg_acc:.2f}%")

print(f"Final accuracy: {train_accs_2a[-1]:.2f}%")




Model 2A Architecture: 29 → 128 → 64 → 32 → 3

TRAINING MODEL 2A

Epoch [ 1/80] Loss: 0.5373 Acc: 76.59%
Epoch [10/80] Loss: 0.2788 Acc: 84.90%
Epoch [20/80] Loss: 0.2637 Acc: 85.96%
Epoch [30/80] Loss: 0.2319 Acc: 89.83%
Epoch [40/80] Loss: 0.1976 Acc: 92.32%
Epoch [50/80] Loss: 0.1669 Acc: 94.01%
Epoch [60/80] Loss: 0.1470 Acc: 94.81%
Epoch [70/80] Loss: 0.1481 Acc: 94.66%
Epoch [80/80] Loss: 0.1375 Acc: 95.12%

Training complete! Final accuracy: 95.12%


## Evaluate & Save


In [None]:
# Inference mode for evaluation on the hold-out set.
model_2a.set_train(False)
test_logits_2a = model_2a(Tensor(X_2a_test, mindspore.float32))
y_2a_pred = ops.argmax(test_logits_2a, 1).asnumpy()

# Pass the full list of class ids explicitly so that the confusion matrix and
# the report always have one row per entry of `lifestyle_classes`, even if a
# class happens to be absent from the test labels and predictions.
class_ids_2a = list(range(num_classes_2a))

acc_2a = accuracy_score(y_2a_test, y_2a_pred)
prec_2a, rec_2a, f1_2a, _ = precision_recall_fscore_support(
    y_2a_test, y_2a_pred, labels=class_ids_2a, average='weighted', zero_division=0
)

print(f"Accuracy: {acc_2a:.4f}, Precision: {prec_2a:.4f}, Recall: {rec_2a:.4f}, F1: {f1_2a:.4f}")
print(confusion_matrix(y_2a_test, y_2a_pred, labels=class_ids_2a))
print(classification_report(
    y_2a_test, y_2a_pred,
    labels=class_ids_2a, target_names=lifestyle_classes, zero_division=0
))

# Persist the trained weights and a JSON sidecar describing the model.
# (`json` is imported in the notebook's import cell.)
save_checkpoint(model_2a, '../models/model_2A_lifestyle_classifier.ckpt')
with open('../models/model_2A_metadata.json', 'w') as f:
    json.dump({
        'model_name': 'Lifestyle Classifier (Model 2A)',
        'model_type': 'Classification',
        'framework': 'MindSpore',
        'input_dim': input_dim,
        'output_dim': num_classes_2a,
        'classes': lifestyle_classes,
        'metrics': {'accuracy': float(acc_2a), 'precision': float(prec_2a), 'recall': float(rec_2a), 'f1_score': float(f1_2a)}
    }, f, indent=2)


MODEL 2A EVALUATION
Accuracy:  0.9460 (94.60%)
Precision: 0.9579
Recall:    0.9460
F1-Score:  0.9492

Confusion Matrix:
[[2001  132   36]
 [   2  202    0]
 [  48    0 1613]]

Classification Report:
              precision    recall  f1-score   support

      active       0.98      0.92      0.95      2169
    athletic       0.60      0.99      0.75       204
   sedentary       0.98      0.97      0.97      1661

    accuracy                           0.95      4034
   macro avg       0.85      0.96      0.89      4034
weighted avg       0.96      0.95      0.95      4034


✓ Model 2A saved to ../models/model_2A_lifestyle_classifier.ckpt


# Model 2B: Activity Predictor


## Train & Evaluate


In [None]:
# Regression target as a float32 column vector, then an 80/20 split with a
# fixed random state.
y_2b = (
    df['next_day_calories_burned']
    .values
    .astype(np.float32)
    .reshape(-1, 1)
)
X_2b_train, X_2b_test, y_2b_train, y_2b_test = train_test_split(
    X,
    y_2b,
    test_size=0.2,
    random_state=42,
)

class ActivityPredictor(nn.Cell):
    """Fully connected regressor: input_dim -> 128 -> 64 -> 32 -> 1.

    ReLU follows each hidden layer; dropout is applied after the first two
    hidden layers only. The output is a single unbounded value per sample.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super(ActivityPredictor, self).__init__()
        # Attribute names fc1..fc4 determine the parameter names stored in the
        # checkpoint, so they must not be renamed.
        self.fc1 = nn.Dense(input_dim, 128)
        self.fc2 = nn.Dense(128, 64)
        self.fc3 = nn.Dense(64, 32)
        self.fc4 = nn.Dense(32, 1)
        self.relu = nn.ReLU()
        # `p` is the drop probability (p = 1 - keep_prob). The `keep_prob`
        # argument is deprecated in MindSpore 2.x; p=0.3 matches keep_prob=0.7.
        # One dropout cell is shared by both call sites.
        self.dropout = nn.Dropout(p=0.3)

    def construct(self, x):
        """Return a (batch, 1) prediction for a batch of feature vectors `x`."""
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        return self.fc4(x)

# Network, hyper-parameters, loss and optimiser for Model 2B.
model_2b = ActivityPredictor(input_dim)
lr_2b = 0.001
epochs_2b = 80
batch_size_2b = 64
loss_fn_2b = nn.MSELoss()
optimizer_2b = nn.Adam(model_2b.trainable_params(), learning_rate=lr_2b)


def forward_2b(data, label):
    """Return the MSE loss of Model 2B on one batch."""
    prediction = model_2b(data)
    return loss_fn_2b(prediction, label)


# Differentiate the loss with respect to the optimiser's parameters only
# (grad_position=None: no gradients with respect to the inputs).
grad_fn_2b = ops.value_and_grad(
    forward_2b, grad_position=None, weights=optimizer_2b.parameters
)


def train_step_2b(data, label):
    """Run one optimisation step on a batch and return the batch loss."""
    batch_loss, param_grads = grad_fn_2b(data, label)
    optimizer_2b(param_grads)
    return batch_loss

# Per-epoch history of the mean batch MSE.
train_losses_2b = []
model_2b.set_train(True)

# Dedicated, seeded generator so the per-epoch shuffle is reproducible.
shuffle_rng_2b = np.random.default_rng(42)
n_train_2b = len(X_2b_train)

for epoch in range(epochs_2b):
    epoch_loss = 0.0
    num_batches = 0
    # Re-shuffle the sample order every epoch; without this every epoch would
    # see exactly the same mini-batches in exactly the same order.
    order = shuffle_rng_2b.permutation(n_train_2b)
    for i in range(0, n_train_2b, batch_size_2b):
        batch_idx = order[i:i+batch_size_2b]
        batch_X = Tensor(X_2b_train[batch_idx], mindspore.float32)
        batch_y = Tensor(y_2b_train[batch_idx], mindspore.float32)
        loss = train_step_2b(batch_X, batch_y)
        # Store plain Python floats rather than 0-d NumPy arrays.
        epoch_loss += float(loss.asnumpy())
        num_batches += 1
    avg_loss = epoch_loss / num_batches
    train_losses_2b.append(avg_loss)
    # Log the first epoch and then every tenth epoch.
    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch [{epoch+1:2d}/{epochs_2b}] Loss: {avg_loss:.4f}")

# Score the hold-out set in inference mode.
model_2b.set_train(False)
test_inputs_2b = Tensor(X_2b_test, mindspore.float32)
y_2b_pred = model_2b(test_inputs_2b).asnumpy()

# Regression metrics, in the units of the target column.
mse_2b = mean_squared_error(y_2b_test, y_2b_pred)
rmse_2b = np.sqrt(mse_2b)
mae_2b = mean_absolute_error(y_2b_test, y_2b_pred)
r2_2b = r2_score(y_2b_test, y_2b_pred)

print(f"MSE: {mse_2b:.2f}, RMSE: {rmse_2b:.2f}, MAE: {mae_2b:.2f}, R2: {r2_2b:.4f}")

# Persist the weights plus a JSON sidecar holding the evaluation metrics.
save_checkpoint(model_2b, '../models/model_2B_activity_predictor.ckpt')
metadata_2b = {
    'model_name': 'Activity Predictor (Model 2B)',
    'model_type': 'Regression',
    'framework': 'MindSpore',
    'input_dim': input_dim,
    'metrics': {
        'MSE': float(mse_2b),
        'RMSE': float(rmse_2b),
        'MAE': float(mae_2b),
        'R2': float(r2_2b),
    },
}
with open('../models/model_2B_metadata.json', 'w') as f:
    json.dump(metadata_2b, f, indent=2)




MODEL 2B: ACTIVITY PREDICTOR (REGRESSION)
Target: next_day_calories_burned
Train: 16134 | Test: 4034
Target range: [324, 2891]

Training Model 2B...
Epoch [ 1/80] Loss: 674076.4307
Epoch [10/80] Loss: 24800.5288
Epoch [20/80] Loss: 22456.1992
Epoch [30/80] Loss: 20612.2045
Epoch [40/80] Loss: 16499.6071
Epoch [50/80] Loss: 12333.1120
Epoch [60/80] Loss: 10928.5067
Epoch [70/80] Loss: 10262.5051
Epoch [80/80] Loss: 9489.3439

MODEL 2B EVALUATION
MSE:  39068.52
RMSE: 197.66 calories
MAE:  180.32 calories
R²:   0.8428

✓ Model 2B saved!


# Model 2C: Health Risk Scorer


## Train & Evaluate


In [None]:
# Regression target as a float32 column vector, then an 80/20 split with a
# fixed random state.
y_2c = (
    df['health_risk_score']
    .values
    .astype(np.float32)
    .reshape(-1, 1)
)
X_2c_train, X_2c_test, y_2c_train, y_2c_test = train_test_split(
    X,
    y_2c,
    test_size=0.2,
    random_state=42,
)

class HealthRiskScorer(nn.Cell):
    """Fully connected regressor: input_dim -> 128 -> 64 -> 32 -> 1.

    ReLU follows each hidden layer; dropout is applied after the first two
    hidden layers only. The output is a single unbounded value per sample
    (it is not clipped to any score range).

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super(HealthRiskScorer, self).__init__()
        # Attribute names fc1..fc4 determine the parameter names stored in the
        # checkpoint, so they must not be renamed.
        self.fc1 = nn.Dense(input_dim, 128)
        self.fc2 = nn.Dense(128, 64)
        self.fc3 = nn.Dense(64, 32)
        self.fc4 = nn.Dense(32, 1)
        self.relu = nn.ReLU()
        # `p` is the drop probability (p = 1 - keep_prob). The `keep_prob`
        # argument is deprecated in MindSpore 2.x; p=0.3 matches keep_prob=0.7.
        # One dropout cell is shared by both call sites.
        self.dropout = nn.Dropout(p=0.3)

    def construct(self, x):
        """Return a (batch, 1) prediction for a batch of feature vectors `x`."""
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        return self.fc4(x)

# Network, hyper-parameters, loss and optimiser for Model 2C.
model_2c = HealthRiskScorer(input_dim)
lr_2c = 0.001
epochs_2c = 80
batch_size_2c = 64
loss_fn_2c = nn.MSELoss()
optimizer_2c = nn.Adam(model_2c.trainable_params(), learning_rate=lr_2c)


def forward_2c(data, label):
    """Return the MSE loss of Model 2C on one batch."""
    prediction = model_2c(data)
    return loss_fn_2c(prediction, label)


# Differentiate the loss with respect to the optimiser's parameters only
# (grad_position=None: no gradients with respect to the inputs).
grad_fn_2c = ops.value_and_grad(
    forward_2c, grad_position=None, weights=optimizer_2c.parameters
)


def train_step_2c(data, label):
    """Run one optimisation step on a batch and return the batch loss."""
    batch_loss, param_grads = grad_fn_2c(data, label)
    optimizer_2c(param_grads)
    return batch_loss

# Per-epoch history of the mean batch MSE.
train_losses_2c = []
model_2c.set_train(True)

# Dedicated, seeded generator so the per-epoch shuffle is reproducible.
shuffle_rng_2c = np.random.default_rng(42)
n_train_2c = len(X_2c_train)

for epoch in range(epochs_2c):
    epoch_loss = 0.0
    num_batches = 0
    # Re-shuffle the sample order every epoch; without this every epoch would
    # see exactly the same mini-batches in exactly the same order.
    order = shuffle_rng_2c.permutation(n_train_2c)
    for i in range(0, n_train_2c, batch_size_2c):
        batch_idx = order[i:i+batch_size_2c]
        batch_X = Tensor(X_2c_train[batch_idx], mindspore.float32)
        batch_y = Tensor(y_2c_train[batch_idx], mindspore.float32)
        loss = train_step_2c(batch_X, batch_y)
        # Store plain Python floats rather than 0-d NumPy arrays.
        epoch_loss += float(loss.asnumpy())
        num_batches += 1
    avg_loss = epoch_loss / num_batches
    train_losses_2c.append(avg_loss)
    # Log the first epoch and then every tenth epoch.
    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch [{epoch+1:2d}/{epochs_2c}] Loss: {avg_loss:.4f}")

# Score the hold-out set in inference mode.
model_2c.set_train(False)
test_inputs_2c = Tensor(X_2c_test, mindspore.float32)
y_2c_pred = model_2c(test_inputs_2c).asnumpy()

# Regression metrics, in the units of the target column.
mse_2c = mean_squared_error(y_2c_test, y_2c_pred)
rmse_2c = np.sqrt(mse_2c)
mae_2c = mean_absolute_error(y_2c_test, y_2c_pred)
r2_2c = r2_score(y_2c_test, y_2c_pred)

print(f"MSE: {mse_2c:.2f}, RMSE: {rmse_2c:.2f}, MAE: {mae_2c:.2f}, R2: {r2_2c:.4f}")

# Persist the weights plus a JSON sidecar holding the evaluation metrics.
save_checkpoint(model_2c, '../models/model_2C_health_risk_scorer.ckpt')
metadata_2c = {
    'model_name': 'Health Risk Scorer (Model 2C)',
    'model_type': 'Regression',
    'framework': 'MindSpore',
    'input_dim': input_dim,
    'metrics': {
        'MSE': float(mse_2c),
        'RMSE': float(rmse_2c),
        'MAE': float(mae_2c),
        'R2': float(r2_2c),
    },
}
with open('../models/model_2C_metadata.json', 'w') as f:
    json.dump(metadata_2c, f, indent=2)




MODEL 2C: HEALTH RISK SCORER (REGRESSION)
Target: health_risk_score (0-100)
Train: 16134 | Test: 4034
Target range: [0.0, 76.0]
Mean risk: 27.0

Training Model 2C...
Epoch [ 1/80] Loss: 238.4338
Epoch [10/80] Loss: 41.9553
Epoch [20/80] Loss: 34.2099
Epoch [30/80] Loss: 27.3177
Epoch [40/80] Loss: 23.5517
Epoch [50/80] Loss: 20.9839
Epoch [60/80] Loss: 19.7726
Epoch [70/80] Loss: 18.5072
Epoch [80/80] Loss: 17.3190

MODEL 2C EVALUATION
MSE:  31.92
RMSE: 5.65 risk points
MAE:  4.16 risk points
R²:   0.9027

✓ Model 2C saved!


## Summary

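Checkpoints are written relative to this notebook, each with a matching `*_metadata.json` file in the same directory:

- Model 2A: Lifestyle Classifier - `../models/model_2A_lifestyle_classifier.ckpt`
- Model 2B: Activity Predictor - `../models/model_2B_activity_predictor.ckpt`
- Model 2C: Health Risk Scorer - `../models/model_2C_health_risk_scorer.ckpt`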
