# 03. Feature Engineering

In this notebook:
- Load the previously split datasets
- Construct domain-specific features such as BMI, Pulse Pressure, and Vision Average
- Apply the same feature transformations consistently across training, validation, and test sets
- Save the updated datasets for further preprocessing

In [1]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path,
# presumably so the project-local `scripts` package used by later cells can
# be imported (assumes the notebook runs from a direct subfolder of the
# project root — TODO confirm).
project_root = Path.cwd().parent

# Guard the append so re-running this cell does not keep adding duplicate
# entries to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [2]:
import pandas as pd
from pathlib import Path
from scripts.feature_engineering import create_features

# Input CSVs holding the train/validation/test splits. The paths are relative,
# so they resolve against the kernel's current working directory.
TRAIN_DATA_PATH = Path('../data/processed/train.csv')
VAL_DATA_PATH = Path('../data/processed/val.csv')
TEST_DATA_PATH = Path('../data/processed/test.csv')

# Destination CSVs for the feature-engineered version of each split.
OUTPUT_TRAIN_PATH = Path('../data/processed/train_fe.csv')
OUTPUT_VAL_PATH = Path('../data/processed/val_fe.csv')
OUTPUT_TEST_PATH = Path('../data/processed/test_fe.csv')

# Read the three splits in train -> val -> test order.
df_train, df_val, df_test = (
    pd.read_csv(split_path)
    for split_path in (TRAIN_DATA_PATH, VAL_DATA_PATH, TEST_DATA_PATH)
)

print("Training set shape:", df_train.shape)
print("Validation set shape:", df_val.shape)
print("Test set shape:", df_test.shape)

# Apply feature engineering: every split goes through the same
# `create_features` call, one split at a time.
df_train_fe, df_val_fe, df_test_fe = (
    create_features(split_frame)
    for split_frame in (df_train, df_val, df_test)
)

print("\nFeature engineering completed.")

# Persist each engineered split; index=False keeps the DataFrame index out of
# the written CSV.
for engineered_frame, output_path in (
    (df_train_fe, OUTPUT_TRAIN_PATH),
    (df_val_fe, OUTPUT_VAL_PATH),
    (df_test_fe, OUTPUT_TEST_PATH),
):
    engineered_frame.to_csv(output_path, index=False)

# Plain string literal: the message has no placeholders, so no f-prefix.
print("\nFeature-engineered datasets saved successfully to 'data/processed/'.")


Training set shape: (693942, 24)
Validation set shape: (148702, 24)
Test set shape: (148702, 24)

Feature engineering completed.

Feature-engineered datasets saved successfully to 'data/processed/'.


In [4]:
# Show the feature-engineered training frame as this cell's output so the
# newly added columns can be inspected.
df_train_fe

Unnamed: 0,sex,age,height,weight,waistline,sight_left,sight_right,hear_left,hear_right,SBP,...,mean_arterial_pressure,vision_avg,hearing_avg,AST_ALT_ratio,risk_waist,risk_bp,risk_tg,risk_chole,risk_glu,metabolic_risk_count
0,Female,45,170,70,85.5,1.0,1.2,1.0,1.0,108.0,...,81.333333,1.10,1.0,2.125000,1,0,0,1,0,2
1,Female,55,160,65,79.0,0.8,0.7,1.0,1.0,114.0,...,83.333333,0.75,1.0,1.384615,0,0,0,1,1,2
2,Female,55,155,80,101.9,0.9,1.0,1.0,1.0,116.0,...,83.333333,0.95,1.0,0.736842,1,0,1,0,1,3
3,Female,65,150,55,75.0,0.9,0.8,1.0,1.0,136.0,...,101.333333,0.85,1.0,1.400000,0,1,1,1,0,3
4,Female,40,155,60,85.0,0.9,1.2,1.0,1.0,117.0,...,85.000000,1.05,1.0,1.545455,1,0,0,1,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
693937,Female,55,155,55,78.0,0.3,0.5,1.0,1.0,113.0,...,85.000000,0.40,1.0,1.000000,0,0,0,0,0,0
693938,Male,40,175,90,86.0,0.9,1.0,1.0,1.0,105.0,...,78.333333,0.95,1.0,1.172414,0,0,1,1,1,3
693939,Male,60,170,75,92.0,2.0,1.2,1.0,1.0,126.0,...,95.333333,1.60,1.0,0.916667,1,0,0,0,1,2
693940,Male,55,170,85,92.0,0.8,0.7,1.0,1.0,130.0,...,96.666667,0.75,1.0,0.676471,1,1,1,1,1,5
