In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the processed dataset
data = pd.read_csv("processed_dataset.csv")
data.head()

Unnamed: 0,Gender,Age,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad,BMI
0,0,-0.522124,1,0,-0.785019,0.404153,2,0,-0.013073,0,-1.188039,0.561997,3,3,1,-0.663421
1,0,-0.522124,1,0,1.088342,0.404153,2,1,1.618759,1,2.33975,-1.080625,2,3,1,-0.681937
2,1,-0.206889,1,0,-0.785019,0.404153,2,0,-0.013073,0,1.16382,0.561997,1,3,1,-0.740967
3,1,0.423582,0,0,1.088342,0.404153,2,0,-0.013073,0,1.16382,-1.080625,1,4,5,-0.355619
4,1,-0.364507,0,0,-0.785019,-2.167023,2,0,-0.013073,0,-1.188039,-1.080625,2,3,6,-0.169522


In [3]:
# Define function to train and test model
def train_and_test_model(data, include_bmi=True):
    # Include or exclude BMI column based on input
    if not include_bmi:
        data = data.drop(columns=["BMI"])

    # Separate features and target
    X = data.drop(columns=["NObeyesdad"])
    y = data["NObeyesdad"]

    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

    # Train the model
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy (with{'out' if not include_bmi else ''} BMI): {accuracy:.2f}")
    print(classification_report(y_test, y_pred))

    return model

In [5]:
# Train and test with BMI
print("Model Performance with BMI:")
model_with_bmi = train_and_test_model(data, include_bmi=True)

Model Performance with BMI:
Accuracy (with BMI): 0.96
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        82
           1       0.85      0.99      0.91        86
           2       0.97      0.99      0.98       106
           3       1.00      0.99      0.99        89
           4       1.00      1.00      1.00        97
           5       0.95      0.86      0.90        87
           6       0.99      0.93      0.96        87

    accuracy                           0.96       634
   macro avg       0.97      0.96      0.96       634
weighted avg       0.97      0.96      0.96       634



In [7]:
# Train and test without BMI
print("\nModel Performance without BMI:")
model_without_bmi = train_and_test_model(data, include_bmi=False)


Model Performance without BMI:
Accuracy (without BMI): 0.84
              precision    recall  f1-score   support

           0       0.88      0.85      0.86        82
           1       0.61      0.76      0.68        86
           2       0.81      0.87      0.84       106
           3       0.94      0.92      0.93        89
           4       0.99      0.99      0.99        97
           5       0.82      0.74      0.78        87
           6       0.86      0.71      0.78        87

    accuracy                           0.84       634
   macro avg       0.84      0.83      0.84       634
weighted avg       0.85      0.84      0.84       634

