In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# ML models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# --- Load dataset ---
df = pd.read_csv(r"C:\DATA\data_4.1.csv")
df['BMI'] = df['weight_kg'] / (df['height_cm']/100)**2
def bmi_class(bmi):
    if bmi < 18.5: return 'underweight'
    elif bmi < 25: return 'normal'
    else: return 'overweight'
df['BMI_class'] = df['BMI'].apply(bmi_class)

# --- Features / Labels ---
X = df[['job','age','height_cm','weight_kg']]
y = df['BMI_class']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# --- Preprocessor: OneHot job + scale numeric ---
categorical = ['job']
numeric = ['age','height_cm','weight_kg']

preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical),
    ('num', StandardScaler(), numeric)
])

# --- Define 5 models ---
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "DecisionTree": DecisionTreeClassifier(),
    "RandomForest": RandomForestClassifier(n_estimators=100),
    "SVC": SVC(),
    "KNN": KNeighborsClassifier(n_neighbors=5)
}

# --- Train and evaluate ---
for name, model in models.items():
    clf = Pipeline(steps=[('preprocess', preprocessor),
                          ('model', model)])
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.3f}")
    print(classification_report(y_test, y_pred))
    print("-"*50)


LogisticRegression Accuracy: 0.985
              precision    recall  f1-score   support

      normal       0.98      0.99      0.99       104
  overweight       0.99      1.00      0.99        94
 underweight       0.00      0.00      0.00         2

    accuracy                           0.98       200
   macro avg       0.66      0.66      0.66       200
weighted avg       0.98      0.98      0.98       200

--------------------------------------------------
DecisionTree Accuracy: 0.975
              precision    recall  f1-score   support

      normal       1.00      0.95      0.98       104
  overweight       0.95      1.00      0.97        94
 underweight       1.00      1.00      1.00         2

    accuracy                           0.97       200
   macro avg       0.98      0.98      0.98       200
weighted avg       0.98      0.97      0.98       200

--------------------------------------------------
RandomForest Accuracy: 0.920
              precision    recall  f1-score

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
