In [1]:
from google.colab import drive
drive.mount('/content/drive')

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import numpy as np

Mounted at /content/drive


In [2]:
file_path = '/content/drive/MyDrive/ML/Classification/bmi_train.csv'

df = pd.read_csv(file_path)
df.columns = df.columns.str.strip().str.lower()
required_columns = {'gender', 'height', 'weight', 'index'}
if not required_columns.issubset(df.columns):
    raise ValueError(f"Your CSV must contain the following columns: {required_columns}")

In [3]:
le = LabelEncoder()
df['gender'] = le.fit_transform(df['gender'])

scaler = StandardScaler()
df[['height', 'weight']] = scaler.fit_transform(df[['height', 'weight']])


In [4]:
X = df[['gender', 'height', 'weight']]
y = df['index']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [5]:
lr_model = LogisticRegression(multi_class='ovr', max_iter=1000)
lr_model.fit(X_train, y_train)

dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)



In [6]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    return y_pred

print("Logistic Regression Performance:")
lr_pred = evaluate_model(lr_model, X_test, y_test)

print("\nDecision Tree Performance:")
dt_pred = evaluate_model(dt_model, X_test, y_test)


Logistic Regression Performance:
Accuracy: 0.63

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         6
           2       0.32      0.53      0.40        15
           3       0.00      0.00      0.00        15
           4       0.62      0.60      0.61        30
           5       0.83      1.00      0.91        50

    accuracy                           0.63       120
   macro avg       0.30      0.36      0.32       120
weighted avg       0.54      0.63      0.58       120


Confusion Matrix:
[[ 0  0  4  0  0  0]
 [ 0  0  5  1  0  0]
 [ 0  0  8  5  2  0]
 [ 0  0  6  0  9  0]
 [ 0  0  2  0 18 10]
 [ 0  0  0  0  0 50]]

Decision Tree Performance:
Accuracy: 0.83

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.71      0.83      0.77         6
         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
def predict_bmi(model, scaler, le):
    print("\nEnter your information:")
    gender_input = input("Gender (male/female): ").capitalize()
    if gender_input not in le.classes_:
        print(f"Invalid input! Please enter one of: {le.classes_}")
        return

    gender = le.transform([gender_input])[0]
    height = float(input("Your height (cm): "))
    weight = float(input("Your weight (kg): "))

    # Scale features
    scaled_features = scaler.transform([[height, weight]])
    height_scaled = scaled_features[0][0]
    weight_scaled = scaled_features[0][1]

    user_input = np.array([[gender, height_scaled, weight_scaled]])
    prediction = model.predict(user_input)[0]

    labels = ['Extremely Weak', 'Weak', 'Normal', 'Overweight', 'Obesity', 'Extremely Obesity']
    print(f"\nPrediction result: {labels[prediction]}")
predict_bmi(lr_model, scaler, le)


Enter your information:
Gender (male/female): male
Your height (cm): 185
Your weight (kg): 75

Prediction result: Overweight


