In [12]:
#Import libraries
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [2]:
data = pd.read_csv('ml_data.csv')
data.head()

Unnamed: 0,bust,hips,waist,height,gender,whr,bhr,body_type
0,8,8,7,24,female,0.875,1.0,Rectangle
1,8,19,14,63,female,0.736842,0.421053,Pear
2,9,20,17,49,female,0.85,0.45,Pear
3,9,45,41,60,female,0.911111,0.2,Pear
4,10,11,9,46,female,0.818182,0.909091,Pear


In [3]:
X = data.drop(['body_type','height','gender'], axis=1) # Feature Matrix

In [4]:
y = data['body_type']  # Target Vector

In [5]:
from imblearn.over_sampling import SMOTE

In [6]:
# Split the data and stratify
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

In [7]:
# Apply SMOTE only on training data
sm = SMOTE(random_state=42, k_neighbors=1)

X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

In [8]:
# Scale features after resampling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_res)
X_test_scaled = scaler.transform(X_test)

In [16]:
print("Before SMOTE:", y_train.value_counts())
print("After SMOTE:", y_train_res.value_counts())

Before SMOTE: body_type
Pear                 48
Inverted Triangle    14
Rectangle             8
Hourglass             4
Apple                 2
Name: count, dtype: int64
After SMOTE: body_type
Pear                 48
Inverted Triangle    48
Apple                48
Hourglass            48
Rectangle            48
Name: count, dtype: int64


In [9]:
# Train KNN model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train_scaled, y_train_res)

In [10]:
# Predictions
y_pred = knn.predict(X_test_scaled)

In [13]:
accuracy_score(y_test, y_pred)

0.9

In [14]:
# Evaluate
print("--- Confusion Matrix ---")
print(confusion_matrix(y_test, y_pred))

--- Confusion Matrix ---
[[ 1  0  0  0  0]
 [ 0  1  0  0  0]
 [ 0  0  2  0  1]
 [ 0  0  0 13  0]
 [ 1  0  0  0  1]]


In [15]:
print("\n--- Classification Report ---")
print(classification_report(y_test, y_pred))


--- Classification Report ---
                   precision    recall  f1-score   support

            Apple       0.50      1.00      0.67         1
        Hourglass       1.00      1.00      1.00         1
Inverted Triangle       1.00      0.67      0.80         3
             Pear       1.00      1.00      1.00        13
        Rectangle       0.50      0.50      0.50         2

         accuracy                           0.90        20
        macro avg       0.80      0.83      0.79        20
     weighted avg       0.93      0.90      0.90        20

