In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from imblearn.over_sampling import SMOTE  # Can't Read the file 

In [2]:
import warnings
warnings.filterwarnings('ignore')
np.random.seed(50)

In [3]:
# Generating synthetic data
height = np.random.randint(140, 200, 200)
weight = np.random.randint(40, 120, 200)
age = np.random.randint(18, 70, 200)
bmi = weight / ((height / 100) ** 2) #body mass index

In [4]:
# Updated category function
def category(bmi):
    if bmi <= 18.5:
        return "underweight"
    elif 18.5 < bmi <= 24.9:
        return "normal"
    elif 25 <= bmi <= 29.9:
        return "overweight"
    else:
        return "obese"  # Added an 'obse' category,
        #"The 'obese' category helps separate people who are very overweight from those who are just slightly overweight

In [5]:
# Apply the updated category function
categories = np.array([category(b) for b in bmi])

# Creating DataFrame
bmi_df = pd.DataFrame({
    "height": height,
    "weight": weight,
    "age": age,
    "bmi": bmi,
    "category": categories
})

In [6]:
# Encoding categorical labels
l_c = LabelEncoder()
bmi_df["category"] = l_c.fit_transform(bmi_df["category"])

In [7]:
# Feature Scaling
scaler = StandardScaler()
scaled_features = scaler.fit_transform(bmi_df[["height", "weight", "age"]])

In [8]:
# Applying PCA for dimensionality reduction
pca = PCA(n_components=2)
principal_components = pca.fit_transform(scaled_features)
bmi_df_pca = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
bmi_df_pca['category'] = bmi_df['category']

In [9]:
# Handling class imbalance with SMOTE
X = bmi_df_pca[['PC1', 'PC2']]
y = bmi_df_pca['category']
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)

In [10]:
# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)

In [11]:
# Building the KNN model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

In [12]:
# Predicting and evaluating
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

In [13]:
# Output results
print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.5897435897435898
Confusion Matrix:
[[11  1  2  3]
 [ 0 10  4  2]
 [ 4  6 10  2]
 [ 3  2  3 15]]
