# **K-Nearest Neighbors (KNN) classifier** ( Loan Dataset )

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# Step 1: Load dataset
df = pd.read_csv("/content/loan.csv")

# Step 2: Handle missing values (if any)
df.fillna(df.median(numeric_only=True), inplace=True)  # Fill missing numerical values with median

# Step 3: Encode categorical features into numerical format
label_encoders = {}
categorical_columns = ["Gender", "Married", "Education", "Self_Employed", "Property_Area", "Loan_Status", "Dependents"]
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))  # Convert to string before encoding
    label_encoders[col] = le  # Store encoder for inverse transformation if needed

# Step 4: Define features (X) and target variable (Y)
X = df.drop(columns=["Loan_ID", "Loan_Status"])  # Features
y = df["Loan_Status"]  # Target (1 = Loan Approved, 0 = Loan Denied)

# Step 5: Split dataset into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Normalize the features for better performance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 7: Train K-Nearest Neighbors (KNN) Classifier
k = 5  # Number of neighbors
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)

# Step 8: Make predictions
y_pred = knn.predict(X_test)

# Step 9: Evaluate Model Performance
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f"KNN Model Accuracy: {accuracy:.2f}")

# Step 10: Display Classification Report
print("\nClassification Report:\n", metrics.classification_report(y_test, y_pred))


KNN Model Accuracy: 0.75

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.40      0.52        43
           1       0.74      0.94      0.83        80

    accuracy                           0.75       123
   macro avg       0.76      0.67      0.68       123
weighted avg       0.75      0.75      0.72       123

