In [2]:
import numpy as np
import pandas as pd
from math import sqrt
from collections import Counter

# Read the loan records from disk into a DataFrame
df = pd.read_csv("loan_data.csv")

# Split the table by position: every column except the last is a feature
# (Age, Income, Number of Credit Cards); the last column is the target
# (Loan Sanctioned)
x, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# The application to score; None marks a value that is unknown.
# Layout: [Age, Income, Number of Credit Cards, Loan Sanctioned]
# Example below: Age=31, Income unknown, Number of Credit Cards=2, Loan status unknown
new_application = np.array([31, None, 2, None])  # Modify according to your test case

# Euclidean distance calculation function
def euclidean_distance(row, new_app):
    """Return the Euclidean distance between a data row and a new application.

    Only the known features of ``new_app`` contribute: positions holding
    ``None`` are skipped, and the final position of ``new_app`` (the target
    slot) is never compared.

    Args:
        row: Feature values of one record, in the same order as ``new_app``.
            May be a list, tuple, NumPy array or pandas Series.
        new_app: Feature values of the new application followed by its
            target slot; ``None`` marks an unknown value.

    Returns:
        The square root of the summed squared differences, as a float.

    Raises:
        IndexError: If ``row`` has fewer values than the known feature
            positions of ``new_app`` require.
    """
    # Materialise the values so the access below is purely positional.
    # Indexing a label-indexed pandas Series with an integer (``row[i]``) is
    # deprecated and emits a FutureWarning.
    values = list(row)

    squared_sum = 0
    for i, target in enumerate(new_app[:-1]):  # last slot is the target
        if target is not None:
            squared_sum += (values[i] - target) ** 2
    return sqrt(squared_sum)

# Score every record against the new application. Each row's last value
# (the target column) is dropped before the distance is computed.
distances = df.apply(
    lambda record: euclidean_distance(record[:-1], new_application),
    axis=1,
)
df['Euclidean_Distance'] = distances

# Order the records from nearest to farthest
df_sorted = df.sort_values(by='Euclidean_Distance')

# Function to suggest an odd value of k
def suggest_odd_k():
    """Prompt on stdin until the user supplies a positive odd value for k.

    An odd positive integer is returned immediately.  For an even positive
    integer the two neighbouring odd numbers are offered and the user's pick
    is returned if it is one of them.  Anything else (non-numeric text, zero,
    negative numbers, or an invalid pick) prints a message and restarts the
    prompt.

    Returns:
        The chosen k as a positive odd ``int``.
    """
    # Loop instead of recursing so repeated bad input cannot grow the stack.
    while True:
        raw = input("Please enter a value for k: ")
        try:
            k = int(raw)
        except ValueError:
            print("Invalid input. Please enter a whole number.")
            continue

        # A neighbour count must be at least 1; this also keeps the
        # "below" suggestion for even input from ever being negative.
        if k <= 0:
            print("k must be a positive integer.")
            continue

        if k % 2 == 1:
            return k

        below, above = k - 1, k + 1
        print("You entered an even number, which is not valid for KNN!")
        choice = input(f"Please enter {below} or {above} as your odd value for k: ").strip()

        if choice in (str(below), str(above)):
            return int(choice)
        print("Invalid choice. Please try again.")

# Ask the user for the neighbour count (always odd)
k = suggest_odd_k()

# Keep the first k rows of the distance-sorted table, i.e. the closest records
k_nearest_neighbors = df_sorted.iloc[:k]

# Classification (majority voting for loan sanctioned status)
def majority_voting(neighbors):
    """Return the most frequent label among the given neighbours.

    The label is read from the second-to-last column of ``neighbors``
    (the last column is expected to be the appended distance column).
    When several labels share the top count, the one encountered first wins.
    """
    labels = neighbors.iloc[:, -2].values
    winner, _count = Counter(labels).most_common(1)[0]
    return winner

# Regression (average voting for continuous value prediction)
def average_voting(neighbors):
    """Return the mean of the second column (position 1) of ``neighbors``.

    In this script that column is Income, so the result is the neighbours'
    average income.
    """
    incomes = neighbors.iloc[:, 1]
    return np.mean(incomes.values)

# Choose the prediction task from which slot of the new application is unknown.
# Income takes priority: when Income is None only the regression runs, even if
# the loan status is None as well.
if new_application[1] is not None and new_application[3] is not None:
    print("Both Income and Loan Sanctioned Status are known; no prediction needed.")
elif new_application[1] is None:
    # Regression: estimate Income as the neighbours' average
    predicted_income = average_voting(k_nearest_neighbors)
    print(f"Regression result (Predicted Income): {predicted_income}")
else:
    # Classification: Income is known and the loan status is not
    loan_result = majority_voting(k_nearest_neighbors)
    print(f"Classification result (Loan Sanctioned or Not): {loan_result}")

# Display the neighbours that were used
print(f"\nThe {k} nearest neighbors are:\n", k_nearest_neighbors)


  distance += (row[i] - new_app[i]) ** 2


Please enter a value for k:  6


You entered an even number, which is not valid for KNN!


Please enter 5 or 7 as your odd value for k:  7


Regression result (Predicted Income): 69862.0

The 7 nearest neighbors are:
     Age  Income  Number of Credit Cards  Loan Sanctioned  Euclidean_Distance
3    32   87221                       0                0            2.236068
10   28   25311                       5                0            4.242641
11   28  103104                       5                1            4.242641
8    36   84925                       3                1            5.099020
5    25   20769                       2                0            6.000000
6    38   79735                       1                0            7.071068
9    40   87969                       5                1            9.486833
