In [34]:
import csv
from random import shuffle
from math import sqrt
from collections import Counter

class DataHandler:
    """Load a CSV dataset and slice it into the pieces a classifier needs.

    The handler only remembers the file path; every method returns new
    lists and leaves its arguments untouched.
    """

    def __init__(self, filepath):
        """Store the path of the CSV file to read.

        Args:
            filepath: Path handed to ``open`` when ``read_csv`` is called.
        """
        self.filepath = filepath

    def read_csv(self):
        """Read all data rows of the CSV file.

        The first row is treated as a header and discarded. Blank lines
        are skipped so they cannot surface later as empty rows.

        Returns:
            A list of rows, each row being a list of strings. An empty
            (or header-only) file yields an empty list.
        """
        # newline='' is what the csv module asks for so that it can do its
        # own newline handling (e.g. newlines inside quoted fields).
        with open(self.filepath, 'r', newline='') as file:
            csv_reader = csv.reader(file)
            # The None default keeps an empty file from raising StopIteration.
            next(csv_reader, None)
            # csv.reader yields [] for a blank line; drop those rows.
            dataset = [row for row in csv_reader if row]
        return dataset

    def train_test_split(self, dataset, test_size=0.2, seed=None):
        """Randomly partition ``dataset`` into a training and a testing part.

        The input list is not modified; a shuffled copy is split instead,
        so re-running the call always starts from the same data.

        Args:
            dataset: Sequence of rows to split.
            test_size: Fraction of the rows placed in the testing part.
            seed: Optional seed for a private random generator, making the
                split reproducible. When None, the module-level ``shuffle``
                (and therefore the global random state) is used.

        Returns:
            A ``(train, test)`` tuple of lists.
        """
        shuffled = list(dataset)  # copy so the caller's list keeps its order
        if seed is None:
            shuffle(shuffled)
        else:
            from random import Random
            Random(seed).shuffle(shuffled)
        # Index of the first row that belongs to the testing part.
        split_index = int(len(shuffled) * (1 - test_size))
        return shuffled[:split_index], shuffled[split_index:]

    def separate_features_labels(self, dataset):
        """Split each row into numeric features and a label.

        The first column of every row is dropped, the last column is
        returned unchanged as the label, and everything in between is
        converted to ``float``.

        Args:
            dataset: Iterable of rows (sequences of strings/numbers).

        Returns:
            A ``(features, labels)`` tuple: ``features`` is a list of lists
            of floats, ``labels`` a list with one entry per row.

        Raises:
            ValueError: If a feature value cannot be converted to float.
        """
        features = [[float(value) for value in row[1:-1]] for row in dataset]
        labels = [row[-1] for row in dataset]
        return features, labels


In [35]:
class KNNClassifier:
    """K-Nearest Neighbors classifier using Euclidean distance.

    A prediction is the most common label among the ``k`` training points
    closest to the query point. Features are compared as given (no
    scaling), so columns with large magnitudes dominate the distance.
    """

    def __init__(self, k=3):
        """Create an unfitted classifier.

        Args:
            k: Number of neighbors that vote on each prediction.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            # k == 0 would otherwise surface later as an opaque IndexError.
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k
        self.X_train = []  # training feature vectors
        self.y_train = []  # training labels, parallel to X_train

    def fit(self, X, y):
        """Store the training data; KNN does no other work at fit time.

        Args:
            X: Sequence of feature vectors.
            y: Sequence of labels, one per feature vector.

        Raises:
            ValueError: If ``X`` and ``y`` have different lengths.
        """
        if len(X) != len(y):
            # zip() in _get_neighbors would silently drop the surplus items.
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def _euclidean_distance(self, point1, point2):
        """Return the Euclidean distance between two equally sized points.

        Raises:
            ValueError: If the points have a different number of dimensions.
        """
        if len(point1) != len(point2):
            # Without this check zip() would ignore the extra dimensions.
            raise ValueError(
                f"points must have the same dimension, got {len(point1)} and {len(point2)}"
            )
        return sqrt(sum((a - b) ** 2 for a, b in zip(point1, point2)))

    def _get_neighbors(self, input_features):
        """Return the labels of the ``k`` training points nearest to the input.

        Labels are ordered from nearest to farthest; equidistant points keep
        their training order because the sort is stable.
        """
        distances = [
            (self._euclidean_distance(input_features, features), label)
            for features, label in zip(self.X_train, self.y_train)
        ]
        distances.sort(key=lambda pair: pair[0])
        return [label for _, label in distances[:self.k]]

    def predict_single(self, input_features):
        """Predict the label of one feature vector by majority vote.

        Raises:
            IndexError: If the classifier holds no training data.
        """
        neighbors = self._get_neighbors(input_features)
        # most_common(1) -> [(label, count)]; ties go to the label seen first,
        # i.e. the one whose neighbor is closest.
        return Counter(neighbors).most_common(1)[0][0]

    def predict(self, X):
        """Predict a label for every feature vector in ``X``.

        Returns:
            A list of labels in the same order as ``X``.
        """
        return [self.predict_single(features) for features in X]

    def classification_report(self, y_true, y_pred):
        """Compute one-vs-rest metrics for every label.

        Labels are reported in order of first appearance (true labels
        first, then predicted ones), so the report order is stable between
        runs and labels that only occur in the predictions are included.

        Args:
            y_true: Sequence of actual labels.
            y_pred: Sequence of predicted labels, parallel to ``y_true``.

        Returns:
            A dict mapping each label to a dict with the keys
            ``'Precision'``, ``'Recall'``, ``'F1-score'`` and ``'Accuracy'``.
            A ratio whose denominator is zero is reported as 0.

        Raises:
            ValueError: If the two sequences differ in length.
        """
        if len(y_true) != len(y_pred):
            raise ValueError(
                f"y_true and y_pred must have the same length, "
                f"got {len(y_true)} and {len(y_pred)}"
            )

        pairs = list(zip(y_true, y_pred))
        total = len(pairs)
        # dict.fromkeys de-duplicates while preserving first-seen order.
        labels = list(dict.fromkeys(list(y_true) + list(y_pred)))

        report = {}
        for label in labels:
            tp = fp = fn = tn = 0
            # Single pass over the data instead of one scan per counter.
            for actual, predicted in pairs:
                if predicted == label:
                    if actual == label:
                        tp += 1
                    else:
                        fp += 1
                elif actual == label:
                    fn += 1
                else:
                    tn += 1

            precision = tp / (tp + fp) if tp + fp > 0 else 0
            recall = tp / (tp + fn) if tp + fn > 0 else 0
            f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
            accuracy = (tp + tn) / total

            report[label] = {
                'Precision': precision,
                'Recall': recall,
                'F1-score': f1,
                'Accuracy': accuracy
            }

        return report


In [36]:
def _collect_answers(prompts):
    """Ask every prompt in order and return the raw answers.

    Returns:
        A list with one raw string per prompt, or None as soon as the user
        answers 'exit' (case-insensitive, surrounding whitespace ignored).
    """
    answers = []
    for prompt in prompts:
        answer = input(prompt)
        if answer.strip().lower() == 'exit':
            return None
        answers.append(answer)
    return answers


def main():
    """Train a KNN classifier on 'bank.csv', report on it, then classify user input.

    Steps:
      1. Read 'bank.csv' and split it into training and testing rows.
      2. Fit a 3-nearest-neighbors classifier on the training rows.
      3. Print per-class precision, recall, F1-score and accuracy for the
         testing rows.
      4. Repeatedly prompt for one set of feature values and print the
         predicted class, until the user types 'exit'.
    """
    # Path to the CSV file containing the Bank dataset
    filepath = 'bank.csv'

    # Load the rows and split them (default: 80% training, 20% testing)
    data_handler = DataHandler(filepath)
    dataset = data_handler.read_csv()
    train_set, test_set = data_handler.train_test_split(dataset)

    # Separate numeric features from labels for both parts
    train_features, train_labels = data_handler.separate_features_labels(train_set)
    test_features, test_labels = data_handler.separate_features_labels(test_set)

    # Fitting a KNN classifier only stores the training data; all distance
    # computations happen at prediction time.
    classifier = KNNClassifier(k=3)
    classifier.fit(train_features, train_labels)

    # Evaluate on the held-out rows
    predictions = classifier.predict(test_features)
    report = classifier.classification_report(test_labels, predictions)

    print("KNN Classification Report for Bank Loan Granting:")
    print("_______________________________________________")
    print()
    print("Class 1 - Individual Is Likely To Be Granted A Loan.")
    print("Class 0 - Individual Is NOT Likely To Be Granted A Loan. \n")
    print("Instructions: \n")
    print("Enter 0/1 When Prompted. For Questions Regarding Mortage/Income Follow This Method ->")
    print("If $$$ is $1,000 Enter 1, $10,000 Enter 10, $100,000 enter 100 etc. \n")

    print("Classification Report:\n")
    # Print out the classification report for each class
    for label, metrics in report.items():
        print(f"Class {label}:")
        for metric, value in metrics.items():
            print(f"  {metric}: {value:.2f}")
        print()

    # One prompt per feature, in the order the values are passed to the
    # classifier.
    # NOTE(review): assumes this order matches the CSV columns between the ID
    # and the label - confirm against bank.csv.
    feature_prompts = (
        "Age: ",
        "Experience: ",
        "Income: ",
        "ZIP Code: ",  # ZIP code as a feature might need to be normalized
        "Family size: ",
        "Average spending on credit cards per month (CCAvg): ",
        "Education level (1: Undergraduate, 2: Graduate, 3: Advanced/Professional): ",
        "Mortgage value: ",
        "Securities Account (0: No, 1: Yes): ",
        "CD Account (0: No, 1: Yes): ",
        "Online banking (0: No, 1: Yes): ",
        "Credit Card (0: No, 1: Yes): ",
    )

    # Keep classifying user-supplied records until the user types 'exit'
    while True:
        print("Enter the following details (or type 'exit' to quit):")
        answers = _collect_answers(feature_prompts)
        if answers is None:
            break

        # Only the numeric conversion is guarded, so the message below is
        # shown for bad input and nothing else.
        try:
            input_features = [float(answer) for answer in answers]
        except ValueError:
            print("Invalid input. Please enter numeric values where applicable.")
            continue

        prediction = classifier.predict_single(input_features)
        print(f"Predicted class: {prediction}")
        print()
            
# Entry-point guard: call main() only when this code runs as the top-level
# program (__name__ == "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()


KNN Classification Report for Bank Loan Granting:
_______________________________________________

Class 1 - Individual Is Likely To Be Granted A Loan.
Class 0 - Individual Is NOT Likely To Be Granted A Loan. 

Instructions: 

Enter 0/1 When Prompted. For Questions Regarding Mortage/Income Follow This Method ->
If $$$ is $1,000 Enter 1, $10,000 Enter 10, $100,000 enter 100 etc. 

Classification Report:

Class 1:
  Precision: 0.23
  Recall: 0.15
  F1-score: 0.19
  Accuracy: 0.60

Class 0:
  Precision: 0.69
  Recall: 0.79
  F1-score: 0.73
  Accuracy: 0.60

Enter the following details (or type 'exit' to quit):
Age: 25
Experience: 2
Income: 3
ZIP Code: 90017
Family size: 4
Average spending on credit cards per month (CCAvg): 1
Education level (1: Undergraduate, 2: Graduate, 3: Advanced/Professional): 1
Mortgage value: 1
Securities Account (0: No, 1: Yes): 1
CD Account (0: No, 1: Yes): 1
Online banking (0: No, 1: Yes): 1
Credit Card (0: No, 1: Yes): 1
Predicted class: 1

Enter the following 