In [8]:
import math
import csv
from random import shuffle

class DataHandler:
    """Load a CSV dataset and prepare it for training and evaluation.

    Rows are kept as lists of strings exactly as the ``csv`` module yields
    them; conversion to floats happens in ``separate_features_labels``.
    """

    def __init__(self, filepath):
        """Store the path of the CSV file that ``read_csv`` will open.

        Args:
            filepath: Path to the CSV file.
        """
        self.filepath = filepath

    def read_csv(self):
        """Read the CSV file and return its data rows.

        The first row is treated as a header and discarded. Completely
        empty rows (blank lines) are skipped because they carry no
        features or label.

        Returns:
            A list of rows, each row being a list of strings. An empty or
            header-only file yields an empty list.
        """
        # newline='' lets the csv module handle line endings itself, which
        # is required for quoted fields containing embedded newlines.
        with open(self.filepath, 'r', newline='') as file:
            csv_reader = csv.reader(file)
            next(csv_reader, None)  # Skip the header row (if any)
            return [row for row in csv_reader if row]

    def train_test_split(self, dataset, test_size=0.2):
        """Randomly split ``dataset`` into a training and a test part.

        The caller's list is left untouched; shuffling is done on a copy.

        Args:
            dataset: Sequence of rows to split.
            test_size: Fraction of rows (between 0 and 1 inclusive) that
                goes into the test part.

        Returns:
            A ``(train, test)`` tuple of lists.

        Raises:
            ValueError: If ``test_size`` is outside ``[0, 1]``.
        """
        if not 0 <= test_size <= 1:
            raise ValueError("test_size must be between 0 and 1")
        shuffled = list(dataset)  # copy so the caller's data keeps its order
        shuffle(shuffled)
        split_index = int(len(shuffled) * (1 - test_size))
        return shuffled[:split_index], shuffled[split_index:]

    def separate_features_labels(self, dataset):
        """Split rows into numeric feature vectors and labels.

        The first column of each row is dropped (treated as an ID), the
        last column is returned unchanged as the label, and everything in
        between is converted to ``float``.

        Args:
            dataset: Iterable of rows (sequences of strings or numbers).

        Returns:
            A ``(features, labels)`` tuple of lists of equal length.

        Raises:
            ValueError: If a feature value cannot be converted to float.
        """
        features = []
        labels = []
        for row in dataset:
            features.append([float(value) for value in row[1:-1]])
            labels.append(row[-1])
        return features, labels

class NaiveBayesClassifier:
    """Gaussian Naive Bayes classifier for numeric feature vectors.

    After ``fit`` the instance holds, per class label:

    * ``class_probabilities[label]`` - fraction of training rows with
      that label,
    * ``means[label]`` - per-feature mean,
    * ``stds[label]`` - per-feature population standard deviation.
    """

    # Floor applied to a standard deviation when a density is evaluated.
    # A feature that is constant within a class has std == 0, which would
    # otherwise divide by zero.
    _MIN_STD = 1e-9

    def __init__(self):
        """Create an unfitted classifier with empty statistics."""
        self.means = {}
        self.stds = {}
        self.class_probabilities = {}

    def fit(self, X, y):
        """Estimate class priors and per-class feature statistics.

        Args:
            X: Sequence of feature vectors (sequences of numbers).
            y: Sequence of labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError("X and y must have the same number of rows")
        # Start from a clean slate so labels seen only in an earlier fit
        # cannot linger in means/stds and break prediction.
        self.means = {}
        self.stds = {}
        self._calculate_class_probabilities(y)
        self._calculate_means_stds(X, y)

    def _calculate_class_probabilities(self, y):
        """Set ``class_probabilities`` to each label's relative frequency."""
        class_counts = {}
        for label in y:  # single pass instead of y.count() per label
            class_counts[label] = class_counts.get(label, 0) + 1
        total_count = len(y)
        self.class_probabilities = {
            label: count / total_count for label, count in class_counts.items()
        }

    def _calculate_means_stds(self, X, y):
        """Compute per-class, per-feature mean and population std."""
        for label in self.class_probabilities:
            rows = [row for row, row_label in zip(X, y) if row_label == label]
            columns = list(zip(*rows))  # one tuple of values per feature
            means = [sum(column) / len(column) for column in columns]
            self.means[label] = means
            self.stds[label] = [
                math.sqrt(sum((value - mean) ** 2 for value in column) / len(column))
                for mean, column in zip(means, columns)
            ]

    def predict_single(self, input_features):
        """Return the most likely label for one feature vector.

        Scores are accumulated as log prior plus the sum of log densities
        rather than as a product of densities, so inputs far from every
        class mean do not underflow to 0.0 for all classes at once.

        Args:
            input_features: Sequence of numbers in training-feature order.

        Returns:
            The label with the highest score.

        Raises:
            ValueError: If the classifier has no fitted classes.
        """
        if not self.means:
            raise ValueError("classifier has not been fitted with any data")
        scores = {}
        for label in self.means:
            score = math.log(self.class_probabilities[label])
            for i, feature in enumerate(input_features):
                score += self._calculate_log_probability(
                    feature, self.means[label][i], self.stds[label][i]
                )
            scores[label] = score
        return max(scores, key=scores.get)

    def _calculate_log_probability(self, x, mean, std):
        """Return the log of the Gaussian density of ``x``.

        ``std`` is floored at ``_MIN_STD`` to keep the result finite for
        zero-variance features.
        """
        std = max(std, self._MIN_STD)
        z = (x - mean) / std
        return -0.5 * z * z - math.log(std) - 0.5 * math.log(2 * math.pi)

    def _calculate_probability(self, x, mean, std):
        """Return the Gaussian density of ``x`` for the given mean and std.

        ``std`` is floored at ``_MIN_STD`` so a zero standard deviation no
        longer raises ``ZeroDivisionError``.
        """
        std = max(std, self._MIN_STD)
        exponent = math.exp(-((x - mean) ** 2) / (2 * std ** 2))
        return exponent / (math.sqrt(2 * math.pi) * std)

    def predict(self, X):
        """Return the predicted label for every feature vector in ``X``."""
        return [self.predict_single(features) for features in X]

    def classification_report(self, y_true, y_pred):
        """Compute one-vs-rest metrics for every label present in ``y_true``.

        Args:
            y_true: Sequence of actual labels.
            y_pred: Sequence of predicted labels, aligned with ``y_true``.

        Returns:
            A dict mapping each label to a dict with the keys
            ``'Precision'``, ``'Recall'``, ``'F1-score'`` and
            ``'Accuracy'``. A ratio whose denominator is zero is
            reported as 0.

        Raises:
            ValueError: If ``y_true`` and ``y_pred`` differ in length.
        """
        if len(y_true) != len(y_pred):
            raise ValueError("y_true and y_pred must have the same length")
        pairs = list(zip(y_true, y_pred))
        report = {}
        for label in set(y_true):
            tp = sum(1 for actual, guess in pairs if actual == label and guess == label)
            fp = sum(1 for actual, guess in pairs if actual != label and guess == label)
            fn = sum(1 for actual, guess in pairs if actual == label and guess != label)
            tn = len(pairs) - tp - fp - fn  # every remaining pair

            precision = tp / (tp + fp) if tp + fp > 0 else 0
            recall = tp / (tp + fn) if tp + fn > 0 else 0
            f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
            accuracy = (tp + tn) / len(pairs)

            report[label] = {
                'Precision': precision,
                'Recall': recall,
                'F1-score': f1,
                'Accuracy': accuracy,
            }

        return report

def _prompt_for_features():
    """Ask the user for one applicant's details on the console.

    All answers are collected first and converted to floats afterwards,
    so a non-numeric answer is only reported once every prompt has been
    answered.

    Returns:
        A list of floats in prompt order, or ``None`` if the user typed
        'exit' at any prompt.

    Raises:
        ValueError: If any answer is not a valid number.
    """
    # NOTE(review): the prompt order is assumed to match the feature
    # column order of the training CSV - confirm against bank.csv.
    prompts = (
        "Age: ",
        "Experience: ",
        "Income: 1-1,000, 10-10,000 100-100,000 etc.",
        "ZIP Code: ",
        "Family size: ",
        "Average spending on credit cards per month (CCAvg): ",
        "Education level (1: Undergraduate, 2: Graduate, 3: Advanced/Professional): ",
        "Mortgage value: ",
        "Securities Account (0: No, 1: Yes): ",
        "CD Account (0: No, 1: Yes): ",
        "Online banking (0: No, 1: Yes): ",
        "Credit Card (0: No, 1: Yes): ",
    )
    answers = []
    for prompt in prompts:
        answer = input(prompt)
        if answer.strip().lower() == 'exit':
            return None
        answers.append(answer)
    return [float(answer) for answer in answers]


def main():
    """Train on 'bank.csv', print a report, then classify user input.

    The dataset is split into train and test parts, a Naive Bayes
    classifier is fitted and evaluated, and the per-class metrics are
    printed. Afterwards the user is prompted repeatedly for applicant
    details until 'exit' is typed, and a prediction is printed for each
    complete set of answers.
    """
    filepath = 'bank.csv'
    data_handler = DataHandler(filepath)
    dataset = data_handler.read_csv()
    train_set, test_set = data_handler.train_test_split(dataset)
    train_features, train_labels = data_handler.separate_features_labels(train_set)
    test_features, test_labels = data_handler.separate_features_labels(test_set)

    classifier = NaiveBayesClassifier()
    classifier.fit(train_features, train_labels)
    predictions = classifier.predict(test_features)
    report = classifier.classification_report(test_labels, predictions)

    print("Classification Report for Bank Loan Granting:\n")
    for label, metrics in report.items():
        # Labels are read from the CSV as strings, so compare textually;
        # comparing against the integer 1 never matched.
        if str(label).strip() == '1':
            print("Class 1 - The individual will be granted a loan by the bank.")
        else:
            print("Class 0 - The individual will not be granted a loan by the bank.")
        print(f"Class {label}:")
        for metric, value in metrics.items():
            print(f"  {metric}: {value:.2f}")
        print()

    # Keep classifying user-supplied applicants until the user types 'exit'.
    while True:
        print("Enter the following details (or type 'exit' to quit):")
        try:
            input_features = _prompt_for_features()
        except ValueError:
            print("Invalid input. Please enter numeric values as prompted.")
            continue
        if input_features is None:
            break

        # Prediction sits outside the try so that an error raised by the
        # classifier is not misreported as invalid user input.
        prediction = classifier.predict_single(input_features)
        print(f"Predicted class: {prediction}")
        print()

# Run the interactive demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Classification Report:

Class 1 - The individual will not be granted a loan by the bank.
Class 1:
  Precision: 0.92
  Recall: 0.18
  F1-score: 0.30
  Accuracy: 0.74

Class 1 - The individual will not be granted a loan by the bank.
Class 0:
  Precision: 0.73
  Recall: 0.99
  F1-score: 0.84
  Accuracy: 0.74

Enter the following details (or type 'exit' to quit):
Age: 20
Experience: 2
Income: 1-1,000, 10-10,000 100-100,000 etc.1
ZIP Code: 90089
Family size: 6
Average spending on credit cards per month (CCAvg): 0.5
Education level (1: Undergraduate, 2: Graduate, 3: Advanced/Professional): 1
Mortgage value: 0
Securities Account (0: No, 1: Yes): 0
CD Account (0: No, 1: Yes): 0
Online banking (0: No, 1: Yes): 1
Credit Card (0: No, 1: Yes): 0
Predicted class: 1

Enter the following details (or type 'exit' to quit):
Age: exit
