In [4]:
import csv
from random import shuffle

class DataHandler:
    """Load a CSV dataset and slice it into train/test rows, features and labels.

    Every row is handled as a list of strings exactly as produced by
    ``csv.reader``; numeric conversion only happens in
    ``separate_features_labels``.
    """

    def __init__(self, filepath):
        """Store the path of the CSV file; nothing is read until ``read_csv``."""
        self.filepath = filepath

    def read_csv(self):
        """Read the CSV file at ``self.filepath`` and return its data rows.

        The first row is treated as a header and discarded. Blank lines
        (which ``csv.reader`` yields as empty lists) are dropped so that later
        indexing such as ``row[-1]`` cannot fail on them.

        Returns:
            list[list[str]]: one list of string cells per data row; empty if
            the file has no data rows.
        """
        # newline='' lets the csv module do its own newline handling, as its
        # documentation requires for correct parsing of quoted fields.
        with open(self.filepath, 'r', newline='') as file:
            csv_reader = csv.reader(file)
            # The default of None keeps an empty file from raising StopIteration.
            next(csv_reader, None)
            return [row for row in csv_reader if row]

    def train_test_split(self, dataset, test_size=0.2, seed=None):
        """Randomly split ``dataset`` into a training part and a testing part.

        The caller's list is left untouched: a shallow copy is shuffled, so
        re-running the split never changes ``dataset`` itself.

        Args:
            dataset: sequence of rows to split.
            test_size: fraction of rows (between 0 and 1) placed in the test set.
            seed: optional seed for a reproducible shuffle. With ``None`` the
                module-level random generator is used.

        Returns:
            tuple[list, list]: ``(train_rows, test_rows)``.

        Raises:
            ValueError: if ``test_size`` is outside ``[0, 1]``.
        """
        if not 0 <= test_size <= 1:
            raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

        shuffled = list(dataset)
        if seed is None:
            shuffle(shuffled)
        else:
            # Local import: only ``shuffle`` is imported at the top of this cell.
            from random import Random
            Random(seed).shuffle(shuffled)

        # Index of the first row that belongs to the test set.
        split_index = int(len(shuffled) * (1 - test_size))
        return shuffled[:split_index], shuffled[split_index:]

    def separate_features_labels(self, dataset):
        """Split each row into float features and a raw label.

        The first cell of a row is skipped, the last cell is returned unchanged
        as the label, and everything in between is converted to ``float``.
        NOTE(review): this assumes the identifier is the first column and the
        target is the last column of the CSV - confirm against the data file.

        Returns:
            tuple[list[list[float]], list]: ``(features, labels)``.

        Raises:
            ValueError: if a feature cell cannot be parsed as a float.
        """
        features = [[float(cell) for cell in row[1:-1]] for row in dataset]
        labels = [row[-1] for row in dataset]
        return features, labels

In [5]:
import random
import math

class SVMClassifier:
    """Linear SVM trained with per-sample sub-gradient steps on the hinge loss.

    Features are min-max scaled using the ranges seen during ``fit``; the same
    ranges are reused by ``predict`` so that training and prediction happen in
    the same feature space. The positive class is the label whose string form
    is ``'1'``; every other label is treated as the negative class, and
    predictions are returned as the strings ``'1'`` and ``'0'``.
    """

    def __init__(self, learning_rate=0.01, epochs=1000, lambda_param=0.01):
        """Store the hyper-parameters; the model itself is created by ``fit``.

        Args:
            learning_rate: step size of each weight/bias update.
            epochs: number of full passes over the training set.
            lambda_param: L2 regularisation strength.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.lambda_param = lambda_param
        self.weights = None  # Set by fit(); one weight per feature
        self.bias = 0
        # Per-feature minimum / maximum of the training data, set by fit().
        self.feature_mins = None
        self.feature_maxs = None

    @staticmethod
    def _column_ranges(X):
        """Return ``(mins, maxs)``: the per-column minimum and maximum of ``X``."""
        columns = list(zip(*X))
        return [min(col) for col in columns], [max(col) for col in columns]

    @staticmethod
    def _scale(X, mins, maxs):
        """Min-max scale every row of ``X`` with the given per-column ranges.

        A column whose range is zero is mapped to 0 to avoid dividing by zero.
        """
        return [
            [
                (value - lo) / (hi - lo) if hi - lo != 0 else 0
                for value, lo, hi in zip(row, mins, maxs)
            ]
            for row in X
        ]

    def normalize(self, X):
        """Scale ``X`` to the 0-1 range using the minima and maxima of ``X`` itself.

        This is a stand-alone utility; it neither reads nor changes the ranges
        stored by ``fit``. Returns a new list of rows (empty for empty input).
        """
        if not X:
            return []
        mins, maxs = self._column_ranges(X)
        return self._scale(X, mins, maxs)

    def fit(self, X, y):
        """Train the model on feature rows ``X`` and labels ``y``.

        Args:
            X: non-empty list of equally long rows of numbers.
            y: one label per row; a label counts as positive when
                ``str(label).strip() == '1'``.

        Raises:
            ValueError: if ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        if not X:
            raise ValueError("Cannot fit on an empty dataset.")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}."
            )

        # Remember the training ranges so predict() can apply the same scaling.
        self.feature_mins, self.feature_maxs = self._column_ranges(X)
        X = self._scale(X, self.feature_mins, self.feature_maxs)

        n_features = len(X[0])
        self.weights = [0.0] * n_features
        self.bias = 0  # Reset as well, so a second fit() starts from scratch
        targets = [1 if str(label).strip() == '1' else -1 for label in y]

        for _ in range(self.epochs):
            for row, target in zip(X, targets):
                margin = target * (
                    sum(w * x for w, x in zip(self.weights, row)) + self.bias
                )
                if margin < 1:
                    # Inside the margin or misclassified: hinge-loss gradient
                    # plus the regularisation term; the bias moves as well.
                    for j in range(n_features):
                        self.weights[j] += self.learning_rate * (
                            (target * row[j]) - (2 * self.lambda_param * self.weights[j])
                        )
                    self.bias += self.learning_rate * target
                else:
                    # Outside the margin: only the regularisation shrink applies.
                    for j in range(n_features):
                        self.weights[j] -= self.learning_rate * (
                            2 * self.lambda_param * self.weights[j]
                        )

    def predict(self, X):
        """Return the predicted label (``'1'`` or ``'0'``) for every row of ``X``.

        Rows are scaled with the ranges learned in ``fit``, so the result for a
        row does not depend on which other rows are in the same batch.

        Raises:
            RuntimeError: if the model has not been fitted.
            ValueError: if a row does not have one value per learned weight.
        """
        if self.weights is None:
            raise RuntimeError("SVMClassifier must be fitted before calling predict().")
        if not X:
            return []

        n_features = len(self.weights)
        for row in X:
            if len(row) != n_features:
                raise ValueError(
                    f"Expected {n_features} features per sample, got {len(row)}."
                )

        if self.feature_mins is None or self.feature_maxs is None:
            # Weights were assigned by hand without fit(); no training ranges
            # exist, so fall back to scaling the batch by its own ranges.
            scaled = self.normalize(X)
        else:
            scaled = self._scale(X, self.feature_mins, self.feature_maxs)

        predictions = []
        for row in scaled:
            linear_output = sum(w * x for w, x in zip(self.weights, row)) + self.bias
            predictions.append('1' if linear_output > 0 else '0')
        return predictions

    def predict_single(self, features):
        """Return the predicted label for one sample given as a flat list of numbers."""
        return self.predict([features])[0]

    def classification_report(self, y_true, y_pred):
        """Compute per-class precision, recall, F1-score and accuracy.

        A class is every distinct value in ``y_true``. Each class is scored
        one-vs-rest, so 'Accuracy' is ``(tp + tn) / len(y_true)`` for that
        class. Ratios with a zero denominator are reported as 0.

        Returns:
            dict: ``{label: {'Precision', 'Recall', 'F1-score', 'Accuracy'}}``
            with labels in sorted order of their string form.

        Raises:
            ValueError: if ``y_true`` and ``y_pred`` differ in length.
        """
        if len(y_true) != len(y_pred):
            raise ValueError(
                f"y_true and y_pred must have the same length, "
                f"got {len(y_true)} and {len(y_pred)}."
            )

        report = {}
        pairs = list(zip(y_true, y_pred))
        # Sorting makes the report order deterministic between runs.
        for label in sorted(set(y_true), key=str):
            tp = sum(1 for truth, pred in pairs if truth == label and pred == label)
            fp = sum(1 for truth, pred in pairs if truth != label and pred == label)
            fn = sum(1 for truth, pred in pairs if truth == label and pred != label)
            tn = sum(1 for truth, pred in pairs if truth != label and pred != label)

            precision = tp / (tp + fp) if tp + fp > 0 else 0
            recall = tp / (tp + fn) if tp + fn > 0 else 0
            f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
            accuracy = (tp + tn) / len(y_true)

            report[label] = {
                'Precision': precision,
                'Recall': recall,
                'F1-score': f1,
                'Accuracy': accuracy
            }

        return report



In [None]:
# Prompts shown to the user, in the order the answers are passed to the classifier.
_FEATURE_PROMPTS = (
    "Age: ",
    "Experience: ",
    "Income: ",
    "ZIP Code: ",
    "Family size: ",
    "Average spending on credit cards per month (CCAvg): ",
    "Education level (1: Undergraduate, 2: Graduate, 3: Advanced/Professional): ",
    "Mortgage value: ",
    "Securities Account (0: No, 1: Yes): ",
    "CD Account (0: No, 1: Yes): ",
    "Online banking (0: No, 1: Yes): ",
    "Credit Card (0: No, 1: Yes): ",
)


def _read_feature_answers(prompts):
    """Ask every prompt in turn and return the raw answers as strings.

    Returns ``None`` as soon as the user types 'exit' (any case) or the input
    stream ends, which tells the caller to stop prompting.
    """
    answers = []
    for prompt in prompts:
        try:
            answer = input(prompt)
        except EOFError:
            return None
        if answer.strip().lower() == 'exit':
            return None
        answers.append(answer)
    return answers


def main():
    """Train the SVM on 'bank.csv', print a test-set report, then classify user input.

    Workflow: read the CSV, split it 80/20 into train and test rows, fit an
    ``SVMClassifier`` on the training rows, print per-class metrics for the
    test rows, and finally loop asking the user for feature values until they
    type 'exit'.
    """
    # Path of the CSV file containing the Bank dataset.
    filepath = 'bank.csv'

    # Load the rows and split them with the default 80% train / 20% test ratio.
    data_handler = DataHandler(filepath)
    dataset = data_handler.read_csv()
    train_set, test_set = data_handler.train_test_split(dataset)

    # Same feature/label separation for both parts.
    train_features, train_labels = data_handler.separate_features_labels(train_set)
    test_features, test_labels = data_handler.separate_features_labels(test_set)

    # Fit the linear SVM on the training rows and score it on the held-out rows.
    classifier = SVMClassifier()
    classifier.fit(train_features, train_labels)
    predictions = classifier.predict(test_features)
    report = classifier.classification_report(test_labels, predictions)

    print("Classification Report for Bank Loan Granting:")
    print("_______________________________________________")
    print()
    print("Class 1 - Individual Is Likely To Be Granted A Loan.")
    print("Class 0 - Individual Is NOT Likely To Be Granted A Loan. \n")

    print("Classification Report:\n")
    for label, metrics in report.items():
        print(f"Class {label}:")
        for metric, value in metrics.items():
            print(f"  {metric}: {value:.2f}")
        print()

    # Interactive loop: one full set of answers per prediction.
    while True:
        print("Enter the following details (or type 'exit' to quit):")
        answers = _read_feature_answers(_FEATURE_PROMPTS)
        if answers is None:
            break

        # Only the numeric parsing is guarded, so genuine errors raised while
        # predicting are not mislabelled as bad user input.
        try:
            input_features = [float(answer) for answer in answers]
        except ValueError:
            print("Invalid input. Please enter numeric values where applicable.")
            continue

        # predict() works on a batch of rows, so the single sample is wrapped in
        # a list and the first (only) result is taken.
        # NOTE(review): the answer is only meaningful if predict() scales this
        # row with the training-set ranges rather than the batch's own - verify.
        prediction = classifier.predict([input_features])[0]
        print(f"Predicted class: {prediction}")
        print()
            
# Entry-point guard: run the train / report / prompt workflow in main() only when
# this code is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


Classification Report for Bank Loan Granting:
_______________________________________________

Class 1 .
Class 0 . 

Classification Report:

Class 0:
  Precision: 0.70
  Recall: 0.98
  F1-score: 0.82
  Accuracy: 0.70

Class 1:
  Precision: 0.75
  Recall: 0.11
  F1-score: 0.20
  Accuracy: 0.70

Enter the following details (or type 'exit' to quit):
