In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('loan_approval.csv')
df.drop(columns=['name','city'],inplace=True)

In [3]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

In [4]:
X = df.drop(columns=['loan_approved','credit_score','points'],axis=1)
y = df['loan_approved'].astype(int)

In [5]:
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X),columns=scaler.get_feature_names_out(X.columns))

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
print(X_train.shape)
print(y_train.shape)

(1600, 3)
(1600,)


In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_model(true, predicted):
    acc = accuracy_score(true, predicted)
    prec = precision_score(true, predicted)
    rec = recall_score(true, predicted)
    f1 = f1_score(true, predicted)
    return acc, prec, rec, f1

In [8]:
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Linear Discriminant Analysis': LinearDiscriminantAnalysis(),
    'Naive Bayes (Gaussian)': GaussianNB(),
    'Naive Bayes (Bernoulli)': BernoulliNB(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Extra Trees': ExtraTreesClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Support Vector Machine': SVC(probability=True),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    'LightGBM': LGBMClassifier(verbose=0),
    'CatBoost': CatBoostClassifier(verbose=0)
}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)

    # predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # evaluation
    train_acc, train_prec, train_rec, train_f1 = evaluate_model(y_train, y_train_pred)
    test_acc, test_prec, test_rec, test_f1 = evaluate_model(y_test, y_test_pred)

    results.append({
        "Model": name,
        "Train_Accuracy": train_acc,
        "Train_Precision": train_prec,
        "Train_Recall": train_rec,
        "Train_F1": train_f1,
        "Test_Accuracy": test_acc,
        "Test_Precision": test_prec,
        "Test_Recall": test_rec,
        "Test_F1": test_f1
    })

In [9]:
result_df = pd.DataFrame(results)

In [10]:
result_df

Unnamed: 0,Model,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Test_Accuracy,Test_Precision,Test_Recall,Test_F1
0,Logistic Regression,0.629375,0.589565,0.487069,0.533438,0.615,0.604317,0.459016,0.521739
1,Linear Discriminant Analysis,0.62625,0.584775,0.485632,0.530612,0.62,0.609929,0.469945,0.530864
2,Naive Bayes (Gaussian),0.63875,0.600683,0.505747,0.549142,0.64,0.630872,0.513661,0.566265
3,Naive Bayes (Bernoulli),0.631875,0.587849,0.514368,0.548659,0.605,0.588652,0.453552,0.512346
4,K-Nearest Neighbors,0.763125,0.750395,0.682471,0.714823,0.59,0.556213,0.513661,0.534091
5,Decision Tree,1.0,1.0,1.0,1.0,0.565,0.52356,0.546448,0.534759
6,Random Forest,1.0,1.0,1.0,1.0,0.585,0.552147,0.491803,0.520231
7,Extra Trees,1.0,1.0,1.0,1.0,0.5625,0.524096,0.47541,0.498567
8,Gradient Boosting,0.74,0.755474,0.594828,0.665595,0.6325,0.623288,0.497268,0.553191
9,Support Vector Machine,0.659375,0.700265,0.37931,0.492078,0.6275,0.663462,0.377049,0.480836
