## Imports

In [9]:
# Standard
import json
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os

# Encoding
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

# Scale
from sklearn.preprocessing import StandardScaler

# Models
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import RandomOverSampler

# Scoring
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from joblib import load


In [3]:
# Load the pickled DataFrame from the processed_data directory.
# The path is assembled with os.path.join so it resolves on every OS; the
# previous hard-coded "\\" separators only worked on Windows (where this
# expression still yields the identical string).
# NOTE(review): read_pickle executes pickle deserialization — only load
# files from a trusted source.
file_name = 'df'
df = pd.read_pickle(os.path.join('.', 'processed_data', f'{file_name}.pki'))

In [4]:
# Preview the first five rows as a quick sanity check of the loaded frame.
df.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,1,0,0,0,0,5849,0.0,128.0,360.0,1.0,2,1
1,LP001003,1,1,1,0,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,1,1,0,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,LP001006,1,1,0,1,0,2583,2358.0,120.0,360.0,1.0,2,1
4,LP001008,1,0,0,0,0,6000,0.0,141.0,360.0,1.0,2,1


In [8]:
# Load the serialized estimator from the ml_models directory. It is used
# below for prediction without being refit, so the file must contain an
# already-fitted model.
# The path is assembled with os.path.join so it resolves on every OS; the
# previous hard-coded "\\" separators only worked on Windows (where this
# expression still yields the identical string).
# NOTE(review): joblib files are pickle-based — only load trusted files.
# `file_name` is also read by the summary printout to label the model.
file_name = 'gb_model'
model = load(os.path.join('.', 'ml_models', f'{file_name}.joblib'))

In [16]:
# Evaluate the loaded model on several random 25% hold-out splits of `df`
# and report the mean accuracy and F1 score.
#
# NOTE(review): the model is loaded pre-fitted and is never refit on `train`.
# If it was originally fitted on rows of `df`, each hold-out split overlaps
# its training data and these scores are optimistic — confirm how the model
# was trained before quoting the numbers.

SPLIT_SEED = 42   # master seed: makes the sequence of random splits repeatable
iters = 10        # number of random splits to average over

# Raw columns that are replaced by the derived income features.
RAW_INCOME_COLS = ['ApplicantIncome', 'CoapplicantIncome', 'Loan_Amount_Term', 'LoanAmount']


def _add_income_features(frame):
    """Return a new frame with derived income features replacing the raw ones.

    Adds, in this order, ``TotalIncome`` (applicant + co-applicant income),
    ``EMI`` (loan amount / loan term) and ``BalanceIncome`` (total income
    minus EMI), then drops the raw columns they were computed from.

    The input frame is not modified: ``assign``/``drop`` return new objects,
    which avoids mutating a frame produced by ``train_test_split`` in place
    (a common source of ``SettingWithCopyWarning``).
    """
    return (
        frame
        .assign(
            TotalIncome=lambda d: d['ApplicantIncome'] + d['CoapplicantIncome'],
            EMI=lambda d: d['LoanAmount'] / d['Loan_Amount_Term'],
            BalanceIncome=lambda d: d['TotalIncome'] - d['EMI'],
        )
        .drop(columns=RAW_INCOME_COLS)
    )


# Per-split seeds are drawn from a seeded generator instead of os.urandom so
# that re-running the cell reproduces the same splits and the same scores.
seed_rng = np.random.default_rng(SPLIT_SEED)

acc = 0.0   # running sum of per-split accuracy
f1 = 0.0    # running sum of per-split F1

for it in range(iters):
    # Upper bound is exclusive, giving the same 0 .. 2**32 - 2 range as before.
    r = int(seed_rng.integers(0, 4294967295))
    train, test = train_test_split(df, test_size=.25, random_state=r)

    test = _add_income_features(test)

    # Loan_ID is an identifier and Loan_Status is the target; neither is a feature.
    X_test = test.drop(columns=['Loan_ID', 'Loan_Status'])
    y_test = test['Loan_Status']

    y_pred = model.predict(X_test)

    curr_f1 = f1_score(y_true=y_test, y_pred=y_pred)
    curr_acc = accuracy_score(y_true=y_test, y_pred=y_pred)

    f1 += curr_f1
    acc += curr_acc

    print(f'It: {it}')
    print(f'Acc: {curr_acc}')
    print(f'F1: {curr_f1}')


print(f'Accuracy of model: {file_name} is: {acc / iters}')
print(f'f1 score of model: {file_name} is: {f1 / iters}')


It: 0
Acc: 0.7987012987012987
F1: 0.8622222222222222
It: 1
Acc: 0.7987012987012987
F1: 0.8691983122362869
It: 2
Acc: 0.8506493506493507
F1: 0.9053497942386832
It: 3
Acc: 0.8246753246753247
F1: 0.8888888888888888
It: 4
Acc: 0.7857142857142857
F1: 0.8583690987124464
It: 5
Acc: 0.8246753246753247
F1: 0.889795918367347
It: 6
Acc: 0.8376623376623377
F1: 0.8953974895397491
It: 7
Acc: 0.8051948051948052
F1: 0.8728813559322034
It: 8
Acc: 0.8376623376623377
F1: 0.8908296943231442
It: 9
Acc: 0.8636363636363636
F1: 0.9169960474308301
Accuracy of model: gb_model is: 0.8227272727272729
f1 score of model: gb_model is: 0.8849928821891802
