In [None]:
# !pip install fancyimpute==0.7.0
!pip install imblearn==0.0
!pip install joblib==1.0.1
!pip install matplotlib==3.3.4
!pip install numpy==1.19.5
!pip install pandas==1.3.5
!pip install scikit_learn
!pip install scipy==1.6.2
!pip install seaborn==0.11.1
!pip install six==1.15.0
!pip install xgboost==1.5.1
!pip install fancyimpute 

# Loan Eligibility Prediction using Gradient Boosting Classifier

This Python data science project predicts whether a loan should be granted to an applicant. Eligibility is predicted from several factors, such as the customer's credit score and past credit history.

## Dataset Description
In this project, you must explore and cleanse a dataset
consisting of over 100,000 loan records to determine the
best way to predict whether a loan applicant should be
granted a loan or not. You must then build a machine
learning model that returns the unique customer ID and a
loan status label that indicates whether the loan should be
given to that individual or not.

- Loan ID: A unique Identifier for the loan information.
- Customer ID: A unique identifier for the customer.
Customers may have more than one loan.
- Loan Status: A categorical variable indicating if the
loan was given to this customer
- Current Loan Amount: This is the loan amount
that was either completely paid off, or the amount that
was defaulted. This data is for the previous loan.
- Term: A categorical variable indicating if it is a short
term or long term loan.
- Credit Score: A value between 0 and 800 indicating
the riskiness of the borrower’s credit history.
- Years in current job: A categorical variable indicating
how many years the customer has been in their current job.
- Home Ownership: Categorical variable indicating home
ownership. Values are "Rent", "Home Mortgage", and
"Own". If the value is OWN, then the customer is a home
owner with no mortgage
- Annual Income: The customer's annual income
- Purpose: A description of the purpose of the loan.
- Monthly Debt: The customer's monthly
payment for their existing loans
- Years of Credit History: The years since the first entry in the
customer’s credit history.
- Months since last delinquent: Months since the last loan
delinquent payment
- Number of Open Accounts: The total number of open credit cards
- Number of Credit Problems: The number of credit
problems in the customer records.
- Current Credit Balance: The current total debt for the customer
- Maximum Open Credit: The maximum credit limit
for all credit sources.
- Bankruptcies: The number of bankruptcies
- Tax Liens: The number of tax liens.


## Evaluation criteria
To achieve a passing grade, the accuracy of the model
has to be at least 70 (percent).

## Predicting Output Of The Model

In [None]:
from fancyimpute import KNN, SoftImpute

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import statistics
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.preprocessing import LabelBinarizer,StandardScaler,OrdinalEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from scipy.stats import boxcox
from sklearn.linear_model import LogisticRegression,RidgeClassifier, PassiveAggressiveClassifier
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import plot_importance
from matplotlib import pyplot
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
import joblib

#%matplotlib inline

In [None]:
import operator

In [None]:
# Compatibility shim: registers objects in `sys.modules` under legacy
# scikit-learn import paths. Presumably required because the installed
# imblearn release (imported in the next cell) still imports these old
# names - TODO confirm against the pinned versions.
# Statement order matters: each alias must be registered before any library
# that relies on it is imported.
import six
import sys
# Expose the standalone `six` package under the old `sklearn.externals.six` path.
sys.modules['sklearn.externals.six'] = six
import sklearn.neighbors._base
# Alias the private `_base` module under its former name `sklearn.neighbors.base`.
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base
from sklearn.utils import _safe_indexing
# NOTE(review): the value registered here is looked up as an attribute of
# `sklearn.utils`, not via the name imported on the previous line; whether it
# is a module or a function depends on the scikit-learn version - verify.
sys.modules['sklearn.utils.safe_indexing'] = sklearn.utils._safe_indexing

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
# Location of the CSV with the loan records to score.
TEST_DATA_URL = "https://s3.amazonaws.com/hackerday.datascience/358/test_data.csv"
test = pd.read_csv(TEST_DATA_URL)

In [None]:
# Replace each categorical column of `test` (in place) with integer codes.
# pd.factorize numbers the distinct values in order of first appearance and
# uses -1 for missing values by default.
# NOTE(review): the codes therefore depend on the row order of this file -
# confirm they match the encoding used when the model was trained.
cat_cols = ['Term','Years in current job','Home Ownership','Purpose']

for column in cat_cols:
    codes, _uniques = pd.factorize(test[column])
    test[column] = codes


In [None]:
# Impute missing values with SoftImpute.
# Columns at positions 3..18 of `test` are used as features; per the
# `test.columns` listing these skip 'Loan ID', 'Customer ID' and 'Loan Status'.
feature_cols = test.columns[3:19]
imputed_matrix = SoftImpute().fit_transform(test[feature_cols])
# Re-wrap the imputed array so it keeps the original column labels and row index.
updated_test_data = pd.DataFrame(data=imputed_matrix, columns=feature_cols, index=test.index)


[SoftImpute] Max Singular Value of X_init = 220732.319189
[SoftImpute] Iter 1: observed MAE=393.868714 rank=4
[SoftImpute] Iter 2: observed MAE=393.835770 rank=4
[SoftImpute] Iter 3: observed MAE=393.823069 rank=4
[SoftImpute] Iter 4: observed MAE=393.816892 rank=4
[SoftImpute] Iter 5: observed MAE=393.813120 rank=4
[SoftImpute] Iter 6: observed MAE=393.810394 rank=4
[SoftImpute] Iter 7: observed MAE=393.808205 rank=4
[SoftImpute] Iter 8: observed MAE=393.806333 rank=4
[SoftImpute] Iter 9: observed MAE=393.804668 rank=4
[SoftImpute] Iter 10: observed MAE=393.803152 rank=4
[SoftImpute] Iter 11: observed MAE=393.801748 rank=4
[SoftImpute] Iter 12: observed MAE=393.800437 rank=4
[SoftImpute] Iter 13: observed MAE=393.799203 rank=4
[SoftImpute] Iter 14: observed MAE=393.798037 rank=4
[SoftImpute] Iter 15: observed MAE=393.796931 rank=4
[SoftImpute] Iter 16: observed MAE=393.795881 rank=4
[SoftImpute] Iter 17: observed MAE=393.794882 rank=4
[SoftImpute] Iter 18: observed MAE=393.793931 rank

In [None]:
# Build the model input. pd.get_dummies one-hot encodes object/category
# columns, and drop_first=True drops the first level of each encoded column.
# NOTE(review): the displayed `test_data` has the same 16 columns as
# `updated_test_data`, so this call appears to add no dummy columns here - confirm.
test_data = pd.get_dummies(data=updated_test_data, drop_first=True)

In [None]:
# Load the previously trained model.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
# NOTE(review): this is an absolute path; the file must exist at exactly this
# location for the cell to run.
MODEL_PATH = '/content/GBM_Model_version1.pkl'
gbm_pickle = joblib.load(MODEL_PATH)

In [None]:
# Predicted class label for every row of the prepared feature frame.
y_pred = gbm_pickle.predict(test_data)




In [None]:
# Per-class probabilities for every row of the prepared feature frame
# (one column per class; the stored output below shows two columns).
y_prob= gbm_pickle.predict_proba(test_data)




In [None]:
# How to read each row of y_prob:
#   value at index 0 greater than 0.5 -> predicted class 0: Loan Approved
#   value at index 1 greater than 0.5 -> predicted class 1: Loan Rejected

y_prob

array([[0.61290255, 0.38709745],
       [0.38711263, 0.61288737],
       [0.59317013, 0.40682987],
       [0.61534105, 0.38465895],
       [0.37406606, 0.62593394],
       [0.85571182, 0.14428818],
       [0.61534105, 0.38465895],
       [0.43632241, 0.56367759],
       [0.61534105, 0.38465895]])

In [None]:
# Display the predicted class labels (0 / 1).
y_pred

array([0, 1, 0, 0, 1, 0, 0, 1, 0])


- 1: Loan Rejected
- 0: Loan Approved

In [None]:
# Translate numeric labels to text: 0 -> 'Loan Approved', any other value -> 'Loan Rejected'.
is_rejected = y_pred != 0
y_pred_1 = np.where(is_rejected, 'Loan Rejected', 'Loan Approved')
y_pred_1

array(['Loan Approved', 'Loan Rejected', 'Loan Approved', 'Loan Approved',
       'Loan Rejected', 'Loan Approved', 'Loan Approved', 'Loan Rejected',
       'Loan Approved'], dtype='<U13')

In [None]:
# Human-readable label together with the probability of the *predicted* class.
# Each row of y_prob holds the class-0 (approved) probability at index 0 and
# the class-1 (rejected) probability at index 1. A rejection must therefore
# report row[1]; reporting row[0] would print the approval probability under
# the "Loan Rejected Probability" label.
y_pred_2 = [
    'class 0: Loan Approved Probability: ' + str(round(row[0], 2))
    if row[0] >= 0.5
    else 'class 1 Loan Rejected Probability: ' + str(round(row[1], 2))
    for row in y_prob
]
y_pred_2

['class 0: Loan Approved Probability: 0.61',
 'class 1 Loan Rejected Probability: 0.39',
 'class 0: Loan Approved Probability: 0.59',
 'class 0: Loan Approved Probability: 0.62',
 'class 1 Loan Rejected Probability: 0.37',
 'class 0: Loan Approved Probability: 0.86',
 'class 0: Loan Approved Probability: 0.62',
 'class 1 Loan Rejected Probability: 0.44',
 'class 0: Loan Approved Probability: 0.62']

In [None]:
# Inspect the feature frame that was passed to the model (after imputation).
test_data

Unnamed: 0,Current Loan Amount,Term,Credit Score,Years in current job,Home Ownership,Annual Income,Purpose,Monthly Debt,Years of Credit History,Months since last delinquent,Number of Open Accounts,Number of Credit Problems,Current Credit Balance,Maximum Open Credit,Bankruptcies,Tax Liens
0,12232.0,0.0,7280.0,0.0,0.0,46643.0,0.0,777.39,18.0,10.0,12.0,0.0,6762.0,7946.0,0.0,0.0
1,25014.0,1.0,7330.0,1.0,1.0,81099.0,0.0,892.09,26.7,35.077343,14.0,0.0,35706.0,77961.0,0.0,0.0
2,16117.0,0.0,7240.0,2.0,1.0,60438.0,1.0,1244.02,16.7,32.0,11.0,1.0,11275.0,14815.0,1.0,0.0
3,11716.0,0.0,7400.0,3.0,0.0,34171.0,0.0,990.94,10.0,11.082999,21.0,0.0,7009.0,43533.0,0.0,0.0
4,9789.0,1.0,6860.0,1.0,1.0,47003.0,1.0,503.71,16.7,25.0,13.0,1.0,16913.0,19553.0,1.0,0.0
5,11911.0,0.0,7420.0,4.0,1.0,70475.0,2.0,886.81,17.7,30.167739,13.0,0.0,28212.0,59897.0,0.0,0.0
6,28988.0,0.0,7420.0,3.0,1.0,58074.0,0.0,871.11,22.8,21.292206,9.0,0.0,14423.0,54018.0,0.0,0.0
7,17705.0,1.0,6630.0,3.0,2.0,49180.0,0.0,274.59,30.2,16.935017,10.0,1.0,4252.0,25012.0,1.0,0.0
8,16812.0,0.0,7360.0,5.0,0.0,50945.0,0.0,590.12,14.6,21.31719,9.0,0.0,12903.0,15379.0,0.0,0.0


In [None]:
# Inspect the loaded frame. The categorical columns show integer codes because
# the factorize cell overwrote them in place.
test

Unnamed: 0,Loan ID,Customer ID,Loan Status,Current Loan Amount,Term,Credit Score,Years in current job,Home Ownership,Annual Income,Purpose,Monthly Debt,Years of Credit History,Months since last delinquent,Number of Open Accounts,Number of Credit Problems,Current Credit Balance,Maximum Open Credit,Bankruptcies,Tax Liens
0,6cf51492-02a2-423e-b93d-676f05b9ad53,7c202b37-2add-44e8-9aea-d5b119aea935,Charged Off,12232,0,7280,0,0,46643,0,777.39,18.0,10.0,12,0,6762,7946,0,0
1,552e7ade-4292-4354-9ff9-c48031697d72,e7217b0a-07ac-47dd-b379-577b5a35b7c6,Charged Off,25014,1,7330,1,1,81099,0,892.09,26.7,,14,0,35706,77961,0,0
2,9b5e32b3-8d76-4801-afc8-d729d5a2e6b9,0a62fc41-16c8-40b5-92ff-9e4b763ce714,Charged Off,16117,0,7240,2,1,60438,1,1244.02,16.7,32.0,11,1,11275,14815,1,0
3,5419b7c7-ac11-4be2-a8a7-b131fb6d6dbe,30f36c59-5182-4482-8bbb-5b736849ae43,Charged Off,11716,0,7400,3,0,34171,0,990.94,10.0,,21,0,7009,43533,0,0
4,1450910f-9495-4fc9-afaf-9bdf4b9821df,70c26012-bba5-42c0-8dcb-75295ada31bb,Charged Off,9789,1,6860,1,1,47003,1,503.71,16.7,25.0,13,1,16913,19553,1,0
5,7e9f6759-6a13-48ef-adfa-ef9fbcfcfcc8,05301ee5-29ec-48b6-b3e5-0813ea25d684,Charged Off,11911,0,7420,4,1,70475,2,886.81,17.7,,13,0,28212,59897,0,0
6,c2b0aa3e-555b-441b-a4aa-e0e4a52c87d3,db3e74ec-2123-4608-be8d-a93e98cabfcf,Charged Off,28988,0,7420,3,1,58074,0,871.11,22.8,,9,0,14423,54018,0,0
7,233e0119-6dfe-47c0-a212-b4cc00371c8d,def66e46-951e-4bff-bf96-87ae6e092e19,Charged Off,17705,1,6630,3,2,49180,0,274.59,30.2,,10,1,4252,25012,1,0
8,fc9d109b-15c2-4fe3-b7d8-27610b89f80b,f54c53e4-acd7-4e29-8f55-d2a247a4b62a,Charged Off,16812,0,7360,5,0,50945,0,590.12,14.6,,9,0,12903,15379,0,0


In [None]:
# Display the text labels again.
y_pred_1

array(['Loan Approved', 'Loan Rejected', 'Loan Approved', 'Loan Approved',
       'Loan Rejected', 'Loan Approved', 'Loan Approved', 'Loan Rejected',
       'Loan Approved'], dtype='<U13')

In [None]:
# List the columns of `test`.
# NOTE(review): the stored output already contains 'Loan Status Pred' and
# 'Loan Status with Prob', which are only added by the following cell - the
# notebook was evidently executed out of order. Restart and run all cells to
# refresh this output.
test.columns

Index(['Loan ID', 'Customer ID', 'Loan Status', 'Current Loan Amount', 'Term',
       'Credit Score', 'Years in current job', 'Home Ownership',
       'Annual Income', 'Purpose', 'Monthly Debt', 'Years of Credit History',
       'Months since last delinquent', 'Number of Open Accounts',
       'Number of Credit Problems', 'Current Credit Balance',
       'Maximum Open Credit', 'Bankruptcies', 'Tax Liens', 'Loan Status Pred',
       'Loan Status with Prob'],
      dtype='object')

In [None]:
# Attach both prediction views to `test` (in place, in this order), then keep
# only the identifiers, the recorded status and the predictions as a compact report.
new_columns = {
    'Loan Status Pred': y_pred_1,
    'Loan Status with Prob': y_pred_2,
}
for column_name, column_values in new_columns.items():
    test[column_name] = column_values

report_columns = ['Loan ID', 'Customer ID', 'Loan Status', 'Loan Status Pred', 'Loan Status with Prob']
test1 = test[report_columns]



In [None]:
# Display the compact report (identifiers, recorded status, predictions).
test1

Unnamed: 0,Loan ID,Customer ID,Loan Status,Loan Status Pred,Loan Status with Prob
0,6cf51492-02a2-423e-b93d-676f05b9ad53,7c202b37-2add-44e8-9aea-d5b119aea935,Charged Off,Loan Approved,class 0: Loan Approved Probability: 0.61
1,552e7ade-4292-4354-9ff9-c48031697d72,e7217b0a-07ac-47dd-b379-577b5a35b7c6,Charged Off,Loan Rejected,class 1 Loan Rejected Probability: 0.39
2,9b5e32b3-8d76-4801-afc8-d729d5a2e6b9,0a62fc41-16c8-40b5-92ff-9e4b763ce714,Charged Off,Loan Approved,class 0: Loan Approved Probability: 0.59
3,5419b7c7-ac11-4be2-a8a7-b131fb6d6dbe,30f36c59-5182-4482-8bbb-5b736849ae43,Charged Off,Loan Approved,class 0: Loan Approved Probability: 0.62
4,1450910f-9495-4fc9-afaf-9bdf4b9821df,70c26012-bba5-42c0-8dcb-75295ada31bb,Charged Off,Loan Rejected,class 1 Loan Rejected Probability: 0.37
5,7e9f6759-6a13-48ef-adfa-ef9fbcfcfcc8,05301ee5-29ec-48b6-b3e5-0813ea25d684,Charged Off,Loan Approved,class 0: Loan Approved Probability: 0.86
6,c2b0aa3e-555b-441b-a4aa-e0e4a52c87d3,db3e74ec-2123-4608-be8d-a93e98cabfcf,Charged Off,Loan Approved,class 0: Loan Approved Probability: 0.62
7,233e0119-6dfe-47c0-a212-b4cc00371c8d,def66e46-951e-4bff-bf96-87ae6e092e19,Charged Off,Loan Rejected,class 1 Loan Rejected Probability: 0.44
8,fc9d109b-15c2-4fe3-b7d8-27610b89f80b,f54c53e4-acd7-4e29-8f55-d2a247a4b62a,Charged Off,Loan Approved,class 0: Loan Approved Probability: 0.62


In [None]:
# Write the full `test` frame to CSV without the row index.
FULL_OUTPUT_CSV = 'Output_Test.csv'
test.to_csv(FULL_OUTPUT_CSV, index=False)

In [None]:
# Write the compact report frame to CSV without the row index.
REPORT_OUTPUT_CSV = 'Output_Test1.csv'
test1.to_csv(REPORT_OUTPUT_CSV, index=False)
