# Loan Application 

In [274]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [276]:
# Read the raw loan-application records from the CSV next to this notebook.
df = pd.read_csv(filepath_or_buffer='loan_applications.csv')

In [278]:
df.head()

Unnamed: 0,application_id,customer_id,application_date,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,...,existing_emis_monthly,debt_to_income_ratio,property_ownership_status,residential_address,applicant_age,gender,number_of_dependents,loan_status,fraud_flag,fraud_type
0,c8bf0bea-70e6-4870-9125-41b8210c527f,CUST109427,2023-04-09,Business Loan,604000.0,12,11.66,Medical Emergency,Retired,34700.0,...,1100.0,3.17,Rented,"94/31, Sehgal Zila, Vadodara-380521, Anantapur...",28,Female,3,Approved,0,
1,91224cec-3544-4bc7-ac15-a9792da54c02,CUST106146,2023-09-23,Car Loan,100000.0,240,13.62,Education,Unemployed,51600.0,...,0.0,0.0,Owned,"H.No. 00, Sheth Chowk, Ichalkaranji 006728, Im...",44,Other,3,Approved,0,
2,4efcd02d-4a03-4ab7-9bd1-0ff430493d0c,CUST100674,2023-05-22,Education Loan,431000.0,60,11.4,Medical Emergency,Self-Employed,14800.0,...,4600.0,31.08,Rented,"H.No. 81, Dutta Path, Kozhikode-340301, Tadepa...",56,Other,4,Approved,0,
3,a61337d4-ba04-4a68-b492-2cb8266e6ed7,CUST106466,2024-07-09,Car Loan,324000.0,120,10.36,Debt Consolidation,Self-Employed,28800.0,...,4000.0,13.89,Rented,"H.No. 022, Rege Road, Tiruvottiyur-927857, Aur...",27,Other,4,Declined,0,
4,a8d1639e-170b-41b2-826a-55c7dae38d16,CUST112319,2023-11-20,Personal Loan,100000.0,36,14.14,Business Expansion,Salaried,43900.0,...,1100.0,2.51,Rented,"85/24, Bali Zila, Sambalpur 922071, Tumkur, Ke...",50,Other,0,Declined,0,


In [280]:
df.shape

(50000, 21)

In [282]:
df.columns

Index(['application_id', 'customer_id', 'application_date', 'loan_type',
       'loan_amount_requested', 'loan_tenure_months', 'interest_rate_offered',
       'purpose_of_loan', 'employment_status', 'monthly_income', 'cibil_score',
       'existing_emis_monthly', 'debt_to_income_ratio',
       'property_ownership_status', 'residential_address', 'applicant_age',
       'gender', 'number_of_dependents', 'loan_status', 'fraud_flag',
       'fraud_type'],
      dtype='object')

In [290]:
# Simple (non-compounding) interest over the whole tenure. The formula treats
# interest_rate_offered as a yearly percentage:
#   principal * rate * (tenure in years) / 100
principal = df['loan_amount_requested']
tenure_years = df['loan_tenure_months'] / 12
df['interest_amount'] = principal * df['interest_rate_offered'] * tenure_years / 100

# Total repayment = principal + the interest computed above.
df['total_payable_amount'] = principal + df['interest_amount']

In [292]:
df.head()

Unnamed: 0,application_id,customer_id,application_date,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,...,property_ownership_status,residential_address,applicant_age,gender,number_of_dependents,loan_status,fraud_flag,fraud_type,interest_amount,total_payable_amount
0,c8bf0bea-70e6-4870-9125-41b8210c527f,CUST109427,2023-04-09,Business Loan,604000.0,12,11.66,Medical Emergency,Retired,34700.0,...,Rented,"94/31, Sehgal Zila, Vadodara-380521, Anantapur...",28,Female,3,Approved,0,,70426.4,674426.4
1,91224cec-3544-4bc7-ac15-a9792da54c02,CUST106146,2023-09-23,Car Loan,100000.0,240,13.62,Education,Unemployed,51600.0,...,Owned,"H.No. 00, Sheth Chowk, Ichalkaranji 006728, Im...",44,Other,3,Approved,0,,272400.0,372400.0
2,4efcd02d-4a03-4ab7-9bd1-0ff430493d0c,CUST100674,2023-05-22,Education Loan,431000.0,60,11.4,Medical Emergency,Self-Employed,14800.0,...,Rented,"H.No. 81, Dutta Path, Kozhikode-340301, Tadepa...",56,Other,4,Approved,0,,245670.0,676670.0
3,a61337d4-ba04-4a68-b492-2cb8266e6ed7,CUST106466,2024-07-09,Car Loan,324000.0,120,10.36,Debt Consolidation,Self-Employed,28800.0,...,Rented,"H.No. 022, Rege Road, Tiruvottiyur-927857, Aur...",27,Other,4,Declined,0,,335664.0,659664.0
4,a8d1639e-170b-41b2-826a-55c7dae38d16,CUST112319,2023-11-20,Personal Loan,100000.0,36,14.14,Business Expansion,Salaried,43900.0,...,Rented,"85/24, Bali Zila, Sambalpur 922071, Tumkur, Ke...",50,Other,0,Declined,0,,42420.0,142420.0


In [294]:
# Columns removed before modelling: identifiers, the application date, credit and
# fraud fields, the address, and the intermediate interest_amount column.
columns_to_drop = [
    'application_id',
    'customer_id',
    'application_date',
    'cibil_score',
    'existing_emis_monthly',
    'debt_to_income_ratio',
    'property_ownership_status',
    'residential_address',
    'number_of_dependents',
    'fraud_flag',
    'interest_amount',
    'fraud_type',
]
df = df.drop(columns=columns_to_drop)

In [296]:
df.head()

Unnamed: 0,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,applicant_age,gender,loan_status,total_payable_amount
0,Business Loan,604000.0,12,11.66,Medical Emergency,Retired,34700.0,28,Female,Approved,674426.4
1,Car Loan,100000.0,240,13.62,Education,Unemployed,51600.0,44,Other,Approved,372400.0
2,Education Loan,431000.0,60,11.4,Medical Emergency,Self-Employed,14800.0,56,Other,Approved,676670.0
3,Car Loan,324000.0,120,10.36,Debt Consolidation,Self-Employed,28800.0,27,Other,Declined,659664.0
4,Personal Loan,100000.0,36,14.14,Business Expansion,Salaried,43900.0,50,Other,Declined,142420.0


In [298]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   loan_type              50000 non-null  object 
 1   loan_amount_requested  50000 non-null  float64
 2   loan_tenure_months     50000 non-null  int64  
 3   interest_rate_offered  50000 non-null  float64
 4   purpose_of_loan        50000 non-null  object 
 5   employment_status      50000 non-null  object 
 6   monthly_income         50000 non-null  float64
 7   applicant_age          50000 non-null  int64  
 8   gender                 50000 non-null  object 
 9   loan_status            50000 non-null  object 
 10  total_payable_amount   50000 non-null  float64
dtypes: float64(4), int64(2), object(5)
memory usage: 4.2+ MB


In [300]:
df.isnull().sum()

loan_type                0
loan_amount_requested    0
loan_tenure_months       0
interest_rate_offered    0
purpose_of_loan          0
employment_status        0
monthly_income           0
applicant_age            0
gender                   0
loan_status              0
total_payable_amount     0
dtype: int64

In [302]:
df['loan_type'].unique()

array(['Business Loan', 'Car Loan', 'Education Loan', 'Personal Loan',
       'Home Loan'], dtype=object)

In [304]:
df['purpose_of_loan'].unique()

array(['Medical Emergency', 'Education', 'Debt Consolidation',
       'Business Expansion', 'Wedding', 'Vehicle Purchase',
       'Home Renovation'], dtype=object)

In [306]:
df['employment_status'].unique()

array(['Retired', 'Unemployed', 'Self-Employed', 'Salaried',
       'Business Owner', 'Student'], dtype=object)

In [308]:
df['gender'].unique()

array(['Female', 'Other', 'Male'], dtype=object)

In [312]:
df['loan_status'].unique()

array(['Approved', 'Declined', 'Fraudulent - Detected',
       'Fraudulent - Undetected'], dtype=object)

In [314]:
# Integer code for each loan product. The numbers are arbitrary labels, not a ranking.
loan_type_codes = {
    'Business Loan': 0,
    'Car Loan': 1,
    'Education Loan': 2,
    'Personal Loan': 3,
    'Home Loan': 4,
}
loan_type_encoded = df['loan_type'].map(loan_type_codes)

# Series.map silently turns every value missing from the dict into NaN. That
# happens for unexpected categories and also when this cell is re-run on the
# already-encoded column, so fail loudly instead of corrupting the feature.
loan_type_unmapped = df.loc[loan_type_encoded.isna(), 'loan_type'].unique()
if len(loan_type_unmapped) > 0:
    raise ValueError(
        "loan_type contains values with no code "
        f"(was this cell already run?): {list(loan_type_unmapped)}"
    )
df['loan_type'] = loan_type_encoded.astype('int64')

In [316]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   loan_type              50000 non-null  int64  
 1   loan_amount_requested  50000 non-null  float64
 2   loan_tenure_months     50000 non-null  int64  
 3   interest_rate_offered  50000 non-null  float64
 4   purpose_of_loan        50000 non-null  object 
 5   employment_status      50000 non-null  object 
 6   monthly_income         50000 non-null  float64
 7   applicant_age          50000 non-null  int64  
 8   gender                 50000 non-null  object 
 9   loan_status            50000 non-null  object 
 10  total_payable_amount   50000 non-null  float64
dtypes: float64(4), int64(3), object(4)
memory usage: 4.2+ MB


In [318]:
df.head()

Unnamed: 0,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,applicant_age,gender,loan_status,total_payable_amount
0,0,604000.0,12,11.66,Medical Emergency,Retired,34700.0,28,Female,Approved,674426.4
1,1,100000.0,240,13.62,Education,Unemployed,51600.0,44,Other,Approved,372400.0
2,2,431000.0,60,11.4,Medical Emergency,Self-Employed,14800.0,56,Other,Approved,676670.0
3,1,324000.0,120,10.36,Debt Consolidation,Self-Employed,28800.0,27,Other,Declined,659664.0
4,3,100000.0,36,14.14,Business Expansion,Salaried,43900.0,50,Other,Declined,142420.0


In [320]:
# Integer code for each stated loan purpose. The numbers are arbitrary labels, not a ranking.
purpose_codes = {
    'Medical Emergency': 0,
    'Education': 1,
    'Debt Consolidation': 2,
    'Business Expansion': 3,
    'Wedding': 4,
    'Vehicle Purchase': 5,
    'Home Renovation': 6,
}
purpose_encoded = df['purpose_of_loan'].map(purpose_codes)

# Series.map silently turns every value missing from the dict into NaN. That
# happens for unexpected categories and also when this cell is re-run on the
# already-encoded column, so fail loudly instead of corrupting the feature.
purpose_unmapped = df.loc[purpose_encoded.isna(), 'purpose_of_loan'].unique()
if len(purpose_unmapped) > 0:
    raise ValueError(
        "purpose_of_loan contains values with no code "
        f"(was this cell already run?): {list(purpose_unmapped)}"
    )
df['purpose_of_loan'] = purpose_encoded.astype('int64')

In [322]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   loan_type              50000 non-null  int64  
 1   loan_amount_requested  50000 non-null  float64
 2   loan_tenure_months     50000 non-null  int64  
 3   interest_rate_offered  50000 non-null  float64
 4   purpose_of_loan        50000 non-null  int64  
 5   employment_status      50000 non-null  object 
 6   monthly_income         50000 non-null  float64
 7   applicant_age          50000 non-null  int64  
 8   gender                 50000 non-null  object 
 9   loan_status            50000 non-null  object 
 10  total_payable_amount   50000 non-null  float64
dtypes: float64(4), int64(4), object(3)
memory usage: 4.2+ MB


In [324]:
df.head()

Unnamed: 0,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,applicant_age,gender,loan_status,total_payable_amount
0,0,604000.0,12,11.66,0,Retired,34700.0,28,Female,Approved,674426.4
1,1,100000.0,240,13.62,1,Unemployed,51600.0,44,Other,Approved,372400.0
2,2,431000.0,60,11.4,0,Self-Employed,14800.0,56,Other,Approved,676670.0
3,1,324000.0,120,10.36,2,Self-Employed,28800.0,27,Other,Declined,659664.0
4,3,100000.0,36,14.14,3,Salaried,43900.0,50,Other,Declined,142420.0


In [326]:
# Integer code for each employment category. The numbers are arbitrary labels, not a ranking.
employment_codes = {
    'Retired': 0,
    'Unemployed': 1,
    'Self-Employed': 2,
    'Salaried': 3,
    'Business Owner': 4,
    'Student': 5,
}
employment_encoded = df['employment_status'].map(employment_codes)

# Series.map silently turns every value missing from the dict into NaN. That
# happens for unexpected categories and also when this cell is re-run on the
# already-encoded column, so fail loudly instead of corrupting the feature.
employment_unmapped = df.loc[employment_encoded.isna(), 'employment_status'].unique()
if len(employment_unmapped) > 0:
    raise ValueError(
        "employment_status contains values with no code "
        f"(was this cell already run?): {list(employment_unmapped)}"
    )
df['employment_status'] = employment_encoded.astype('int64')

In [328]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   loan_type              50000 non-null  int64  
 1   loan_amount_requested  50000 non-null  float64
 2   loan_tenure_months     50000 non-null  int64  
 3   interest_rate_offered  50000 non-null  float64
 4   purpose_of_loan        50000 non-null  int64  
 5   employment_status      50000 non-null  int64  
 6   monthly_income         50000 non-null  float64
 7   applicant_age          50000 non-null  int64  
 8   gender                 50000 non-null  object 
 9   loan_status            50000 non-null  object 
 10  total_payable_amount   50000 non-null  float64
dtypes: float64(4), int64(5), object(2)
memory usage: 4.2+ MB


In [330]:
df.head()

Unnamed: 0,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,applicant_age,gender,loan_status,total_payable_amount
0,0,604000.0,12,11.66,0,0,34700.0,28,Female,Approved,674426.4
1,1,100000.0,240,13.62,1,1,51600.0,44,Other,Approved,372400.0
2,2,431000.0,60,11.4,0,2,14800.0,56,Other,Approved,676670.0
3,1,324000.0,120,10.36,2,2,28800.0,27,Other,Declined,659664.0
4,3,100000.0,36,14.14,3,3,43900.0,50,Other,Declined,142420.0


In [332]:
# Integer code for each gender value. The numbers are arbitrary labels, not a ranking.
gender_codes = {'Male': 0, 'Female': 1, 'Other': 2}
gender_encoded = df['gender'].map(gender_codes)

# Series.map silently turns every value missing from the dict into NaN. That
# happens for unexpected categories and also when this cell is re-run on the
# already-encoded column, so fail loudly instead of corrupting the feature.
gender_unmapped = df.loc[gender_encoded.isna(), 'gender'].unique()
if len(gender_unmapped) > 0:
    raise ValueError(
        "gender contains values with no code "
        f"(was this cell already run?): {list(gender_unmapped)}"
    )
df['gender'] = gender_encoded.astype('int64')

In [334]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   loan_type              50000 non-null  int64  
 1   loan_amount_requested  50000 non-null  float64
 2   loan_tenure_months     50000 non-null  int64  
 3   interest_rate_offered  50000 non-null  float64
 4   purpose_of_loan        50000 non-null  int64  
 5   employment_status      50000 non-null  int64  
 6   monthly_income         50000 non-null  float64
 7   applicant_age          50000 non-null  int64  
 8   gender                 50000 non-null  int64  
 9   loan_status            50000 non-null  object 
 10  total_payable_amount   50000 non-null  float64
dtypes: float64(4), int64(6), object(1)
memory usage: 4.2+ MB


In [336]:
df.head()

Unnamed: 0,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,applicant_age,gender,loan_status,total_payable_amount
0,0,604000.0,12,11.66,0,0,34700.0,28,1,Approved,674426.4
1,1,100000.0,240,13.62,1,1,51600.0,44,2,Approved,372400.0
2,2,431000.0,60,11.4,0,2,14800.0,56,2,Approved,676670.0
3,1,324000.0,120,10.36,2,2,28800.0,27,2,Declined,659664.0
4,3,100000.0,36,14.14,3,3,43900.0,50,2,Declined,142420.0


In [338]:
# Integer code for each application outcome. The numbers are arbitrary labels, not a ranking.
loan_status_codes = {
    'Declined': 0,
    'Approved': 1,
    'Fraudulent - Detected': 2,
    'Fraudulent - Undetected': 3,
}
loan_status_encoded = df['loan_status'].map(loan_status_codes)

# Series.map silently turns every value missing from the dict into NaN. That
# happens for unexpected categories and also when this cell is re-run on the
# already-encoded column, so fail loudly instead of corrupting the feature.
loan_status_unmapped = df.loc[loan_status_encoded.isna(), 'loan_status'].unique()
if len(loan_status_unmapped) > 0:
    raise ValueError(
        "loan_status contains values with no code "
        f"(was this cell already run?): {list(loan_status_unmapped)}"
    )
df['loan_status'] = loan_status_encoded.astype('int64')

In [340]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   loan_type              50000 non-null  int64  
 1   loan_amount_requested  50000 non-null  float64
 2   loan_tenure_months     50000 non-null  int64  
 3   interest_rate_offered  50000 non-null  float64
 4   purpose_of_loan        50000 non-null  int64  
 5   employment_status      50000 non-null  int64  
 6   monthly_income         50000 non-null  float64
 7   applicant_age          50000 non-null  int64  
 8   gender                 50000 non-null  int64  
 9   loan_status            50000 non-null  int64  
 10  total_payable_amount   50000 non-null  float64
dtypes: float64(4), int64(7)
memory usage: 4.2 MB


In [342]:
df.head()

Unnamed: 0,loan_type,loan_amount_requested,loan_tenure_months,interest_rate_offered,purpose_of_loan,employment_status,monthly_income,applicant_age,gender,loan_status,total_payable_amount
0,0,604000.0,12,11.66,0,0,34700.0,28,1,1,674426.4
1,1,100000.0,240,13.62,1,1,51600.0,44,2,1,372400.0
2,2,431000.0,60,11.4,0,2,14800.0,56,2,1,676670.0
3,1,324000.0,120,10.36,2,2,28800.0,27,2,0,659664.0
4,3,100000.0,36,14.14,3,3,43900.0,50,2,0,142420.0


In [344]:
from sklearn.model_selection import train_test_split

In [346]:
# Regression target is the total repayment; every remaining column is a feature.
# NOTE(review): total_payable_amount was derived from loan_amount_requested,
# interest_rate_offered and loan_tenure_months, all of which stay in X.
target_column = 'total_payable_amount'
y = df[target_column]
X = df.drop(columns=[target_column])

In [350]:
X.shape

(50000, 10)

In [352]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [354]:
from sklearn.linear_model import LinearRegression

In [356]:
# Baseline model: ordinary least squares fitted on the training split.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

In [360]:
lr.score(X_test,y_test)

0.8628627816621157

In [362]:
# Linear-regression predictions for the held-out rows.
y_pred = lr.predict(X=X_test)

In [364]:
y_pred

array([1107074.15416792,  827204.2297124 , -392809.73643881, ...,
       1587779.00480304, 1179963.77834315,  850858.11022824])

In [366]:
from sklearn.model_selection import cross_val_score

In [368]:
# R² of the linear model on the training split, the test split, and averaged
# over 5-fold cross-validation on the full data, printed in that order.
lr_train_r2 = lr.score(X_train, y_train)
lr_test_r2 = lr.score(X_test, y_test)
lr_cv_r2 = cross_val_score(lr, X, y, cv=5).mean()
for lr_r2 in (lr_train_r2, lr_test_r2, lr_cv_r2):
    print(lr_r2)

0.8610329664275936
0.8628627816621157
0.861351241780703


In [370]:
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score

In [372]:
# Gradient-boosted regressor with library-default hyperparameters, fitted on the training split.
xgmodel = XGBRegressor()
xgmodel.fit(X=X_train, y=y_train)

In [374]:
# XGBoost predictions for both splits (train first, then test).
ypredtrain1, ypredtest1 = (
    xgmodel.predict(split_features) for split_features in (X_train, X_test)
)

In [376]:
# Report R² of the XGBoost predictions on each split.
xgb_r2_inputs = (
    ("Train R²:", y_train, ypredtrain1),
    ("Test R²:", y_test, ypredtest1),
)
for r2_label, actual_values, predicted_values in xgb_r2_inputs:
    print(r2_label, r2_score(actual_values, predicted_values))

Train R²: 0.9996977458903517
Test R²: 0.999155377503744


In [378]:
# 5-fold cross-validated R² of the XGBoost model on the full data, averaged over folds.
xgb_fold_r2 = cross_val_score(estimator=xgmodel, X=X, y=y, scoring='r2', cv=5)
cv_score = xgb_fold_r2.mean()
print("Cross-validation R² score:", cv_score)

Cross-validation R² score: 0.9991389054546289


In [392]:
import pickle

# Persist the fitted XGBoost model. The context manager guarantees the file is
# flushed and closed even if pickling fails; a bare open() left the handle open.
with open("loan application approved.pkl", "wb") as xgb_model_file:
    pickle.dump(xgmodel, xgb_model_file)

In [394]:
import pickle

# Persist the fitted linear-regression model. The context manager guarantees the
# file is flushed and closed even if pickling fails; a bare open() left the handle open.
with open("loan application approved1.pkl", "wb") as lr_model_file:
    pickle.dump(lr, lr_model_file)

In [398]:
# One hand-built applicant, already integer-encoded. The keys are listed in the
# same order as the feature columns the model was trained on.
sample_applicant = {
    'loan_type': 1,
    'loan_amount_requested': 100000,
    'loan_tenure_months': 240,
    'interest_rate_offered': 13.62,
    'purpose_of_loan': 1,
    'employment_status': 0,
    'monthly_income': 51600,
    'applicant_age': 44,
    'gender': 1,
    'loan_status': 1,
}
new_data = pd.DataFrame([sample_applicant])
xgmodel.predict(new_data)

array([369675.84], dtype=float32)