# Loan Prediction

In [1]:
import pandas as pd
import numpy as np

In [129]:
df = pd.read_csv("loan_data_set.csv")

In [130]:
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [131]:
df.isna().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [132]:
df['Gender'].fillna(np.random.choice(['Male', 'Female']), inplace=True)

In [133]:
df['Married'].fillna(np.random.choice(['Yes', 'No']), inplace=True)

In [134]:
df['Dependents'].fillna(np.random.choice(['0', '1', '2', '3+']), inplace=True)

In [135]:
df['Self_Employed'].fillna(np.random.choice(['Yes', 'No']), inplace=True)

In [136]:
df['LoanAmount'].fillna(df['LoanAmount'].mean(), inplace=True)

In [137]:
df['Loan_Amount_Term'].fillna(df['Loan_Amount_Term'].mean(), inplace=True)

In [138]:
df['Credit_History'].fillna(np.random.choice(['0', '1']), inplace=True)

In [139]:
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,146.412162,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [140]:
df['Dependents'].dtype

dtype('O')

In [141]:
df['Dependents'] = df['Dependents'].apply(lambda x: x if x!='3+' else 3)

In [142]:
df['Dependents'].unique()

array(['0', '1', '2', 3], dtype=object)

In [143]:
df['Property_Area'].unique()

array(['Urban', 'Rural', 'Semiurban'], dtype=object)

In [144]:
df = pd.get_dummies(data=df, columns=['Property_Area'])

In [145]:
df = pd.get_dummies(data=df, columns=['Gender', 'Married', 'Education', 'Self_Employed'], drop_first=True)

In [146]:
df.head()

Unnamed: 0,Loan_ID,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban,Gender_Male,Married_Yes,Education_Not Graduate,Self_Employed_Yes
0,LP001002,0,5849,0.0,146.412162,360.0,1.0,Y,0,0,1,1,0,0,0
1,LP001003,1,4583,1508.0,128.0,360.0,1.0,N,1,0,0,1,1,0,0
2,LP001005,0,3000,0.0,66.0,360.0,1.0,Y,0,0,1,1,1,0,1
3,LP001006,0,2583,2358.0,120.0,360.0,1.0,Y,0,0,1,1,1,1,0
4,LP001008,0,6000,0.0,141.0,360.0,1.0,Y,0,0,1,1,0,0,0


In [148]:
X = df[['ApplicantIncome', 'CoapplicantIncome','LoanAmount','Loan_Amount_Term','Credit_History','Property_Area_Rural','Property_Area_Semiurban','Property_Area_Urban', 'Gender_Male', 'Married_Yes','Education_Not Graduate','Self_Employed_Yes']]
y = df['Loan_Status']

In [149]:
from sklearn.model_selection import train_test_split

In [150]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [151]:
from sklearn.ensemble import RandomForestClassifier

In [152]:
model = RandomForestClassifier()

In [153]:
model.fit(X_train, y_train)

In [154]:
predictions = model.predict(X_test)

In [155]:
from sklearn.metrics import classification_report, confusion_matrix

In [156]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           N       0.62      0.44      0.51        64
           Y       0.74      0.86      0.80       121

    accuracy                           0.71       185
   macro avg       0.68      0.65      0.66       185
weighted avg       0.70      0.71      0.70       185



In [157]:
print(confusion_matrix(y_test, predictions))

[[ 28  36]
 [ 17 104]]


In [160]:
test_person = {
    'ApplicantIncome': 50, 
    'CoapplicantIncome': 100,
    'LoanAmount': 10000,
    'Loan_Amount_Term': 300,
    'Credit_History': 0,
    'Property_Area_Rural': 0,
    'Property_Area_Semiurban':1,
    'Property_Area_Urban': 0,
    'Gender_Male': 1, 
    'Married_Yes': 1,
    'Education_Not Graduate': 1,
    'Self_Employed_Yes': 0
}

test_df = pd.DataFrame([test_person])
test_df

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban,Gender_Male,Married_Yes,Education_Not Graduate,Self_Employed_Yes
0,50,100,10000,300,0,0,1,0,1,1,1,0


In [162]:
model.predict(test_df)[0]

'N'