# Mortgage Loans: Predict on new data

In [100]:
import pandas as pd
import numpy as np
import pickle

## Read in the pickle files

In [101]:
# dataframe of approved loans, used for the visualization further down
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# The `with` block closes the file even if unpickling fails.
with open('approved_loans.pkl', 'rb') as pkl_file:
    approved = pickle.load(pkl_file)

In [102]:
# dataframe of denied loans, used for the visualization further down
# (the `with` block closes the file even if unpickling fails)
with open('denied_loans.pkl', 'rb') as pkl_file:
    denied = pickle.load(pkl_file)

In [103]:
# random forest model (provides the predict / predict_proba calls used below)
# (the `with` block closes the file even if unpickling fails)
with open('loan_approval_rf_model.pkl', 'rb') as pkl_file:
    rf = pickle.load(pkl_file)

In [6]:
# encoder1: one-hot encoder applied to the Property_Area column
# (the `with` block closes the file even if unpickling fails)
with open('loan_approval_onehot_encoder.pkl', 'rb') as pkl_file:
    encoder1 = pickle.load(pkl_file)

In [7]:
# ss_scaler1: scaler applied to the log monthly return (ln_monthly_return)
# (the `with` block closes the file even if unpickling fails)
with open('loan_approval_ss_scaler1.pkl', 'rb') as pkl_file:
    ss_scaler1 = pickle.load(pkl_file)

In [8]:
# ss_scaler2: scaler applied to the log total income (ln_total_income)
# (the `with` block closes the file even if unpickling fails)
with open('loan_approval_total_income.pkl', 'rb') as pkl_file:
    ss_scaler2 = pickle.load(pkl_file)

In [49]:
# ss_scaler3: scaler applied to the log loan amount (ln_LoanAmount)
# (the `with` block closes the file even if unpickling fails)
with open('loan_approval_loan_amount.pkl', 'rb') as pkl_file:
    ss_scaler3 = pickle.load(pkl_file)

In [50]:
# Raw input columns, in the exact order callers must supply their arguments.
features = [
    # numeric inputs
    'Credit_History',
    'LoanAmount',
    'Loan_Amount_Term',
    'ApplicantIncome',
    'CoapplicantIncome',
    # categorical inputs
    'Property_Area',
    'Gender',
    'Education',
    'Self_Employed',
]
features

['Credit_History',
 'LoanAmount',
 'Loan_Amount_Term',
 'ApplicantIncome',
 'CoapplicantIncome',
 'Property_Area',
 'Gender',
 'Education',
 'Self_Employed']

## Write a function to preprocess and predict

In [77]:
## Create a function that takes one valid value per entry in `features` (9 inputs) & makes a prediction

def make_predictions(listofargs):
    """Preprocess a single loan application and score it with the loaded model.

    Parameters
    ----------
    listofargs : sequence
        One value per entry of the module-level ``features`` list, in the same
        order: Credit_History, LoanAmount, Loan_Amount_Term, ApplicantIncome,
        CoapplicantIncome, Property_Area, Gender, Education, Self_Employed.
        The first five are coerced with ``int()``; Gender / Education /
        Self_Employed are recoded from their text labels to 1/0.

    Returns
    -------
    tuple
        ``(pred, prob)`` -- the results of ``rf.predict`` and
        ``rf.predict_proba`` on the single preprocessed row.
        If any step fails (wrong number of arguments, non-numeric values, a
        value the encoder/scalers/model reject, ...), nothing is raised and
        the pair ``('Please provide 8 valid inputs',
        'Please provide 8 valid inputs')`` is returned instead.
    """
    try:
        # the order of the arguments must match the order of the features;
        # build the one-row frame in a single step (a wrong argument count raises here)
        df = pd.DataFrame([list(listofargs)], columns=features)

        # coerce the numeric arguments to whole numbers (int() drops any fractional part).
        # The scalar is read explicitly because calling int() on a Series is deprecated.
        for var in ['Credit_History', 'LoanAmount', 'Loan_Amount_Term', 'ApplicantIncome', 'CoapplicantIncome']:
            df[var] = int(df.at[0, var])

        # recode a few columns using the same steps we employed on the training data.
        # The result is assigned back: a chained `df[col].replace(..., inplace=True)` is not
        # guaranteed to modify `df` under pandas copy-on-write. Unknown labels are left
        # untouched, exactly as `replace` would leave them.
        binary_codes = {
            'Gender': {'Male': 1, 'Female': 0},
            'Education': {'Graduate': 1, 'Not Graduate': 0},
            'Self_Employed': {'Yes': 1, 'No': 0},
        }
        for var, codes in binary_codes.items():
            df[var] = df[var].map(lambda value, codes=codes: codes.get(value, value))
        df['LoanAmount'] = df['LoanAmount']*1000

        # transform the categorical variable using the same encoder we trained previously
        # NOTE(review): these labels are hard-coded rather than read from the encoder. If
        # encoder1 is a scikit-learn OneHotEncoder its categories are normally sorted
        # (Rural, Semiurban, Urban) -- confirm the labels match the training notebook.
        ohe = pd.DataFrame(encoder1.transform(df[['Property_Area']]).toarray())
        ohe.columns = ['Property_Area_{}'.format(item) for item in ['Semiurban', 'Urban', 'Rural']]
        df = pd.concat([df, ohe], axis=1)

        # create new features using the scalers we trained earlier
        # NOTE(review): LoanAmount was already multiplied by 1000 above and is multiplied
        # again for ln_LoanAmount; left as-is on the assumption that it mirrors training.
        ln_monthly_return_raw = np.log(df['LoanAmount']/df['Loan_Amount_Term']).values
        ln_total_income_raw = np.log(df['ApplicantIncome'] + df['CoapplicantIncome']).values
        ln_LoanAmount_raw = np.log(1000*df['LoanAmount']).values
        # the scalers are fed a single-column 2-D array; flatten the result before assigning
        df['ln_monthly_return'] = np.ravel(ss_scaler1.transform(ln_monthly_return_raw.reshape(-1, 1)))
        df['ln_total_income'] = np.ravel(ss_scaler2.transform(ln_total_income_raw.reshape(-1, 1)))
        df['ln_LoanAmount'] = np.ravel(ss_scaler3.transform(ln_LoanAmount_raw.reshape(-1, 1)))

        # drop & rearrange the columns in the order expected by your trained model!
        df = df[['Gender', 'Education', 'Self_Employed', 'Credit_History',
                 'Property_Area_Semiurban', 'Property_Area_Urban', 'ln_monthly_return',
                 'ln_total_income', 'ln_LoanAmount']]

        pred = rf.predict(df)
        prob = rf.predict_proba(df)
        return pred, prob

    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupts still propagate.
        # NOTE(review): the message says 8 although `features` has 9 entries; the text is
        # kept unchanged in case callers display or match on it.
        return 'Please provide 8 valid inputs', 'Please provide 8 valid inputs'

## Predicting on new data

In [85]:
# display the expected argument order again before building the inputs below
features

['Credit_History',
 'LoanAmount',
 'Loan_Amount_Term',
 'ApplicantIncome',
 'CoapplicantIncome',
 'Property_Area',
 'Gender',
 'Education',
 'Self_Employed']

In [78]:
# Three made-up applications; each list follows the order of `features`:
# Credit_History, LoanAmount, Loan_Amount_Term, ApplicantIncome,
# CoapplicantIncome, Property_Area, Gender, Education, Self_Employed
fake1 = [
    1, 130.0, 360.0, 4500.0, 1500.0,
    'Rural', 'Female', 'Graduate', 'Yes',
]
fake2 = [
    0, 60, 180, 3000, 5000,
    'Urban', 'Male', 'Graduate', 'Yes',
]
fake3 = [
    1, 2000, 360, 6500, 3000,
    'Semiurban', 'Male', 'Not Graduate', 'No',
]

In [79]:
# example 1: returns the (prediction, class probabilities) pair for fake1
make_predictions(fake1)

(array([0]), array([[0.76, 0.24]]))

In [80]:
# example 2: returns the (prediction, class probabilities) pair for fake2
make_predictions(fake2)

(array([0]), array([[0.98, 0.02]]))

In [94]:
# example 3: keep only the first class probability of the single scored row
# ([1] -> probabilities, [0] -> first row, [0] -> first class)
make_predictions(fake3)[1][0][0]

0.72

In [97]:
# change the threshold: label an application 'Denied' only when the first class
# probability, expressed in percent, is above `Threshold`
Threshold = 77
for data in [fake1, fake2, fake3]:
    # [1] -> probabilities, [0] -> the single row, [0] -> first class
    rawprob = 100*make_predictions(data)[1][0][0]
    # plain conditional instead of re-creating a named lambda on every iteration
    formatted_y = 'Denied' if rawprob > Threshold else 'Approved'
    print(rawprob)
    print(formatted_y)

76.0
Approved
98.0
Denied
72.0
Approved


## Visualize the new data

In [98]:
# Put the first fake application into a one-row frame so it can be plotted next to
# the historical loans. The column names come from `features` (the original `cols`
# is never defined in this notebook, so the cell failed on a fresh kernel).
newdata = pd.DataFrame([fake1], columns=features)
newdata['Combined_Income'] = newdata['ApplicantIncome'] + newdata['CoapplicantIncome']
newdata

Unnamed: 0,Credit_History,LoanAmount,Loan_Amount_Term,ApplicantIncome,CoapplicantIncome,Property_Area,Gender,Education,Self_Employed,Combined_Income
0,1,130.0,360.0,4500.0,1500.0,Rural,Female,Graduate,Yes,6000.0


In [106]:
# plotly visualization of Loan Status
import plotly.graph_objects as go


def _hover_text(frame, templates):
    """Return one tuple of formatted strings per row of `frame`.

    `templates` is a list of (format string, column name) pairs; each column
    value is substituted into its format string.
    """
    per_column = [
        [template.format(value) for value in frame[column]]
        for template, column in templates
    ]
    return list(zip(*per_column))


def _loan_trace(frame, name, templates, marker):
    """Build a 3-D marker trace of loan amount vs. combined income vs. term."""
    return go.Scatter3d(
        x=frame['LoanAmount'],
        y=frame['Combined_Income'],
        z=frame['Loan_Amount_Term'],
        name=name,
        mode='markers',
        text=_hover_text(frame, templates),
        hoverinfo='text',
        marker=marker,
    )


# NOTE(review): the approved trace shows Gender as its third hover field while the
# denied trace shows Loan Status -- confirm that this difference is intentional.
trace0 = _loan_trace(
    approved,
    'approved',
    [("Education: {}", 'Education'),
     ("Property Area: {}", 'Property_Area'),
     ("Gender: {}", 'Gender')],
    {'size': 3, 'color': 'blue', 'opacity': 0.4},
)

trace1 = _loan_trace(
    denied,
    'denied',
    [("Education: {}", 'Education'),
     ("Property Area: {}", 'Property_Area'),
     ("Loan Status: {}", 'Loan_Status')],
    {'size': 3, 'color': 'red', 'opacity': 0.4},
)

# the new application is drawn as one large yellow marker so it stands out
trace2 = _loan_trace(
    newdata,
    'new data',
    [("Education: {}", 'Education'),
     ("Property Area: {}", 'Property_Area')],
    {'size': 15, 'color': 'yellow'},
)

scene_axes = {
    'xaxis': {'title': 'Loan Amount'},
    'yaxis': {'title': 'Combined Income'},
    'zaxis': {'title': 'Term'},
}
layout = go.Layout(title="Loan Status", showlegend=True, scene=scene_axes)
fig = go.Figure(data=[trace0, trace1, trace2], layout=layout)
fig