## **Make predictions on Test Data**

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import sklearn.linear_model as logistic_regression
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold,train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, mean_squared_error
from sklearn.preprocessing import  MinMaxScaler,OneHotEncoder,LabelEncoder,RobustScaler,StandardScaler
import pickle
from sklearn.impute import SimpleImputer

In [5]:
from functions import *

## **Load Data**

In [3]:
import os

# Project root that the relative paths below ('data/...', 'model/...') resolve against.
# Set the LOAN_PROJECT_DIR environment variable to run this notebook on another machine;
# the fallback is the original author's local checkout, so existing behaviour is unchanged.
PROJECT_DIR = os.environ.get(
    'LOAN_PROJECT_DIR',
    r'C:\Users\Dell\Loan-Eligibility-Prediction-Project',
)
os.chdir(PROJECT_DIR)

# Raw, unlabelled loan applications to score.
input_data = pd.read_csv('data/loan-test.csv')

## **Data Preparation**

In [None]:
# Run the project's `prepare_data` helper (available through `from functions import *`)
# on the raw test set. NOTE(review): the helper's body is not visible in this notebook —
# confirm which columns it drops or renames before relying on `df`'s schema.
df = prepare_data(input_data)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,Male,Yes,0,Graduate,No,5720,0,110.0,360.0,1.0,Urban
1,Male,Yes,1,Graduate,No,3076,1500,126.0,360.0,1.0,Urban
2,Male,Yes,2,Graduate,No,5000,1800,208.0,360.0,1.0,Urban


In [7]:
# Pass the prepared frame through the project's `data_imput` helper.
# NOTE(review): presumably this fills missing values; the strategy is defined in
# `functions` and is not shown here — confirm it matches what was used for training.
df_imputed = data_imput(df)

# Preview the first rows of the result.
df_imputed.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,Male,Yes,0,Graduate,No,5720.0,0.0,110.0,360.0,1.0,Urban
1,Male,Yes,1,Graduate,No,3076.0,1500.0,126.0,360.0,1.0,Urban
2,Male,Yes,2,Graduate,No,5000.0,1800.0,208.0,360.0,1.0,Urban
3,Male,Yes,2,Graduate,No,2340.0,2546.0,100.0,360.0,1.0,Urban
4,Male,No,0,Not Graduate,No,3276.0,0.0,78.0,360.0,1.0,Urban


In [8]:
# Apply the project's `var_dummy` helper. In the displayed output, `Dependents` and
# `Property_Area` are replaced by 0/1 indicator columns (Dependents_1, Dependents_2,
# Dependents_3+, Property_Area_Semiurban, Property_Area_Urban).
# NOTE(review): confirm the resulting column set and order match the training features.
df_impu_dummy = var_dummy(df_imputed)
df_impu_dummy.head()

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Dependents_1,Dependents_2,Dependents_3+,Property_Area_Semiurban,Property_Area_Urban
0,Male,Yes,Graduate,No,5720.0,0.0,110.0,360.0,1.0,0,0,0,0,1
1,Male,Yes,Graduate,No,3076.0,1500.0,126.0,360.0,1.0,1,0,0,0,1
2,Male,Yes,Graduate,No,5000.0,1800.0,208.0,360.0,1.0,0,1,0,0,1
3,Male,Yes,Graduate,No,2340.0,2546.0,100.0,360.0,1.0,0,1,0,0,1
4,Male,No,Not Graduate,No,3276.0,0.0,78.0,360.0,1.0,0,0,0,0,1


In [10]:

# Columns handed to the label-encoding helper.
colmns = ['Gender','Married','Education','Self_Employed','Credit_History']

# `label_encod_test` comes from the star-import of `functions`; the displayed output shows
# the listed columns as integer codes afterwards.
# NOTE(review): presumably this is the inference-time variant of the encoder — confirm.
df_impu_dummy_encod = label_encod_test(df_impu_dummy,colmns)
df_impu_dummy_encod.head()

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Dependents_1,Dependents_2,Dependents_3+,Property_Area_Semiurban,Property_Area_Urban
0,1,1,0,0,5720.0,0.0,110.0,360.0,1,0,0,0,0,1
1,1,1,0,0,3076.0,1500.0,126.0,360.0,1,1,0,0,0,1
2,1,1,0,0,5000.0,1800.0,208.0,360.0,1,0,1,0,0,1
3,1,1,0,0,2340.0,2546.0,100.0,360.0,1,0,1,0,0,1
4,1,0,1,0,3276.0,0.0,78.0,360.0,1,0,0,0,0,1


In [11]:

# Columns handed to the label-encoding helper.
colmns = ['Gender','Married','Education','Self_Employed','Credit_History']

# NOTE(review): this rebinds `df_impu_dummy_encod`, so any value assigned to that name by an
# earlier cell is replaced by the output of `label_encod`. The helper is defined in
# `functions` and is not visible here — confirm that `label_encod` (rather than a
# test-specific variant) is the intended encoder for inference data.
df_impu_dummy_encod = label_encod(df_impu_dummy,colmns)
df_impu_dummy_encod.head()

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Dependents_1,Dependents_2,Dependents_3+,Property_Area_Semiurban,Property_Area_Urban
0,1,1,0,0,5720.0,0.0,110.0,360.0,1,0,0,0,0,1
1,1,1,0,0,3076.0,1500.0,126.0,360.0,1,1,0,0,0,1
2,1,1,0,0,5000.0,1800.0,208.0,360.0,1,0,1,0,0,1
3,1,1,0,0,2340.0,2546.0,100.0,360.0,1,0,1,0,0,1
4,1,0,1,0,3276.0,0.0,78.0,360.0,1,0,0,0,0,1


In [31]:
# Numeric columns handed to the scaling helper.
colms = ['ApplicantIncome','CoapplicantIncome','LoanAmount','Loan_Amount_Term']

# `data_scaler` comes from the star-import of `functions`; the displayed output shows the
# listed columns as fractional values afterwards.
# NOTE(review): confirm the helper reuses the scaler fitted on the training data rather than
# re-fitting on this test set; otherwise the features will not match what the model saw.
df_impu_dummy_encod_scaled = data_scaler(df_impu_dummy_encod,colms)

In [32]:
# Preview the scaled feature frame that is fed to the model.
df_impu_dummy_encod_scaled.head()

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Dependents_1,Dependents_2,Dependents_3+,Property_Area_Semiurban,Property_Area_Urban
0,1,1,0,0,0.078865,0.0,0.157088,0.746835,1,0,0,0,0,1
1,1,1,0,0,0.042411,0.0625,0.187739,0.746835,1,1,0,0,0,1
2,1,1,0,0,0.068938,0.075,0.344828,0.746835,1,0,1,0,0,1
3,1,1,0,0,0.032263,0.106083,0.137931,0.746835,1,0,1,0,0,1
4,1,0,1,0,0.045168,0.0,0.095785,0.746835,1,0,0,0,0,1


### Load Model

In [33]:
# Deserialize the saved estimator from model/best_model.pkl.
# pickle can execute arbitrary code while loading, so only use a file you trust.
with open('model/best_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [43]:

# Work on a copy so the scaled feature frame stays untouched when result columns are added.
# (The scaled frame is named `df_impu_dummy_encod_scaled`; the previous `..._scaler` name is
# never defined in this notebook and raised NameError on a fresh kernel.)
df_predicted = df_impu_dummy_encod_scaled.copy()

## **Make Predictions**

In [45]:
# Get predictions from the scaled feature frame (`df_impu_dummy_encod_scaled`; the
# `..._scaler` name used before is never defined and fails on Restart & Run All).
prediction = loaded_model.predict(df_impu_dummy_encod_scaled)

# Assign the array directly: values are matched to rows by position, whereas wrapping it in
# pd.DataFrame would align on index labels and yield NaN if the index is not 0..n-1.
df_predicted['Predictions'] = prediction
df_predicted.head()

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Dependents_1,Dependents_2,Dependents_3+,Property_Area_Semiurban,Property_Area_Urban,Predictions
0,1,1,0,0,0.078865,0.0,0.157088,0.746835,1,0,0,0,0,1,1
1,1,1,0,0,0.042411,0.0625,0.187739,0.746835,1,1,0,0,0,1,1
2,1,1,0,0,0.068938,0.075,0.344828,0.746835,1,0,1,0,0,1,1
3,1,1,0,0,0.032263,0.106083,0.137931,0.746835,1,0,1,0,0,1,1
4,1,0,1,0,0.045168,0.0,0.095785,0.746835,1,0,0,0,0,1,1


In [46]:
# Class-membership probabilities for every row of the scaled feature frame
# (`df_impu_dummy_encod_scaled`; the `..._scaler` name used before is never defined).
probability = loaded_model.predict_proba(df_impu_dummy_encod_scaled)

# Assign column-by-column from the raw array so values are matched to rows by position
# instead of by index label.
# NOTE(review): the column names assume loaded_model.classes_ is ordered [0, 1] — confirm.
df_predicted['Probability_0'] = probability[:, 0]
df_predicted['Probability_1'] = probability[:, 1]

df_predicted.head()


Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Dependents_1,Dependents_2,Dependents_3+,Property_Area_Semiurban,Property_Area_Urban,Predictions,Probability_0,Probability_1
0,1,1,0,0,0.078865,0.0,0.157088,0.746835,1,0,0,0,0,1,1,0.230162,0.769838
1,1,1,0,0,0.042411,0.0625,0.187739,0.746835,1,1,0,0,0,1,1,0.230162,0.769838
2,1,1,0,0,0.068938,0.075,0.344828,0.746835,1,0,1,0,0,1,1,0.230162,0.769838
3,1,1,0,0,0.032263,0.106083,0.137931,0.746835,1,0,1,0,0,1,1,0.230162,0.769838
4,1,0,1,0,0.045168,0.0,0.095785,0.746835,1,0,0,0,0,1,1,0.230162,0.769838


In [48]:
# Save the prediction results (not the model) as a CSV file; the frame's index is written
# as the first, unnamed column because `index=False` is not passed.
df_predicted.to_csv('model/loan_predictions.csv')
