## 1. Loading Data and Packages

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import warnings
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import accuracy_score,mean_squared_error,r2_score,mean_absolute_error
from math import sqrt
# Suppress all Python warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used
# below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')


In [2]:
# Attach Google Drive to this Colab runtime so files on it can be read by path.
from google import colab

drive_mount_point = '/content/gdrive'
colab.drive.mount(drive_mount_point)

Mounted at /content/gdrive


In [3]:
# Project folder on the mounted Google Drive; the loan CSV is read from here.
# (Nothing is loaded in this cell — it only defines the folder path.)
import os 
path = '/content/gdrive/My Drive/colab/credit_score_zalda'


In [86]:
# Read the loan records from the project folder into a DataFrame.
loan_csv = path + '/loan_data.csv'
data = pd.read_csv(loan_csv)

In [54]:
# Derive `loan_limit` as the present value of the installment stream: the
# principal that `monthly_installments`, paid at the end of each of
# `Number_of_installments(months)` periods, would repay.
#
# `np.pv` was deprecated in NumPy 1.18 and removed in NumPy 1.20, so the
# ordinary-annuity formula it evaluated (fv=0, when='end') is written out:
#     PV = pmt * ((1 + i)**n - 1) / i / (1 + i)**n
r = 0.13  # base rate from which the per-period rate is derived
n_periods = data['Number_of_installments(months)']
interestRate = r / n_periods

# NOTE(review): the per-period rate ends up as r divided by the number of
# installments *twice* (once above, once here). That is preserved so existing
# loan_limit values do not change — confirm whether e.g. r / 12 was intended.
period_rate = interestRate / n_periods

# Same evaluation order as np.pv to keep floating-point results aligned.
growth = (1 + period_rate) ** n_periods
annuity_factor = (growth - 1) / period_rate
data['loan_limit'] = (
    data['monthly_installments'] * annuity_factor / growth
).round(0).abs()

In [55]:
# Preview the first rows of the frame (default: 5 rows)
data.head()

Unnamed: 0,gender,Age,Years_in_job,Income,Province,Savings,Home_ownership,Credit_history,Number_of_accs,Credit_cards,Home_loan,Overdraft,Student_Loan,Non_perfoming_Accs,Open Accounts,Current_In_Arrears,Current_balance_Amt,Past_due_Amt,No_of_enquiries,Score,Def,monthly_installments,Number_of_installments(months),loan_limit
0,Male,48,23,123970,Nairobi,264110,Own Home,18,79,2,0,1,0,4,2,0,12400,0.0,23,728,0,37191.0,5,183089.0
1,Female,37,14,101907,Nairobi,101922,Rent,6,22,0,0,1,0,7,2,2,15300,4590.0,23,428,1,30572.1,8,242356.0
2,Female,39,14,97331,Nairobi,112457,Mortgage,11,36,0,1,1,0,11,3,0,14600,0.0,10,630,0,29199.3,3,85127.0
3,Female,38,11,106189,Nairobi,108758,Rent,10,32,0,0,0,0,10,2,2,21200,6360.0,27,504,0,31856.7,1,28192.0
4,Male,25,2,55705,Central,3793,Rent,1,1,0,0,0,1,0,1,1,16700,11690.0,2,592,0,16711.5,5,82270.0


In [56]:
# Sub-frame holding only the object-dtype (text) columns; iterating over it
# yields the column names that are label-encoded in the next cell.
objList = data.select_dtypes(include=['object'])

In [57]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every text column of `data`.
#
# One encoder is fitted per column and kept in `label_encoders`, so each
# mapping can later be reused (`.transform`) or reversed (`.inverse_transform`).
# A single shared encoder, refitted per column, would only remember the last one.
label_encoders = {}
for feat in objList:
    le = LabelEncoder()  # after the loop `le` is the last column's encoder, as before
    # Encode from the copy held in `objList` rather than from `data`, so that
    # re-running this cell does not re-encode already-encoded integers as strings.
    data[feat] = le.fit_transform(objList[feat].astype(str))
    label_encoders[feat] = le

# info() prints its report itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 24 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   gender                          10000 non-null  int64  
 1   Age                             10000 non-null  int64  
 2   Years_in_job                    10000 non-null  int64  
 3   Income                          10000 non-null  int64  
 4   Province                        10000 non-null  int64  
 5   Savings                         10000 non-null  int64  
 6   Home_ownership                  10000 non-null  int64  
 7   Credit_history                  10000 non-null  int64  
 8   Number_of_accs                  10000 non-null  int64  
 9   Credit_cards                    10000 non-null  int64  
 10  Home_loan                       10000 non-null  int64  
 11  Overdraft                       10000 non-null  int64  
 12  Student_Loan                    1

In [63]:
# Split the frame into model inputs (X) and the two regression targets (Y).
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas deprecated in 1.3 and rejects from 2.0 onwards.
target_cols = ['Score', 'loan_limit']
X = data.drop(columns=target_cols)
Y = data[target_cols]
# NOTE(review): X still contains monthly_installments and
# Number_of_installments(months), the two columns loan_limit is computed
# from — confirm that feeding them to the model is intended.


In [64]:
# Show (rows, columns) of the feature frame and of the target frame
X.shape, Y.shape

((10000, 22), (10000, 2))

## 6. Modeling and Predictions

In [65]:
# !pip install catboost

In [67]:
# train_test_split is not imported by the notebook's import cell, so a fresh
# kernel would raise NameError here; import it alongside its use.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=300
)

In [75]:
# Fit a single extra-trees regressor on both target columns at once
# (fit() returns the estimator itself), then predict the held-out rows.
etree = ExtraTreesRegressor(random_state=2).fit(x_train, y_train)
y_pred = etree.predict(x_test)

In [76]:
# Hold-out regression metrics.
# The first two assignments used to end with a trailing comma, which made MAE
# and RMSE one-element tuples (printed as "(83.16...,)") instead of floats.
# NOTE(review): with a two-column target these are scikit-learn's default
# averages across both outputs; pass multioutput='raw_values' to get one
# number per target.
MAE = mean_absolute_error(y_test, y_pred)
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
R2_SCORE = r2_score(y_test, y_pred)
print(MAE, RMSE, R2_SCORE)

(83.1688300000001,) (145.51899492660797,) 0.8258674255659617


In [77]:
# Predict both targets for every row of `data` (this includes the rows the
# model was trained on) and truncate the predictions to integers.
all_features = data.drop(['Score', 'loan_limit'], axis=1)
pred = etree.predict(all_features).astype(np.int_)

In [78]:
# Place the integer predictions next to the actual target values for a
# side-by-side look, then show the first 20 rows.
pred = pd.DataFrame(pred, columns=['Score_pred', 'loan_limit_pred']).assign(
    Score=data['Score'],
    loan_limit=data['loan_limit'],
)
pred.head(20)

Unnamed: 0,Score_pred,loan_limit_pred,Score,loan_limit
0,728,183089,728,183089.0
1,428,242356,428,242356.0
2,630,85127,630,85127.0
3,504,28192,504,28192.0
4,509,82207,592,82270.0
5,656,77935,656,77935.0
6,677,305969,750,305974.0
7,661,154586,661,154586.0
8,658,121053,658,121053.0
9,479,21666,391,21670.0


In [79]:
# (rows, columns) of the prediction-vs-actual comparison frame
pred.shape

(10000, 4)

In [80]:
# Import pickle Package

import pickle

In [83]:
# Save the fitted model to a pickle file in the current working directory
Pkl_Filename = "Pickle_RL_Model.pkl"

with open(Pkl_Filename, mode='wb') as model_file:
    pickle.dump(etree, model_file)

In [84]:
# Read the model back from the pickle file and display it.
# pickle.load can execute arbitrary code, so only load files you trust.
with open(Pkl_Filename, mode='rb') as model_file:
    Pickled_ET_Model = pickle.load(model_file)

Pickled_ET_Model

ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                    max_depth=None, max_features='auto', max_leaf_nodes=None,
                    max_samples=None, min_impurity_decrease=0.0,
                    min_impurity_split=None, min_samples_leaf=1,
                    min_samples_split=2, min_weight_fraction_leaf=0.0,
                    n_estimators=100, n_jobs=None, oob_score=False,
                    random_state=2, verbose=0, warm_start=False)

In [85]:
# Print column names, non-null counts and dtypes of the training features
x_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8000 entries, 2857 to 1233
Data columns (total 22 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   gender                          8000 non-null   int64  
 1   Age                             8000 non-null   int64  
 2   Years_in_job                    8000 non-null   int64  
 3   Income                          8000 non-null   int64  
 4   Province                        8000 non-null   int64  
 5   Savings                         8000 non-null   int64  
 6   Home_ownership                  8000 non-null   int64  
 7   Credit_history                  8000 non-null   int64  
 8   Number_of_accs                  8000 non-null   int64  
 9   Credit_cards                    8000 non-null   int64  
 10  Home_loan                       8000 non-null   int64  
 11  Overdraft                       8000 non-null   int64  
 12  Student_Loan                   