# Importing the packages

In [1]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn import preprocessing

# Loading the Dataset

In [2]:
bank_loan = pd.read_excel("Bank_Personal_Loan_Modelling.xlsx",sheet_name=1)

In [3]:
bank_loan.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


# Checking for Null Values

In [4]:
bank_loan.isna().sum()

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

# Random Forest

In [5]:
from sklearn.ensemble import RandomForestClassifier

In [6]:
bank_loan.columns

Index(['ID', 'Age', 'Experience', 'Income', 'ZIP Code', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'Personal Loan', 'Securities Account',
       'CD Account', 'Online', 'CreditCard'],
      dtype='object')

In [7]:
features = ['ID', 'Age', 'Experience', 'Income', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'Securities Account',
       'CD Account', 'Online', 'CreditCard']

In [8]:
rf_model = RandomForestClassifier(n_estimators = 1000, max_features = 2, oob_score=True)

In [9]:
rf_model.fit(X=bank_loan[features],y=bank_loan['Personal Loan'])

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features=2, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=True, random_state=None,
                       verbose=0, warm_start=False)

In [10]:
print("OOB ACCURACY", rf_model.oob_score_)

OOB ACCURACY 0.9868


In [11]:
for feature,imp in zip(features,rf_model.feature_importances_):
    print(feature,imp)

ID 0.04695366136892308
Age 0.04184222920684076
Experience 0.04044662822213664
Income 0.32566476896249924
Family 0.09279487920011134
CCAvg 0.17815561701536303
Education 0.1531120549562629
Mortgage 0.04283266370754661
Securities Account 0.005413489711297393
CD Account 0.05510523805961532
Online 0.008175404588232528
CreditCard 0.009503365001171115


# INFERENCE : This shows that the important features are Income,CCAvg and Education

# Decision Tree

In [18]:
tree_model = tree.DecisionTreeClassifier(max_depth=6)

In [19]:
predictors = pd.DataFrame([bank_loan["Income"],bank_loan["CCAvg"],bank_loan["Education"]]).T

In [20]:
tree_model.fit(X = predictors, y= bank_loan["Personal Loan"])

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=6,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [21]:
tree_model.score(X = predictors, y= bank_loan["Personal Loan"])

0.9738

In [22]:
with open("Dtree3.dot","w") as f:
    f=tree.export_graphviz(tree_model,feature_names=["Income","CCAvg","Education"],out_file=f)