# Important Library

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree

# Loading Data set

In [5]:
data = pd.read_excel("Bank_personal_loan_Modelling.xlsx",sheet_name=1)

In [6]:
data.head(2)

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0


In [7]:
data.shape

(5000, 14)

In [8]:
data.isna().sum()

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

In [9]:
data.dtypes

ID                      int64
Age                     int64
Experience              int64
Income                  int64
ZIP Code                int64
Family                  int64
CCAvg                 float64
Education               int64
Mortgage                int64
Personal Loan           int64
Securities Account      int64
CD Account              int64
Online                  int64
CreditCard              int64
dtype: object

# Splitting Data into Train and Test

In [13]:
data1 = data.drop('ZIP Code',axis=1)

In [14]:
X = data1.drop('Personal Loan',axis=1)
y = data1['Personal Loan']

In [16]:
X_train , X_test , y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=0)

In [17]:
print(X_train.shape , X_test.shape , y_train.shape , y_test.shape)

(3750, 12) (1250, 12) (3750,) (1250,)


In [19]:
X_train = X_train.drop('ID',axis=1)

# Random Forest Classifier

In [24]:
rf_model = RandomForestClassifier(n_estimators=1000,max_features=2,oob_score=True)

In [25]:
X_train.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Securities Account', 'CD Account', 'Online', 'CreditCard'],
      dtype='object')

In [26]:
X_features = X_train[['Age','Experience', 'Income', 'Family', 'CCAvg', 
        'Education','Mortgage', 'Securities Account', 'CD Account',
         'Online', 'CreditCard']]

In [27]:
rf_model.fit(X=X_features,y=y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features=2,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=True, random_state=None,
                       verbose=0, warm_start=False)

In [28]:
print(rf_model.oob_score_)

0.9864


In [29]:
for feature , imp in zip(X_features ,rf_model.feature_importances_):
    print(feature , imp)

Age 0.04708572106593238
Experience 0.04707217130179107
Income 0.3459302955581849
Family 0.0910139094226927
CCAvg 0.18771405599640684
Education 0.15672454646598002
Mortgage 0.04417289114644953
Securities Account 0.006200332340307796
CD Account 0.054787967809147874
Online 0.008997610298225411
CreditCard 0.010300498594881602


In [30]:
features = X_train[['Income','CCAvg','Education']]

In [31]:
tree_model = tree.DecisionTreeClassifier()

In [32]:
tree_model.fit(X=features,y=y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [34]:
with open("DecisionTree.dot",'w') as f:
    f = tree.export_graphviz(tree_model, feature_names=['Income','CCAvg','Education'],out_file=f)

In [36]:
print(tree_model.score(X=features , y=y_train))

0.9984


In [37]:
features1 = X_train[['Age','Experience','Income','Family','CCAvg',
                     'Mortgage','CD Account','Education']]

In [38]:
tree_model.fit(X=features1,y=y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [40]:
with open("DecisionTree1.dot",'w') as f:
    f = tree.export_graphviz(tree_model, feature_names=['Age','Experience','Income','Family','CCAvg',
                     'Mortgage','CD Account','Education'],out_file=f)

In [41]:
print(tree_model.score(X=features1 , y=y_train))

1.0


# Prediction

In [42]:
test_features = X_test[['Age','Experience','Income','Family','CCAvg',
                     'Mortgage','CD Account','Education']]

In [43]:
test_predict = tree_model.predict(X=test_features)

In [44]:
predicted_output = pd.DataFrame({"ID":X_test['ID'],"Personal Loan":test_predict}) 

In [45]:
predicted_output.to_csv("Output.csv",index=False)