In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Read the customer CSV into a DataFrame, then preview one randomly drawn row.
csv_path = 'data/bank-customers-for-analysis.csv'
dataset = pd.read_csv(csv_path)
dataset.sample(n=1)

In [None]:
# Integer codes for each categorical feature. Job titles are collapsed into
# three groups (0, 1, 2); every other column gets one code per category.
category_codes = {
    'job': {
        'admin.': 0, 'entrepreneur': 0, 'management': 0, 'services': 0, 'technician': 0,
        'blue-collar': 1, 'housemaid': 1, 'self-employed': 1,
        'retired': 2, 'student': 2, 'unemployed': 2, 'unknown': 2,
    },
    'education': {'primary': 0, 'secondary': 1, 'tertiary': 2, 'unknown': 3},
    'marital': {'single': 0, 'married': 1, 'divorced': 2},
    'default': {'no': 0, 'yes': 1},
    'housing': {'no': 0, 'yes': 1},
    'loan': {'no': 0, 'yes': 1},
    'income_bracket': {'<40K': 0, '40K-60K': 1, '60K-100K': 2, '100K+': 3},
}

# Columns removed before modelling.
unused_columns = ['contact', 'day', 'month', 'campaign', 'pdays', 'previous', 'poutcome', 'deposit', 'credit_history']

# Encode on a copy and assign each column back explicitly. Calling
# `dataset[col].replace(..., inplace=True)` acts on a temporary Series and does
# not update the frame once pandas Copy-on-Write is active. Working on a copy
# also leaves `dataset` untouched if anything below raises.
encoded = dataset.copy()
for column, codes in category_codes.items():
    mapped = encoded[column].map(codes)
    # A value that was present but has no code would otherwise become NaN
    # silently; fail here with the offending categories instead.
    unmapped = encoded.loc[mapped.isna() & encoded[column].notna(), column].unique()
    if len(unmapped):
        raise ValueError(f'Unexpected values in {column!r}: {list(unmapped)}')
    encoded[column] = mapped

dataset = encoded.drop(columns=unused_columns)

dataset.sample()

In [None]:
# Separate the target column from the predictors, then hold out 30% of the
# rows for evaluation; the fixed seed keeps the partition repeatable.
target = 'cc_with_points'
X = dataset.drop(columns=[target])
y = dataset[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=60,
)

In [None]:
# Fit a decision tree on the training split and print the classification
# report for its predictions on the held-out split.
# The seed is pinned because scikit-learn randomly permutes the candidate
# features at each split; without it the fitted tree (and every result derived
# from it) can change between runs. 60 matches the seed used for the split.
dtree = DecisionTreeClassifier(random_state=60)
dtree.fit(X_train, y_train)

predictions = dtree.predict(X_test)

print(classification_report(y_test, predictions))

In [None]:
# Kendall rank correlation between every pair of columns in the frame, drawn
# as an annotated heatmap on an explicitly created 14x8 figure.
fig, ax = plt.subplots(figsize=(14, 8))
corr = dataset.corr(method='kendall')
labels = corr.columns.values

sns.heatmap(
    data=corr,
    xticklabels=labels,
    yticklabels=labels,
    cmap='YlGnBu',
    annot=True,
    ax=ax,
)
plt.show()

In [None]:
# Two hand-written customer profiles. Every value is wrapped in a list so each
# dict converts to a one-row DataFrame.
# NOTE(review): the values presumably use the same integer codes and column
# order as the frame the tree was fitted on - confirm.
customer1 = dict(
    age=[20],
    job=[2],
    marital=[0],
    education=[3],
    default=[0],
    balance=[500],
    housing=[0],
    loan=[0],
    duration=[12],
    credit_score=[400],
    psychographic=[2],
    income_bracket=[0],
)

customer2 = dict(
    age=[42],
    job=[0],
    marital=[1],
    education=[3],
    default=[0],
    balance=[20000],
    housing=[0],
    loan=[1],
    duration=[500],
    credit_score=[600],
    psychographic=[1],
    income_bracket=[3],
)

# Build both one-row frames, then ask the fitted tree for a prediction on each.
customer1_df, customer2_df = (
    pd.DataFrame(data=profile) for profile in (customer1, customer2)
)
test1, test2 = (
    dtree.predict(frame) for frame in (customer1_df, customer2_df)
)

# NOTE(review): the label mentions CLV while the tree in this notebook is
# fitted on `cc_with_points` - confirm the wording.
print(f'CLV of more than $1000:\nThomas: {test1}\nHarleen: {test2}')

In [None]:
# Show the first five rows of the frame in its current state.
dataset.head(n=5)

In [None]:
# Draw the fitted decision tree with colour-filled nodes on a large canvas.
plt.figure(figsize=(50, 20))
# Take the node labels from the frame the tree was fitted on, so they cannot
# drift out of sync with the actual training columns or their order.
fn = list(X_train.columns)
# NOTE(review): plot_tree pairs these names with the target classes in
# ascending order; assumes the negative class sorts first - confirm.
cn = ['False', 'True']

plot_tree(dtree, filled=True, feature_names=fn, class_names=cn)
# End on show() so the cell renders the figure only, rather than also echoing
# the list of annotation objects that plot_tree returns.
plt.show()