# ***Coustomer Behaviour Prediction***

***Loding Required Libraries***

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import graphviz
from ucimlrepo import fetch_ucirepo
from matplotlib import pyplot as plt
import seaborn as sns

***Loding the data from UCI Repository***

In [5]:
bank_marketing = fetch_ucirepo(id=222)   
print(bank_marketing.variables) 

           name     role         type      demographic  \
0           age  Feature      Integer              Age   
1           job  Feature  Categorical       Occupation   
2       marital  Feature  Categorical   Marital Status   
3     education  Feature  Categorical  Education Level   
4       default  Feature       Binary             None   
5       balance  Feature      Integer             None   
6       housing  Feature       Binary             None   
7          loan  Feature       Binary             None   
8       contact  Feature  Categorical             None   
9   day_of_week  Feature         Date             None   
10        month  Feature         Date             None   
11     duration  Feature      Integer             None   
12     campaign  Feature      Integer             None   
13        pdays  Feature      Integer             None   
14     previous  Feature      Integer             None   
15     poutcome  Feature  Categorical             None   
16            

***Data processing***

In [6]:
bank_data = pd.DataFrame(bank_marketing.data.features)
bank_data.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day_of_week,month,duration,campaign,pdays,previous,poutcome
0,58,management,married,tertiary,no,2143,yes,no,,5,may,261,1,-1,0,
1,44,technician,single,secondary,no,29,yes,no,,5,may,151,1,-1,0,
2,33,entrepreneur,married,secondary,no,2,yes,yes,,5,may,76,1,-1,0,
3,47,blue-collar,married,,no,1506,yes,no,,5,may,92,1,-1,0,
4,33,,single,,no,1,no,no,,5,may,198,1,-1,0,


In [7]:
bank_data['Target']=pd.DataFrame(bank_marketing.data.targets)

In [8]:
bank_data.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day_of_week,month,duration,campaign,pdays,previous,poutcome,Target
0,58,management,married,tertiary,no,2143,yes,no,,5,may,261,1,-1,0,,no
1,44,technician,single,secondary,no,29,yes,no,,5,may,151,1,-1,0,,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,,5,may,76,1,-1,0,,no
3,47,blue-collar,married,,no,1506,yes,no,,5,may,92,1,-1,0,,no
4,33,,single,,no,1,no,no,,5,may,198,1,-1,0,,no


In [9]:
bank_data.describe()

Unnamed: 0,age,balance,day_of_week,duration,campaign,pdays,previous
count,45211.0,45211.0,45211.0,45211.0,45211.0,45211.0,45211.0
mean,40.93621,1362.272058,15.806419,258.16308,2.763841,40.197828,0.580323
std,10.618762,3044.765829,8.322476,257.527812,3.098021,100.128746,2.303441
min,18.0,-8019.0,1.0,0.0,1.0,-1.0,0.0
25%,33.0,72.0,8.0,103.0,1.0,-1.0,0.0
50%,39.0,448.0,16.0,180.0,2.0,-1.0,0.0
75%,48.0,1428.0,21.0,319.0,3.0,-1.0,0.0
max,95.0,102127.0,31.0,4918.0,63.0,871.0,275.0


In [10]:
bank_data.isna().sum()

age                0
job              288
marital            0
education       1857
default            0
balance            0
housing            0
loan               0
contact        13020
day_of_week        0
month              0
duration           0
campaign           0
pdays              0
previous           0
poutcome       36959
Target             0
dtype: int64

In [11]:
bank_data=bank_data.dropna()
bank_data=bank_data.reset_index()
bank_data.head()

Unnamed: 0,index,age,job,marital,education,default,balance,housing,loan,contact,day_of_week,month,duration,campaign,pdays,previous,poutcome,Target
0,24060,33,admin.,married,tertiary,no,882,no,no,telephone,21,oct,39,1,151,3,failure,no
1,24062,42,admin.,single,secondary,no,-247,yes,yes,telephone,21,oct,519,1,166,1,other,yes
2,24064,33,services,married,secondary,no,3444,yes,no,telephone,21,oct,144,1,91,4,failure,yes
3,24072,36,management,married,tertiary,no,2415,yes,no,telephone,22,oct,73,1,86,4,other,no
4,24077,36,management,married,tertiary,no,0,yes,no,telephone,23,oct,140,1,143,3,failure,yes


In [12]:
bank_data.isnull().sum()

index          0
age            0
job            0
marital        0
education      0
default        0
balance        0
housing        0
loan           0
contact        0
day_of_week    0
month          0
duration       0
campaign       0
pdays          0
previous       0
poutcome       0
Target         0
dtype: int64

In [13]:
bank_data = pd.get_dummies(bank_data, drop_first=True)
bank_data.columns

Index(['index', 'age', 'balance', 'day_of_week', 'duration', 'campaign',
       'pdays', 'previous', 'job_blue-collar', 'job_entrepreneur',
       'job_housemaid', 'job_management', 'job_retired', 'job_self-employed',
       'job_services', 'job_student', 'job_technician', 'job_unemployed',
       'marital_married', 'marital_single', 'education_secondary',
       'education_tertiary', 'default_yes', 'housing_yes', 'loan_yes',
       'contact_telephone', 'month_aug', 'month_dec', 'month_feb', 'month_jan',
       'month_jul', 'month_jun', 'month_mar', 'month_may', 'month_nov',
       'month_oct', 'month_sep', 'poutcome_other', 'poutcome_success',
       'Target_yes'],
      dtype='object')

In [14]:
bank_data.head()

Unnamed: 0,index,age,balance,day_of_week,duration,campaign,pdays,previous,job_blue-collar,job_entrepreneur,...,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_other,poutcome_success,Target_yes
0,24060,33,882,21,39,1,151,3,False,False,...,False,False,False,False,False,True,False,False,False,False
1,24062,42,-247,21,519,1,166,1,False,False,...,False,False,False,False,False,True,False,True,False,True
2,24064,33,3444,21,144,1,91,4,False,False,...,False,False,False,False,False,True,False,False,False,True
3,24072,36,2415,22,73,1,86,4,False,False,...,False,False,False,False,False,True,False,True,False,False
4,24077,36,0,23,140,1,143,3,False,False,...,False,False,False,False,False,True,False,False,False,True


***Data Preprapation***

In [15]:
X = bank_data.drop('Target_yes', axis=1)  # Features (excluding the 'y_yes' column)X
y = bank_data['Target_yes']  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(6273, 39)
(6273,)
(1569, 39)
(1569,)


***Model Building and Training***

In [17]:
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

***Model Evaluation***

In [18]:
y_pred = clf.predict(X_test)

In [19]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.82


***Metrics Evaluation***

In [21]:
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)

In [22]:
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Precision: 0.60
Recall: 0.63
F1 Score: 0.61


***Confusion Matrix***

In [23]:
print("Confusion Matrix:\n", confusion)

Confusion Matrix:
 [[1057  150]
 [ 135  227]]


In [24]:
classification_rep = classification_report(y_test, y_pred)
print("Classification Report:\n", classification_rep)

Classification Report:
               precision    recall  f1-score   support

       False       0.89      0.88      0.88      1207
        True       0.60      0.63      0.61       362

    accuracy                           0.82      1569
   macro avg       0.74      0.75      0.75      1569
weighted avg       0.82      0.82      0.82      1569



***Visualizing the DecisionTree Workflow***

In [None]:
from sklearn import tree
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 6))
tree.plot_tree(clf)
plt.title('Decision Tree Visualization')
plt.savefig('Decision_tree.jpg')
plt.show()