In [1]:
# Importing the libraries 

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 

# Ignore harmless warnings 

import warnings 
warnings.filterwarnings("ignore")

import pandasql as psql

# pip install pandasql

# import datetime class from datetime module

from datetime import datetime

In [2]:
# Load the Universal bank data

bankdata = pd.read_csv(r"D:\00 Datasets\Bank\Universalbank.csv", header=0) 
bankdata.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [3]:
bankdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  5000 non-null   int64  
 1   Age                 5000 non-null   int64  
 2   Experience          5000 non-null   int64  
 3   Income              5000 non-null   int64  
 4   ZIP Code            5000 non-null   int64  
 5   Family              5000 non-null   int64  
 6   CCAvg               5000 non-null   float64
 7   Education           5000 non-null   int64  
 8   Mortgage            5000 non-null   int64  
 9   Personal Loan       5000 non-null   int64  
 10  Securities Account  5000 non-null   int64  
 11  CD Account          5000 non-null   int64  
 12  Online              5000 non-null   int64  
 13  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 547.0 KB


In [4]:
# Count the target or dependent variable by '0' & '1' and their proportion 
# (> 10 : 1, then the dataset is imbalance data)

CreditCard_count = bankdata.CreditCard.value_counts()
print('Class 0:', CreditCard_count[0])
print('Class 1:', CreditCard_count[1])
print('Proportion:', round(CreditCard_count[0] / CreditCard_count[1], 2), ': 1')
print('Total Bank records:', len(bankdata))

Class 0: 3530
Class 1: 1470
Proportion: 2.4 : 1
Total Bank records: 5000


In [5]:
# Delete the columns which are not influencing the target variable

del bankdata['ID']
del bankdata['ZIP Code']

In [6]:
bankdata.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
       'Online', 'CreditCard'],
      dtype='object')

In [7]:
# cols1 is variables - crating a dummy variables

cols1 = ['Family', 'Education']
print(cols1)

['Family', 'Education']


In [8]:
# cols2 variables - MinMaxScalar function

cols2 = ['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']
print(cols2)

['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']


In [9]:
# Create dummy variable for all range values

bankdata = pd.get_dummies(bankdata, columns=cols1)
bankdata.head().T

Unnamed: 0,0,1,2,3,4
Age,25.0,45.0,39.0,35.0,35.0
Experience,1.0,19.0,15.0,9.0,8.0
Income,49.0,34.0,11.0,100.0,45.0
CCAvg,1.6,1.5,1.0,2.7,1.0
Mortgage,0.0,0.0,0.0,0.0,0.0
Personal Loan,0.0,0.0,0.0,0.0,0.0
Securities Account,1.0,1.0,0.0,0.0,0.0
CD Account,0.0,0.0,0.0,0.0,0.0
Online,0.0,0.0,0.0,0.0,0.0
CreditCard,0.0,0.0,0.0,0.0,1.0


In [10]:
# Identify the dependent and Target variables

IndepVar = []
for col in bankdata.columns:
    if col != 'CreditCard':
        IndepVar.append(col)

TargetVar = 'CreditCard'

x = bankdata[IndepVar]
y = bankdata[TargetVar]

In [11]:
# Splitting the dataset into train and test 

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.30, random_state = 42)
x_test_F1 = x_test.copy()

In [59]:
# Feature Scaling - Each independent variable is in different range. The process of transforming all the 
# features in the given data set to a fixed range is known as ‘Scaling’

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Conver x_train values

x_train[cols2] = sc.fit_transform(x_train[cols2])


# Conver x_test values

x_test[cols2] = sc.fit_transform(x_test[cols2])

# Convert to dataframes

x_train = pd.DataFrame(x_train)
x_test = pd.DataFrame(x_test)

In [12]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

x_train[cols2] = mmscaler.fit_transform(x_train[cols2])
x_train = pd.DataFrame(x_train)

x_test[cols2] = mmscaler.fit_transform(x_test[cols2])
x_test = pd.DataFrame(x_test)

In [13]:
x_test.head()

Unnamed: 0,Age,Experience,Income,CCAvg,Mortgage,Personal Loan,Securities Account,CD Account,Online,Family_1,Family_2,Family_3,Family_4,Education_1,Education_2,Education_3
1501,0.159091,0.152174,0.137755,0.03,0.0,0,1,0,0,0,1,0,0,0,1,0
2586,0.545455,0.565217,0.719388,0.61,0.0,1,0,0,0,0,0,0,1,1,0,0
2653,0.159091,0.173913,0.576531,0.31,0.64252,0,0,0,1,0,1,0,0,1,0,0
1055,0.181818,0.195652,0.27551,0.1,0.0,0,1,0,1,1,0,0,0,1,0,0
705,0.886364,0.847826,0.112245,0.07,0.0,0,0,0,1,0,0,1,0,0,1,0


In [14]:
# Initialize an array that stores the Accuracy

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import sklearn.metrics as metrics
from sklearn.metrics import roc_curve, roc_auc_score

from sklearn.metrics import classification_report, confusion_matrix

accuracy = []

for a in range(1, 10, 1):
    k = a
    bankdataKNN = KNeighborsClassifier(n_neighbors=k)
    bankdataKNN.fit(x_train, y_train)
    y_pred = bankdataKNN.predict(x_test)
    print('KNN_K_value = ', a)
    print("Accuracy:", (round(metrics.accuracy_score(y_test, y_pred) * 100, 2)), "%")
    print("Precision:", (round(metrics.precision_score(y_test, y_pred, average='micro') * 100, 2)), '%')
    print("Recall:", (round(metrics.recall_score(y_test, y_pred, average='micro') * 100, 2)), "%")
    print("f1-score:", (round(metrics.f1_score(y_test, y_pred, average='micro') * 100, 2)), '%')
    print('roc_auc_score:', round(roc_auc_score(y_test, y_pred), 3))
    print(confusion_matrix(y_test, y_pred))
    print('----------------------------------------------------------------------------------------------')

KNN_K_value =  1
Accuracy: 63.73 %
Precision: 63.73 %
Recall: 63.73 %
f1-score: 63.73 %
roc_auc_score: 0.565
[[786 287]
 [257 170]]
----------------------------------------------------------------------------------------------
KNN_K_value =  2
Accuracy: 71.8 %
Precision: 71.8 %
Recall: 71.8 %
f1-score: 71.8 %
roc_auc_score: 0.567
[[985  88]
 [335  92]]
----------------------------------------------------------------------------------------------
KNN_K_value =  3
Accuracy: 68.2 %
Precision: 68.2 %
Recall: 68.2 %
f1-score: 68.2 %
roc_auc_score: 0.587
[[866 207]
 [270 157]]
----------------------------------------------------------------------------------------------
KNN_K_value =  4
Accuracy: 71.27 %
Precision: 71.27 %
Recall: 71.27 %
f1-score: 71.27 %
roc_auc_score: 0.56
[[981  92]
 [339  88]]
----------------------------------------------------------------------------------------------
KNN_K_value =  5
Accuracy: 69.27 %
Precision: 69.27 %
Recall: 69.27 %
f1-score: 69.27 %
roc_auc_score

# KNN with stratified sampling

In [15]:
# Splitting the dataset into train and test 

from sklearn.model_selection import train_test_split

x1_train, x1_test, y1_train, y1_test = train_test_split(x, y, test_size = 0.30, random_state = 42, stratify=y)
x1_test_F1 = x_test.copy()

In [16]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

x1_train[cols2] = mmscaler.fit_transform(x1_train[cols2])
x1_train = pd.DataFrame(x1_train)

x1_test[cols2] = mmscaler.fit_transform(x1_test[cols2])
x1_test = pd.DataFrame(x1_test)

In [17]:
# Initialize an array that stores the Accuracy

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import sklearn.metrics as metrics
from sklearn.metrics import roc_curve, roc_auc_score

from sklearn.metrics import classification_report, confusion_matrix

accuracy = []

for a in range(1, 10, 1):
    k = a
    bankdataKNN = KNeighborsClassifier(n_neighbors=k)
    bankdataKNN.fit(x1_train, y1_train)
    ys_pred = bankdataKNN.predict(x1_test)
    print('KNN_K_value = ', a)
    print("Accuracy:", (round(metrics.accuracy_score(y_test, ys_pred) * 100, 2)), "%")
    print("Precision:", (round(metrics.precision_score(y_test, ys_pred, average='micro') * 100, 2)), '%')
    print("Recall:", (round(metrics.recall_score(y_test, ys_pred, average='micro') * 100, 2)), "%")
    print("f1-score:", (round(metrics.f1_score(y_test, ys_pred, average='micro') * 100, 2)), '%')
    print('roc_auc_score:', round(roc_auc_score(y_test, ys_pred), 3))
    print(confusion_matrix(y_test, ys_pred))
    print('----------------------------------------------------------------------------------------------')

KNN_K_value =  1
Accuracy: 57.93 %
Precision: 57.93 %
Recall: 57.93 %
f1-score: 57.93 %
roc_auc_score: 0.484
[[757 316]
 [315 112]]
----------------------------------------------------------------------------------------------
KNN_K_value =  2
Accuracy: 67.07 %
Precision: 67.07 %
Recall: 67.07 %
f1-score: 67.07 %
roc_auc_score: 0.503
[[958 115]
 [379  48]]
----------------------------------------------------------------------------------------------
KNN_K_value =  3
Accuracy: 61.93 %
Precision: 61.93 %
Recall: 61.93 %
f1-score: 61.93 %
roc_auc_score: 0.494
[[843 230]
 [341  86]]
----------------------------------------------------------------------------------------------
KNN_K_value =  4
Accuracy: 67.67 %
Precision: 67.67 %
Recall: 67.67 %
f1-score: 67.67 %
roc_auc_score: 0.508
[[965 108]
 [377  50]]
----------------------------------------------------------------------------------------------
KNN_K_value =  5
Accuracy: 64.4 %
Precision: 64.4 %
Recall: 64.4 %
f1-score: 64.4 %
roc_auc_

# Random forest with random sampling

In [18]:
# Build Random Forest classification model and Train the model using the training sets

from sklearn.ensemble import RandomForestClassifier  

bankdataRF = RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                                    criterion='gini', max_depth=None, max_features='auto',
                                    max_leaf_nodes=None, max_samples=None,
                                    min_impurity_decrease=0.0, min_impurity_split=None,
                                    min_samples_leaf=1, min_samples_split=2,
                                    min_weight_fraction_leaf=0.0, n_estimators=500,
                                    n_jobs=None, oob_score=False, random_state=0, verbose=0,
                                    warm_start=False)

bankdataRF = bankdataRF.fit(x_train, y_train)

# Predict the model with test data set

y1_pred = bankdataRF.predict(x_test)

# Display confusion matrix and classifiction report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y1_pred))
print(classification_report(y_test, y1_pred))

# Evaluate the model performance by metrics

from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score

# Model Accuracy: how often is the classifier correct?
print("Accuracy:", (round(metrics.accuracy_score(y_test, y1_pred) * 100, 2)), "%")

# Model Precision: what percentage of positive tuples are labeled as such?
print("Precision:", (round(metrics.precision_score(y_test, y1_pred, average='micro') * 100, 2)), '%')

# Model Recall: what percentage of positive tuples are labelled as such?
print("Recall:", (round(metrics.recall_score(y_test, y1_pred, average='micro') * 100, 2)), "%")

# Model f1-score: weighted average of Precision & Recall
print("f1-score:", (round(metrics.f1_score(y_test, y1_pred, average='micro') * 100, 2)), '%')

# Area under ROC curve 
print('roc_auc_score:', round(roc_auc_score(y_test, y1_pred), 3))

[[964 109]
 [308 119]]
              precision    recall  f1-score   support

           0       0.76      0.90      0.82      1073
           1       0.52      0.28      0.36       427

    accuracy                           0.72      1500
   macro avg       0.64      0.59      0.59      1500
weighted avg       0.69      0.72      0.69      1500

Accuracy: 72.2 %
Precision: 72.2 %
Recall: 72.2 %
f1-score: 72.2 %
roc_auc_score: 0.589


In [19]:
# Build Random Forest classification model and Train the model using the training sets

from sklearn.ensemble import RandomForestClassifier  

bankdataRF1 = RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                                     criterion='gini', max_depth=None, max_features='auto',
                                     max_leaf_nodes=None, max_samples=None,
                                     min_impurity_decrease=0.0, min_impurity_split=None,
                                     min_samples_leaf=1, min_samples_split=2,
                                     min_weight_fraction_leaf=0.0, n_estimators=500,
                                     n_jobs=None, oob_score=False, random_state=0, verbose=0,
                                     warm_start=False)

bankdataRF1 = bankdataRF.fit(x1_train, y1_train)

# Predict the model with test data set

y2_pred = bankdataRF1.predict(x1_test)

# Display confusion matrix and classifiction report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y2_pred))
print(classification_report(y_test, y2_pred))

# Evaluate the model performance by metrics

from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score

# Model Accuracy: how often is the classifier correct?
print("Accuracy:", (round(metrics.accuracy_score(y_test, y2_pred) * 100, 2)), "%")

# Model Precision: what percentage of positive tuples are labeled as such?
print("Precision:", (round(metrics.precision_score(y_test, y2_pred, average='micro') * 100, 2)), '%')

# Model Recall: what percentage of positive tuples are labelled as such?
print("Recall:", (round(metrics.recall_score(y_test, y2_pred, average='micro') * 100, 2)), "%")

# Model f1-score: weighted average of Precision & Recall
print("f1-score:", (round(metrics.f1_score(y_test, y2_pred, average='micro') * 100, 2)), '%')

# Area under ROC curve 
print('roc_auc_score:', round(roc_auc_score(y_test, y2_pred), 3))

[[940 133]
 [373  54]]
              precision    recall  f1-score   support

           0       0.72      0.88      0.79      1073
           1       0.29      0.13      0.18       427

    accuracy                           0.66      1500
   macro avg       0.50      0.50      0.48      1500
weighted avg       0.59      0.66      0.61      1500

Accuracy: 66.27 %
Precision: 66.27 %
Recall: 66.27 %
f1-score: 66.27 %
roc_auc_score: 0.501


# Decision Tree with Random Sampling

In [21]:
# To build the decision tree model with Over sampling 

from sklearn.tree import DecisionTreeClassifier 

bankdataDT = DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                                    max_depth=None, max_features=None, max_leaf_nodes=None,
                                    min_impurity_decrease=0.0, min_impurity_split=None,
                                    min_samples_leaf=1, min_samples_split=2,min_weight_fraction_leaf=0.0,
                                    random_state=None, splitter='best')

bankdataDT = bankdataDT.fit(x_train,y_train)

# Predict with test data

y3_pred = bankdataDT.predict(x_test)

# Display confusion matrix and classifiction report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y3_pred))
print(classification_report(y_test, y3_pred))

# Evaluate the model performance by metrics

from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score

# Model Accuracy: how often is the classifier correct?
print("Accuracy:", (round(metrics.accuracy_score(y_test, y3_pred) * 100, 2)), "%")

# Model Precision: what percentage of positive tuples are labeled as such?
print("Precision:", (round(metrics.precision_score(y_test, y3_pred) * 100, 2)), '%')

# Model Recall: what percentage of positive tuples are labelled as such?
print("Recall:", (round(metrics.recall_score(y_test, y3_pred) * 100, 2)), "%")

# Model f1-score: weighted average of Precision & Recall
print("f1-score:", (round(metrics.f1_score(y_test, y3_pred) * 100, 2)), '%')

# Area under ROC curve 
print('roc_auc_score:', round(roc_auc_score(y_test, y3_pred), 3))

[[786 287]
 [250 177]]
              precision    recall  f1-score   support

           0       0.76      0.73      0.75      1073
           1       0.38      0.41      0.40       427

    accuracy                           0.64      1500
   macro avg       0.57      0.57      0.57      1500
weighted avg       0.65      0.64      0.65      1500

Accuracy: 64.2 %
Precision: 38.15 %
Recall: 41.45 %
f1-score: 39.73 %
roc_auc_score: 0.574


In [None]:
# Decision Tree with stratified Sampling

In [22]:
# To build the decision tree model with Over sampling 

from sklearn.tree import DecisionTreeClassifier 

bankdataDT1 = DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                                     max_depth=None, max_features=None, max_leaf_nodes=None,
                                     min_impurity_decrease=0.0, min_impurity_split=None,
                                     min_samples_leaf=1, min_samples_split=2,min_weight_fraction_leaf=0.0,
                                     random_state=None, splitter='best')

bankdataDT1 = bankdataDT1.fit(x1_train,y1_train)

# Predict with test data

y4_pred = bankdataDT.predict(x1_test)

# Display confusion matrix and classifiction report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y4_pred))
print(classification_report(y_test, y4_pred))

# Evaluate the model performance by metrics

from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score

# Model Accuracy: how often is the classifier correct?
print("Accuracy:", (round(metrics.accuracy_score(y_test, y4_pred) * 100, 2)), "%")

# Model Precision: what percentage of positive tuples are labeled as such?
print("Precision:", (round(metrics.precision_score(y_test, y4_pred) * 100, 2)), '%')

# Model Recall: what percentage of positive tuples are labelled as such?
print("Recall:", (round(metrics.recall_score(y_test, y4_pred) * 100, 2)), "%")

# Model f1-score: weighted average of Precision & Recall
print("f1-score:", (round(metrics.f1_score(y_test, y4_pred) * 100, 2)), '%')

# Area under ROC curve 
print('roc_auc_score:', round(roc_auc_score(y_test, y4_pred), 3))

[[776 297]
 [290 137]]
              precision    recall  f1-score   support

           0       0.73      0.72      0.73      1073
           1       0.32      0.32      0.32       427

    accuracy                           0.61      1500
   macro avg       0.52      0.52      0.52      1500
weighted avg       0.61      0.61      0.61      1500

Accuracy: 60.87 %
Precision: 31.57 %
Recall: 32.08 %
f1-score: 31.82 %
roc_auc_score: 0.522


# Logistic Regression with Random Sampling

In [23]:
# To build the 'Logistic Regression' model with random sampling

from sklearn.linear_model import LogisticRegression

bankdataLR = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                                intercept_scaling=1, max_iter=100, multi_class='auto', 
                                n_jobs=None, penalty='l2', random_state=None,
                                solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)

bankdataLR = bankdataLR.fit(x_train,y_train)

# Predict the model with test data set

y5_pred = bankdataLR.predict(x_test)

# Display confusion matrix and classifiction report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y5_pred))
print(classification_report(y_test, y5_pred))

# Evaluate the model performance by metrics

from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score

# Model Accuracy: how often is the classifier correct?
print("Accuracy:", (round(metrics.accuracy_score(y_test, y5_pred) * 100, 2)), "%")

# Model Precision: what percentage of positive tuples are labeled as such?
print("Precision:", (round(metrics.precision_score(y_test, y5_pred, average='micro') * 100, 2)), '%')

# Model Recall: what percentage of positive tuples are labelled as such?
print("Recall:", (round(metrics.recall_score(y_test, y5_pred, average='micro') * 100, 2)), "%")

# Model f1-score: weighted average of Precision & Recall
print("f1-score:", (round(metrics.f1_score(y_test, y5_pred, average='micro') * 100, 2)), '%')

# Area under ROC curve 
print('roc_auc_score:', round(roc_auc_score(y_test, y5_pred), 3))

[[1056   17]
 [ 356   71]]
              precision    recall  f1-score   support

           0       0.75      0.98      0.85      1073
           1       0.81      0.17      0.28       427

    accuracy                           0.75      1500
   macro avg       0.78      0.58      0.56      1500
weighted avg       0.76      0.75      0.69      1500

Accuracy: 75.13 %
Precision: 75.13 %
Recall: 75.13 %
f1-score: 75.13 %
roc_auc_score: 0.575


# Logistic Regression with Stratified Sampling

In [24]:
# To build the 'Logistic Regression' model with random sampling

from sklearn.linear_model import LogisticRegression

bankdataLR1 = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                                 intercept_scaling=1, max_iter=100, multi_class='auto', 
                                 n_jobs=None, penalty='l2', random_state=None,
                                 solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)

bankdataLR1 = bankdataLR1.fit(x1_train,y1_train)

# Predict the model with test data set

y6_pred = bankdataLR.predict(x1_test)

# Display confusion matrix and classifiction report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y6_pred))
print(classification_report(y_test, y6_pred))

# Evaluate the model performance by metrics

from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score

# Model Accuracy: how often is the classifier correct?
print("Accuracy:", (round(metrics.accuracy_score(y_test, y6_pred) * 100, 2)), "%")

# Model Precision: what percentage of positive tuples are labeled as such?
print("Precision:", (round(metrics.precision_score(y_test, y6_pred, average='micro') * 100, 2)), '%')

# Model Recall: what percentage of positive tuples are labelled as such?
print("Recall:", (round(metrics.recall_score(y_test, y6_pred, average='micro') * 100, 2)), "%")

# Model f1-score: weighted average of Precision & Recall
print("f1-score:", (round(metrics.f1_score(y_test, y6_pred, average='micro') * 100, 2)), '%')

# Area under ROC curve 
print('roc_auc_score:', round(roc_auc_score(y_test, y6_pred), 3))

[[1016   57]
 [ 406   21]]
              precision    recall  f1-score   support

           0       0.71      0.95      0.81      1073
           1       0.27      0.05      0.08       427

    accuracy                           0.69      1500
   macro avg       0.49      0.50      0.45      1500
weighted avg       0.59      0.69      0.61      1500

Accuracy: 69.13 %
Precision: 69.13 %
Recall: 69.13 %
f1-score: 69.13 %
roc_auc_score: 0.498
