## Telemarketing Effectiveness Prediction

Given *data about subjects' responses to a bank's marketing campaign*, let's try to predict whether a given subject will **place a deposit** or not.

We will use a Tensorflow ANN to make our predictions.

Data source: https://www.kaggle.com/datasets/volodymyrgavrysh/bank-marketing-campaigns-dataset

### Importing Libraries

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import classification_report, confusion_matrix

In [29]:
tf.random.set_seed(100)

In [2]:
data = pd.read_csv('archive/bank-additional-full.csv', delimiter=';')
data

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,261,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,149,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,226,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,151,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,307,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41183,73,retired,married,professional.course,no,yes,no,cellular,nov,fri,334,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,yes
41184,46,blue-collar,married,professional.course,no,no,no,cellular,nov,fri,383,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,no
41185,56,retired,married,university.degree,no,yes,no,cellular,nov,fri,189,2,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,no
41186,44,technician,married,professional.course,no,no,no,cellular,nov,fri,442,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,yes


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41188 entries, 0 to 41187
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             41188 non-null  int64  
 1   job             41188 non-null  object 
 2   marital         41188 non-null  object 
 3   education       41188 non-null  object 
 4   default         41188 non-null  object 
 5   housing         41188 non-null  object 
 6   loan            41188 non-null  object 
 7   contact         41188 non-null  object 
 8   month           41188 non-null  object 
 9   day_of_week     41188 non-null  object 
 10  duration        41188 non-null  int64  
 11  campaign        41188 non-null  int64  
 12  pdays           41188 non-null  int64  
 13  previous        41188 non-null  int64  
 14  poutcome        41188 non-null  object 
 15  emp.var.rate    41188 non-null  float64
 16  cons.price.idx  41188 non-null  float64
 17  cons.conf.idx   41188 non-null 

### Preprocessing

In [4]:
df = data.copy()

In [5]:
df.isna().sum()

age               0
job               0
marital           0
education         0
default           0
housing           0
loan              0
contact           0
month             0
day_of_week       0
duration          0
campaign          0
pdays             0
previous          0
poutcome          0
emp.var.rate      0
cons.price.idx    0
cons.conf.idx     0
euribor3m         0
nr.employed       0
y                 0
dtype: int64

#### Encoding

In [6]:
df['y'].unique()

array(['no', 'yes'], dtype=object)

In [7]:
df['y'] = df['y'].apply(lambda x: 1 if x == 'yes' else 0)

#### Encoding categorical variables

In [8]:
df.select_dtypes('object')

Unnamed: 0,job,marital,education,default,housing,loan,contact,month,day_of_week,poutcome
0,housemaid,married,basic.4y,no,no,no,telephone,may,mon,nonexistent
1,services,married,high.school,unknown,no,no,telephone,may,mon,nonexistent
2,services,married,high.school,no,yes,no,telephone,may,mon,nonexistent
3,admin.,married,basic.6y,no,no,no,telephone,may,mon,nonexistent
4,services,married,high.school,no,no,yes,telephone,may,mon,nonexistent
...,...,...,...,...,...,...,...,...,...,...
41183,retired,married,professional.course,no,yes,no,cellular,nov,fri,nonexistent
41184,blue-collar,married,professional.course,no,no,no,cellular,nov,fri,nonexistent
41185,retired,married,university.degree,no,yes,no,cellular,nov,fri,nonexistent
41186,technician,married,professional.course,no,no,no,cellular,nov,fri,nonexistent


In [9]:
{column: len(df[column].unique()) for column in df.select_dtypes('object').columns}

{'job': 12,
 'marital': 4,
 'education': 8,
 'default': 3,
 'housing': 3,
 'loan': 3,
 'contact': 2,
 'month': 10,
 'day_of_week': 5,
 'poutcome': 3}

In [10]:
{column: list(df[column].unique()) for column in df.select_dtypes('object').columns}

{'job': ['housemaid',
  'services',
  'admin.',
  'blue-collar',
  'technician',
  'retired',
  'management',
  'unemployed',
  'self-employed',
  'unknown',
  'entrepreneur',
  'student'],
 'marital': ['married', 'single', 'divorced', 'unknown'],
 'education': ['basic.4y',
  'high.school',
  'basic.6y',
  'basic.9y',
  'professional.course',
  'unknown',
  'university.degree',
  'illiterate'],
 'default': ['no', 'unknown', 'yes'],
 'housing': ['no', 'yes', 'unknown'],
 'loan': ['no', 'yes', 'unknown'],
 'contact': ['telephone', 'cellular'],
 'month': ['may',
  'jun',
  'jul',
  'aug',
  'oct',
  'nov',
  'dec',
  'mar',
  'apr',
  'sep'],
 'day_of_week': ['mon', 'tue', 'wed', 'thu', 'fri'],
 'poutcome': ['nonexistent', 'failure', 'success']}

In [11]:
df = df.replace('unknown', np.nan)

In [12]:
{column: list(df[column].unique()) for column in df.select_dtypes('object').columns}

{'job': ['housemaid',
  'services',
  'admin.',
  'blue-collar',
  'technician',
  'retired',
  'management',
  'unemployed',
  'self-employed',
  nan,
  'entrepreneur',
  'student'],
 'marital': ['married', 'single', 'divorced', nan],
 'education': ['basic.4y',
  'high.school',
  'basic.6y',
  'basic.9y',
  'professional.course',
  nan,
  'university.degree',
  'illiterate'],
 'default': ['no', nan, 'yes'],
 'housing': ['no', 'yes', nan],
 'loan': ['no', 'yes', nan],
 'contact': ['telephone', 'cellular'],
 'month': ['may',
  'jun',
  'jul',
  'aug',
  'oct',
  'nov',
  'dec',
  'mar',
  'apr',
  'sep'],
 'day_of_week': ['mon', 'tue', 'wed', 'thu', 'fri'],
 'poutcome': ['nonexistent', 'failure', 'success']}

In [13]:
nominal_features = [
    'job',
    'marital',
    'education',
    'day_of_week',
    'poutcome'
] 

ordinal_features = [
    'month'
]

binary_features = [
    'default',
    'housing',
    'loan',
    'contact'
]

In [14]:
def onehot_encode(df, columns, prefixes):
    df = df.copy()
    for column, prefix in zip(columns, prefixes):
        dummies = pd.get_dummies(df[column], prefix=prefix, dtype=int)
        df = pd.concat([df, dummies], axis=1)
        df = df.drop(column, axis=1)
    return df

def ordinal_encode(df, columns, orderings):
    df = df.copy()
    for column, ordering in zip(columns, orderings):
        df[column] = df[column].apply(lambda x: ordering.index(x) if str(x) != 'nan' else x)
    return df

def binary_encode(df, columns, positive_values):
    df = df.copy()
    for column, positive_value in zip(columns, positive_values):
        df[column] = df[column].apply(lambda x: 1 if x == positive_value else x)
        df[column] = df[column].apply(lambda x: 0 if str(x) != 'nan' else x)
    return df

In [15]:
df.isna().sum()

age                  0
job                330
marital             80
education         1731
default           8597
housing            990
loan               990
contact              0
month                0
day_of_week          0
duration             0
campaign             0
pdays                0
previous             0
poutcome             0
emp.var.rate         0
cons.price.idx       0
cons.conf.idx        0
euribor3m            0
nr.employed          0
y                    0
dtype: int64

In [16]:
prefixes = ['J', 'M', 'E', 'D', 'P']

orderings = [
    ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
]

positive_values = [
    'yes', 'yes', 'yes', 'cellular'
]

In [17]:
df = onehot_encode(
    df,
    columns = nominal_features,
    prefixes = prefixes
)

df = ordinal_encode(
    df,
    columns = ordinal_features,
    orderings = orderings
)

df = binary_encode(
    df,
    columns = binary_features,
    positive_values = positive_values
)

In [18]:
df

Unnamed: 0,age,default,housing,loan,contact,month,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y,J_admin.,J_blue-collar,J_entrepreneur,J_housemaid,J_management,J_retired,J_self-employed,J_services,J_student,J_technician,J_unemployed,M_divorced,M_married,M_single,E_basic.4y,E_basic.6y,E_basic.9y,E_high.school,E_illiterate,E_professional.course,E_university.degree,D_fri,D_mon,D_thu,D_tue,D_wed,P_failure,P_nonexistent,P_success
0,56,0.0,0.0,0.0,0,4,261,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0
1,57,,0.0,0.0,0,4,149,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
2,37,0.0,0.0,0.0,0,4,226,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
3,40,0.0,0.0,0.0,0,4,151,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0
4,56,0.0,0.0,0.0,0,4,307,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41183,73,0.0,0.0,0.0,0,10,334,1,999,0,-1.1,94.767,-50.8,1.028,4963.6,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
41184,46,0.0,0.0,0.0,0,10,383,1,999,0,-1.1,94.767,-50.8,1.028,4963.6,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
41185,56,0.0,0.0,0.0,0,10,189,2,999,0,-1.1,94.767,-50.8,1.028,4963.6,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0
41186,44,0.0,0.0,0.0,0,10,442,1,999,0,-1.1,94.767,-50.8,1.028,4963.6,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0


#### Filling missing values

In [21]:
df.isna().sum()[df.isna().sum() > 0]

default    8597
housing     990
loan        990
dtype: int64

In [22]:
for column in ['default', 'housing', 'loan']:
    df[column] = df[column].fillna(df[column].mean())

In [24]:
print("Remaining missing values: ", df.isna().sum().sum())

Remaining missing values:  0


In [25]:
print("Remaining non-numeric columns:", len(df.select_dtypes('object').columns))

Remaining non-numeric columns: 0


#### Splitting/Scaling

In [26]:
df

Unnamed: 0,age,default,housing,loan,contact,month,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y,J_admin.,J_blue-collar,J_entrepreneur,J_housemaid,J_management,J_retired,J_self-employed,J_services,J_student,J_technician,J_unemployed,M_divorced,M_married,M_single,E_basic.4y,E_basic.6y,E_basic.9y,E_high.school,E_illiterate,E_professional.course,E_university.degree,D_fri,D_mon,D_thu,D_tue,D_wed,P_failure,P_nonexistent,P_success
0,56,0.0,0.0,0.0,0,4,261,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0
1,57,0.0,0.0,0.0,0,4,149,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
2,37,0.0,0.0,0.0,0,4,226,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
3,40,0.0,0.0,0.0,0,4,151,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0
4,56,0.0,0.0,0.0,0,4,307,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41183,73,0.0,0.0,0.0,0,10,334,1,999,0,-1.1,94.767,-50.8,1.028,4963.6,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
41184,46,0.0,0.0,0.0,0,10,383,1,999,0,-1.1,94.767,-50.8,1.028,4963.6,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
41185,56,0.0,0.0,0.0,0,10,189,2,999,0,-1.1,94.767,-50.8,1.028,4963.6,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0
41186,44,0.0,0.0,0.0,0,10,442,1,999,0,-1.1,94.767,-50.8,1.028,4963.6,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0


In [27]:
y = df['y'].copy()
X = df.drop('y', axis=1).copy()

In [28]:
scaler = StandardScaler()

X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=100)

In [32]:
X_train

Unnamed: 0,age,default,housing,loan,contact,month,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,J_admin.,J_blue-collar,J_entrepreneur,J_housemaid,J_management,J_retired,J_self-employed,J_services,J_student,J_technician,J_unemployed,M_divorced,M_married,M_single,E_basic.4y,E_basic.6y,E_basic.9y,E_high.school,E_illiterate,E_professional.course,E_university.degree,D_fri,D_mon,D_thu,D_tue,D_wed,P_failure,P_nonexistent,P_success
7019,-0.865939,0.0,0.0,0.0,0.0,-0.787808,-0.602774,0.517118,0.195414,-0.349494,0.648092,0.722722,0.886447,0.714190,0.331680,1.718146,-0.538317,-0.19143,-0.162528,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,-0.442449,-0.158872,-0.355097,0.807638,-0.624938,-0.335899,-0.242748,-0.414743,-0.548100,-0.02091,-0.381918,1.544326,-0.484371,-0.510465,1.943329,-0.494394,-0.496067,-0.339290,0.397706,-0.1857
27175,1.053240,0.0,0.0,0.0,0.0,2.151966,-0.155375,0.156105,0.195414,-0.349494,-0.115781,-0.649003,-0.323542,0.230456,0.398115,-0.582023,-0.538317,-0.19143,-0.162528,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,2.260146,-0.158872,-0.355097,-1.238179,1.600160,-0.335899,-0.242748,2.411134,-0.548100,-0.02091,-0.381918,-0.647531,2.064533,-0.510465,-0.514581,-0.494394,-0.496067,-0.339290,0.397706,-0.1857
15817,-0.098268,0.0,0.0,0.0,0.0,0.192116,0.288168,0.156105,0.195414,-0.349494,0.839061,0.591424,-0.474791,0.771846,0.845170,1.718146,-0.538317,-0.19143,-0.162528,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,-0.442449,-0.158872,-0.355097,-1.238179,1.600160,-0.335899,-0.242748,-0.414743,-0.548100,-0.02091,-0.381918,1.544326,-0.484371,1.959000,-0.514581,-0.494394,-0.496067,-0.339290,0.397706,-0.1857
40971,2.684542,0.0,0.0,0.0,0.0,1.662004,-0.494781,0.517118,0.195414,3.691766,-0.752343,1.771384,-1.944063,-1.511330,-2.815697,-0.582023,-0.538317,-0.19143,-0.162528,-0.276435,4.790252,-0.189032,-0.326556,-0.147327,-0.442449,-0.158872,2.816135,-1.238179,-0.624938,-0.335899,-0.242748,-0.414743,1.824485,-0.02091,-0.381918,-0.647531,-0.484371,1.959000,-0.514581,-0.494394,-0.496067,2.947327,-2.514421,-0.1857
8467,-0.290186,0.0,0.0,0.0,0.0,-0.297846,0.033613,0.156105,0.195414,-0.349494,0.839061,1.536429,-0.280328,0.716496,0.845170,-0.582023,-0.538317,-0.19143,-0.162528,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,2.260146,-0.158872,-0.355097,0.807638,-0.624938,-0.335899,-0.242748,-0.414743,1.824485,-0.02091,-0.381918,-0.647531,-0.484371,-0.510465,-0.514581,2.022677,-0.496067,-0.339290,0.397706,-0.1857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16304,0.381527,0.0,0.0,0.0,0.0,0.192116,-0.228656,-0.204909,0.195414,-0.349494,0.839061,0.591424,-0.474791,0.772422,0.845170,-0.582023,-0.538317,-0.19143,-0.162528,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,-0.442449,-0.158872,-0.355097,0.807638,-0.624938,-0.335899,4.119506,-0.414743,-0.548100,-0.02091,-0.381918,-0.647531,-0.484371,-0.510465,-0.514581,2.022677,-0.496067,-0.339290,0.397706,-0.1857
79,0.189609,0.0,0.0,0.0,0.0,-0.787808,-0.193944,-0.565922,0.195414,-0.349494,0.648092,0.722722,0.886447,0.712460,0.331680,-0.582023,1.857642,-0.19143,-0.162528,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,-0.442449,-0.158872,-0.355097,0.807638,-0.624938,-0.335899,-0.242748,2.411134,-0.548100,-0.02091,-0.381918,-0.647531,-0.484371,1.959000,-0.514581,-0.494394,-0.496067,-0.339290,0.397706,-0.1857
12119,-0.098268,0.0,0.0,0.0,0.0,-0.297846,-0.213228,-0.565922,0.195414,-0.349494,0.839061,1.536429,-0.280328,0.764350,0.845170,-0.582023,-0.538317,-0.19143,-0.162528,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,-0.442449,6.294389,-0.355097,0.807638,-0.624938,2.977084,-0.242748,-0.414743,-0.548100,-0.02091,-0.381918,-0.647531,-0.484371,1.959000,-0.514581,-0.494394,-0.496067,-0.339290,0.397706,-0.1857
14147,-1.729570,0.0,0.0,0.0,0.0,0.192116,1.962059,3.405226,0.195414,-0.349494,0.839061,0.591424,-0.474791,0.772999,0.845170,-0.582023,-0.538317,-0.19143,6.152772,-0.276435,-0.208757,-0.189032,-0.326556,-0.147327,-0.442449,-0.158872,-0.355097,-1.238179,1.600160,-0.335899,-0.242748,-0.414743,1.824485,-0.02091,-0.381918,-0.647531,-0.484371,1.959000,-0.514581,-0.494394,-0.496067,-0.339290,0.397706,-0.1857


In [31]:
X_train.shape

(28831, 44)

### Modeling/Training

In [37]:
print("Positive examples: {}".format(y.sum()))
print("Negative examples: {}".format(len(y) - y.sum()))
print("\nClass Distribution: {:.1f}% / {:.1f}%".format(y.mean() * 100, (1 - y.mean())*100))

Positive examples: 4640
Negative examples: 36548

Class Distribution: 11.3% / 88.7%


In [38]:
inputs = tf.keras.Input(shape=(X.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = [
        'accuracy',
        tf.keras.metrics.AUC(name='auc')
    ]
)

batch_size = 32
epochs = 100

history = model.fit(
    X_train,
    y_train,
    validation_split = 0.2,
    batch_size = batch_size,
    epochs = epochs,
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor = 'val_loss',
            patience = 3,
            restore_best_weights = True
        )
    ]
)

Epoch 1/100
[1m721/721[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9019 - auc: 0.8958 - loss: 0.2333 - val_accuracy: 0.9121 - val_auc: 0.9302 - val_loss: 0.1958
Epoch 2/100
[1m721/721[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9092 - auc: 0.9295 - loss: 0.1981 - val_accuracy: 0.9123 - val_auc: 0.9328 - val_loss: 0.1927
Epoch 3/100
[1m721/721[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9119 - auc: 0.9344 - loss: 0.1920 - val_accuracy: 0.9133 - val_auc: 0.9337 - val_loss: 0.1919
Epoch 4/100
[1m721/721[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9148 - auc: 0.9376 - loss: 0.1879 - val_accuracy: 0.9123 - val_auc: 0.9341 - val_loss: 0.1913
Epoch 5/100
[1m721/721[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9156 - auc: 0.9402 - loss: 0.1842 - val_accuracy: 0.9119 - val_auc: 0.9346 - val_loss: 0.1909
Epoch 6/100
[1m721/721[

### Results

In [39]:
model.evaluate(X_test, y_test)

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9060 - auc: 0.9279 - loss: 0.2022


[0.20219583809375763, 0.906045138835907, 0.9279459714889526]

In [40]:
y_true = np.array(y_test)
y_pred = np.squeeze(np.array(model.predict(X_test) >= 0.5, dtype=int))

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


In [41]:
print("Confusion Matrix \n", confusion_matrix(y_true, y_pred))

Confusion Matrix 
 [[10398   577]
 [  584   798]]


In [42]:
print("Classification Report: \n\n", classification_report(y_true, y_pred))

Classification Report: 

               precision    recall  f1-score   support

           0       0.95      0.95      0.95     10975
           1       0.58      0.58      0.58      1382

    accuracy                           0.91     12357
   macro avg       0.76      0.76      0.76     12357
weighted avg       0.91      0.91      0.91     12357

