In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn import model_selection
from sklearn.metrics import accuracy_score

from dbn.tensorflow import SupervisedDBNClassification

In [2]:
# Location of the raw CSV, relative to the notebook's working directory.
DATA_PATH = 'datasets/banking.csv'

# Load the raw table; every later cell works on this frame.
df = pd.read_csv(DATA_PATH)

In [3]:
# Columns removed from the frame before encoding and modelling.
DROPPED_COLUMNS = ['duration', 'contact', 'month', 'day_of_week', 'default', 'pdays']

# Values that are collapsed into the single label 'basic'.
BASIC_EDUCATION_LEVELS = ['basic.6y', 'basic.4y', 'basic.9y']

# errors='ignore' makes this cell safe to re-run: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
# Trade-off: a misspelled column name is skipped silently instead of failing.
df.drop(columns=DROPPED_COLUMNS, inplace=True, errors='ignore')

# Frame-wide replacement (matches in any column are rewritten); already idempotent.
df.replace(BASIC_EDUCATION_LEVELS, 'basic', inplace=True)

In [4]:
# Four independent, not-yet-fitted label encoders: one each for job, marital and
# education, plus a general-purpose one for the remaining categorical columns.
le_job, le_marital, le_education, le_other = (LabelEncoder() for _ in range(4))

In [5]:
# Integer-encode the categorical columns of df in place.
# NOTE: run once per freshly loaded df. Running this cell again would refit the
# encoders on the integer codes, and inverse_transform would then return codes
# instead of the original category names.
df['job'] = le_job.fit_transform(df['job'])
df['marital'] = le_marital.fit_transform(df['marital'])
df['education'] = le_education.fit_transform(df['education'])

# housing and loan get dedicated encoders. Refitting one shared encoder for
# several columns keeps only the last column's mapping, which makes the earlier
# columns impossible to decode with inverse_transform.
le_housing = LabelEncoder()
le_loan = LabelEncoder()
df['housing'] = le_housing.fit_transform(df['housing'])
df['loan'] = le_loan.fit_transform(df['loan'])

# le_other ends up fitted on poutcome (the same final state it always had).
df['poutcome'] = le_other.fit_transform(df['poutcome'])

In [6]:
# Positional split of df: the first 14 columns are the features and the column
# at position 14 is the target.
TARGET_POSITION = 14

X = df.iloc[:, :TARGET_POSITION]
y = df.iloc[:, TARGET_POSITION]

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split
# reproducible between runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 0

x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [8]:
# Hyperparameters handed to SupervisedDBNClassification, gathered in one place
# so they are easy to tune. The keys are the constructor's keyword names.
# NOTE(review): no random seed is set in this notebook, so the fitted model (and
# the scores reported below) may vary between runs — confirm whether the library
# seeds itself.
dbn_params = dict(
    hidden_layers_structure=[20, 50],
    learning_rate_rbm=0.003,
    learning_rate=0.1,
    n_epochs_rbm=10,
    n_iter_backprop=30,
    batch_size=128,
    activation_function='relu',
    verbose=False,
)
classifier = SupervisedDBNClassification(**dbn_params)

# Features are passed as plain NumPy arrays; the training labels are passed as
# the pandas Series produced by the split.
train_features = x_train.to_numpy()
test_features = x_test.to_numpy()

classifier.fit(train_features, y_train)
pred = classifier.predict(test_features)

In [9]:
# Fraction of test rows whose predicted label equals the true label.
test_accuracy = accuracy_score(y_test, pred)
print(test_accuracy)

0.9291721291575625


In [10]:
# Collect the false negatives: test rows whose true label is 1 but which the
# classifier predicted as 0.
#
# np.asarray (instead of Series.to_numpy) yields the same positional array on
# the first run and is a no-op once y_test is already an ndarray, so re-running
# this cell no longer raises AttributeError.
y_test = np.asarray(y_test)

# Each kept row is cast to int32 so its encoded columns can be passed back to
# LabelEncoder.inverse_transform; the cast also truncates any fractional
# feature values in the row.
fn = [
    x_test.iloc[i].astype('int32')
    for i in range(len(y_test))
    if y_test[i] == 1 and pred[i] == 0
]

In [11]:
def _decode(encoder, code):
    """Return the original category label that `encoder` mapped to `code`."""
    return encoder.inverse_transform([code])[0]


# Show the first 20 false negatives with their categorical codes decoded.
for row in fn[:20]:
    job = _decode(le_job, row['job'])
    marital = _decode(le_marital, row['marital'])
    education = _decode(le_education, row['education'])
    print(f"age: {row['age']} | job: {job} | marital: {marital} | education: {education} | number phoned: {row['campaign']}")

age: 35 | job: admin. | marital: married | education: high.school | number phoned: 1
age: 30 | job: student | marital: single | education: professional.course | number phoned: 4
age: 51 | job: blue-collar | marital: married | education: basic | number phoned: 1
age: 31 | job: admin. | marital: married | education: university.degree | number phoned: 2
age: 30 | job: technician | marital: single | education: university.degree | number phoned: 1
age: 56 | job: entrepreneur | marital: married | education: university.degree | number phoned: 2
age: 42 | job: admin. | marital: married | education: professional.course | number phoned: 1
age: 23 | job: student | marital: single | education: unknown | number phoned: 2
age: 53 | job: management | marital: divorced | education: university.degree | number phoned: 2
age: 62 | job: technician | marital: married | education: unknown | number phoned: 1
age: 45 | job: admin. | marital: married | education: unknown | number phoned: 1
age: 33 | job: admin

In [12]:
# One row per false negative, with the same column layout as x_test.
fn_df = pd.DataFrame(fn, columns = x_test.columns)

# (encoder, column) pairs in the order their frequency tables are printed.
decoders = [(le_job, 'job'), (le_education, 'education'), (le_marital, 'marital')]

for position, (encoder, column) in enumerate(decoders):
    # Decode the integer codes back to category names, then count occurrences.
    decoded = pd.DataFrame(encoder.inverse_transform(fn_df[column]), columns=[column])
    counts = decoded[column].value_counts()
    if position < len(decoders) - 1:
        # Extra '\n' argument separates consecutive tables with a blank line.
        print(counts, '\n')
    else:
        print(counts)

admin.           264
technician       170
blue-collar      115
retired           83
services          66
management        58
student           50
self-employed     29
housemaid         28
unemployed        24
entrepreneur      23
unknown            3
Name: job, dtype: int64 

university.degree      345
basic                  211
high.school            192
professional.course    123
unknown                 42
Name: education, dtype: int64 

married     491
single      328
divorced     92
unknown       2
Name: marital, dtype: int64
