In [156]:
import pandas as pd
import numpy as np
from IPython.display import display
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [157]:
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

In [158]:
label_encoder = LabelEncoder()
def label_data(jobs):
    global label_encoder
    jobs = list(jobs)
    data = {}
    integer_encoded = label_encoder.fit_transform(jobs)

    for z in list(set(zip(jobs,integer_encoded))):
        job,code = z[0],z[1]
        data[job] = code

    return integer_encoded, data

def is_numerical(value):
    try:
        float(value)
    except:
        return False
    else:
        return True


In [160]:
df = pd.read_csv('term-deposit-marketing-2020.csv', sep=',')
df = df.dropna(axis="columns", how="any")
months = {'jan':1, 'feb':2, 'mar':3, 'apr':4, 'may':5, 'jun':6, 'jul':7, 'aug':8, 'sep':9, 'oct':10, 'nov':11, 'dec':12}

display(df.head())

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,no


In [161]:
# Change all string type columns to numerical 
# Convert month and days columns to time
dictionary = {}
for key in df.keys():
    if key == "month":
        df[key] = [months[val] for val in df[key]]
    if not is_numerical(df[key][0]):
        df[key], dictionary[key] = label_data(list(df[key]))
df['time'] = df['month']*30 + df['day']
df = df.drop(columns=['day', 'month'])
display(df.head())

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,duration,campaign,y,time
0,58,4,1,2,0,2143,1,0,2,261,1,0,155
1,44,9,2,1,0,29,1,0,2,151,1,0,155
2,33,2,1,1,0,2,1,1,2,76,1,0,155
3,47,1,1,3,0,1506,1,0,2,92,1,0,155
4,33,11,2,3,0,1,0,0,2,198,1,0,155


In [162]:
df_partial = df[df['y'] == 1]
df_temp = df[df['y'] == 0]
df_partial = df_partial.append(df_temp[:len(df_partial)  ], ignore_index = True)
df_partial = df_partial.sample(frac=1, random_state=42)
train_y = np.array(list(df_partial['y']))
df_partial = df_partial.drop(columns=['y'])
train_x = df_partial.to_numpy()


abs_size = 500
abs_test_x = train_x[-abs_size:]
abs_test_y = train_y[-abs_size:]
train_x, train_y = train_x[:-abs_size], train_y[:-abs_size]
print("Train x shape: ",train_x.shape)
print("Train y shape: ",train_y.shape)
print("Test x shape: ",abs_test_x.shape)
print("Test y shape: ",abs_test_x.shape)


Train x shape:  (5292, 12)
Train y shape:  (5292,)
Test x shape:  (500, 12)
Test y shape:  (500, 12)


In [None]:
from keras import models
from keras import layers

def build_model():
    # Because we will need to instantiate
    # the same model multiple times,
    # we use a function to construct it.
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu',
                           input_shape=(train_x.shape[1],)))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid') )
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy', 'mean_absolute_error'])
    return model

In [None]:
from keras import backend as K
K.clear_session()
model = build_model()
model.fit(train_x, train_y, epochs=25, batch_size=1, verbose=1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7fe89aab1f60>

In [None]:
keys = 'age', 'job', 'marital', 'education', 'default', 'balance', 'housing', 'loan', 'contact', 'duration', 'campaign', 'time'

result = {}
if True:
  for i,keyy in enumerate(keys):
    temp = abs_test_x.copy()
    ma = max(temp[:,i])
    temp[:,i] = [ma-elem for elem in temp[:,i]] 

    _, val_mae, _ = model.evaluate(temp, abs_test_y, verbose=0)
    print(f"Model score with opposite {keyy}: ", val_mae)
    result[keyy] = val_mae

Model score with opposite age:  0.9380000233650208
Model score with opposite job:  0.9359999895095825
Model score with opposite marital:  0.9359999895095825
Model score with opposite education:  0.9419999718666077
Model score with opposite default:  0.9440000057220459
Model score with opposite balance:  0.4659999907016754
Model score with opposite housing:  0.8640000224113464
Model score with opposite loan:  0.9480000138282776
Model score with opposite contact:  0.3840000033378601
Model score with opposite duration:  0.46399998664855957
Model score with opposite campaign:  0.7879999876022339
Model score with opposite time:  0.8820000290870667


In [None]:
print('From Most Effective Features To Least Effective')
[elem[0] for elem in sorted(result.items(), key=lambda x: x[1])]

From Most Effective Features To Least Effective


['contact',
 'duration',
 'balance',
 'campaign',
 'housing',
 'time',
 'job',
 'marital',
 'age',
 'education',
 'default',
 'loan']