# Bank Marketing Campaign Success

__ML-Zoomcamp Capstone Project__

Let's organize our final model into a form that can easily be exported as a Python script:

In [10]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Read the semicolon-separated dataset and clean it in one chain:
# remove duplicate rows, drop the unused columns, and rename the target.
df = (
    pd.read_csv('./data/bank-full.csv', sep=';')
    .drop_duplicates()
    .drop(columns=['day', 'month', 'contact'])
    .rename(columns={'y': 'success'})
)

# Encode the target as an integer: 1 where the value is 'yes', 0 otherwise.
df['success'] = (df['success'] == 'yes').astype('int')

In [4]:
# Hold out 20% of the rows as the test split; the fixed seed keeps the split stable.
df_full_train, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=7,
)

In [5]:
# Feature columns fed to the model, grouped by kind.
numerical = [
    'age',
    'balance',
    'duration',
    'campaign',
    'pdays',
    'previous',
]
categorical = [
    'job',
    'marital',
    'education',
    'default',
    'housing',
    'loan',
    'poutcome',
]

In [6]:
def train(df_train, y_train, random_state=7):
    """Fit the feature encoder and the random-forest classifier.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training rows. Only the columns named in the module-level
        ``numerical`` and ``categorical`` lists are used; any other
        column (including the target) is ignored.
    y_train : array-like
        Target labels, one per row of ``df_train``.
    random_state : int or None, default 7
        Seed forwarded to ``RandomForestClassifier``. A fixed seed makes the
        fitted forest, and therefore the reported metric, repeatable across
        kernel restarts. Pass ``None`` to get an unseeded forest.

    Returns
    -------
    tuple
        ``(dv, model)``: the fitted ``DictVectorizer`` and the fitted
        ``RandomForestClassifier``.
    """
    train_dict = df_train[numerical + categorical].to_dict(orient='records')

    # The vectorizer is fitted here and must be reused unchanged at
    # prediction time so that the feature columns line up.
    dv = DictVectorizer(sparse=False)
    X_train = dv.fit_transform(train_dict)

    model = RandomForestClassifier(
        n_estimators=50,
        max_depth=10,
        min_samples_leaf=3,
        random_state=random_state,
    )
    model.fit(X_train, y_train)

    return dv, model

In [15]:
def predict(df, dv, model):
    """Score every row of ``df`` with a fitted vectorizer/model pair.

    Only the columns named in the module-level ``numerical`` and
    ``categorical`` lists are read from ``df``. Returns column 1 of
    ``model.predict_proba`` as a 1-D array, one value per row.
    """
    feature_columns = numerical + categorical
    records = df[feature_columns].to_dict(orient='records')

    # Reuse the already-fitted vectorizer; do not refit at prediction time.
    feature_matrix = dv.transform(records)
    probabilities = model.predict_proba(feature_matrix)

    return probabilities[:, 1]

Running the model:

In [16]:
# Fit on the full training split.
dv, model = train(df_full_train, df_full_train['success'].values)

# Score the held-out test split against its true labels.
y_test = df_test['success'].values
y_pred = predict(df_test, dv, model)

# Last expression of the cell: ROC AUC on the test split.
roc_auc_score(y_test, y_pred)

0.8875743185646082

Exporting the model to a pickle binary file:

In [8]:
import pickle

In [9]:
# Destination of the pickled (vectorizer, model) pair.
# Plain string literal: the previous f-string had no placeholders.
output_file = 'rf_model.bin'

In [17]:
# Write the fitted vectorizer and model to disk together as one tuple,
# so a single pickle.load() returns both objects as a pair.
with open(output_file, 'wb') as f_out:
    pickle.dump((dv, model), f_out)

Selecting a few customers for testing purposes:

In [34]:
# Features of test row 100 as JSON, with the target column left out.
print(df_test.drop(columns='success').iloc[100].to_json())

{"age":29,"job":"admin.","marital":"married","education":"secondary","default":"no","balance":252,"housing":"yes","loan":"no","duration":1223,"campaign":3,"pdays":371,"previous":1,"poutcome":"failure"}


In [25]:
# Select the target by column label rather than the positional slice 13:14,
# so the cell does not depend on the exact number and order of columns.
df_test.iloc[100][['success']]

success    0
Name: 38679, dtype: object

In [33]:
# Features of test row 200 as JSON, with the target column left out.
print(df_test.drop(columns='success').iloc[200].to_json())

{"age":46,"job":"management","marital":"married","education":"tertiary","default":"no","balance":273,"housing":"yes","loan":"no","duration":583,"campaign":6,"pdays":53,"previous":6,"poutcome":"success"}


In [30]:
# Select the target by column label rather than the positional slice 13:14,
# so the cell does not depend on the exact number and order of columns.
df_test.iloc[200][['success']]

success    1
Name: 43663, dtype: object