## Boilerplate

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import itertools
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score, cohen_kappa_score
%matplotlib inline

# Class labels used by this notebook.
# NOTE(review): presumably passed as `classes` to plot_confusion_matrix in
# later cells -- confirm the order matches the label order of the matrix.
class_names = [
    "A",
    "B",
    "None",
]

In [2]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are labelled "True label" and columns
        "Predicted label" on the plot.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, default False
        When True, each row is divided by its row sum before plotting.
        Rows whose sum is zero are shown as zeros instead of NaN.
    title : str, default 'Confusion matrix'
        Title of the plot.
    cmap : matplotlib colormap, default ``plt.cm.Blues``
        Colormap used for the heat map.

    Returns
    -------
    None
        The plot is drawn through the ``pyplot`` state machine; nothing is
        returned.
    """
    cm = np.asarray(cm)

    if normalize:
        cm = cm.astype(float)
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; an empty row would
        # otherwise become NaN and turn `cm.max()` below into NaN as well.
        cm = np.divide(cm, row_totals,
                       out=np.zeros_like(cm),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; anything else (normalized values or
    # a float matrix passed by the caller) is printed with two decimals,
    # because the 'd' format code raises on floats.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()

### Prepare the test data

In [3]:
# Load the customer database once. `df_copy` keeps an untouched snapshot of
# the raw frame, taken before any column is renamed or transformed, so the
# file does not have to be parsed a second time.
df = pd.read_csv("original_data/custdatabase.csv")
df_copy = df.copy()
print('Test dataset shape {0}, {1}'.format(df.shape[0], df.shape[1]))

# strip the spaces in the header, if present
df = df.rename(columns=lambda x: x.strip())

categorical_features = ["sex", "mstatus", "occupation", "education", "children"]

# Cast `children` to str so its class values can be concatenated into the
# one-hot column names built below.
df["children"] = df["children"].astype(str)

# Integer-encode every categorical feature into a "<feature>Num" column and
# remember both the fitted encoder and its class list.
label_encoders = {}
label_mappings = {}
for categorical_feature in categorical_features:
    label_encoders[categorical_feature] = preprocessing.LabelEncoder()
    df[categorical_feature + "Num"] = label_encoders[categorical_feature].fit_transform(df[categorical_feature])
    label_mappings[categorical_feature] = label_encoders[categorical_feature].classes_

# One-hot encode: one 0/1 column named "<feature>_<class>" per class value.
for categorical_feature in categorical_features:
    for class_value in label_mappings[categorical_feature]:
        df[categorical_feature + "_" + class_value] = (df[categorical_feature] == class_value).astype(int)

# drop the categorical values
df = df.drop(columns=categorical_features)

# remove num columns for Neural networks
shouldRemoveNumColumns = True

if shouldRemoveNumColumns:
    # Derive the names from `categorical_features` so this list cannot drift
    # from the "<feature>Num" columns created by the label-encoding loop.
    num_columns = [categorical_feature + "Num" for categorical_feature in categorical_features]
    df = df.drop(columns=num_columns)
else:
    print("Not dropping the num columns")

train_df = pd.read_csv("working_data/trial_promo_training.csv")
features_to_scale = ["age", "income", "avbal", "avtrans"]
min_max_scaler = preprocessing.MinMaxScaler()

# Fit on the train data only. `fit` returns the scaler itself, so its result
# must not be assigned back into `train_df` (that would overwrite the
# training columns with the scaler object).
# NOTE(review): this assumes the training CSV holds the unscaled values of
# these columns -- confirm against the notebook that produced it.
min_max_scaler.fit(train_df[features_to_scale])

# transform the test data with the ranges learned from the train data
df[features_to_scale] = min_max_scaler.transform(df[features_to_scale])

# Set the "index" column aside, then remove it from the feature frame.
customer_ids = df.loc[:, "index"]
df = df.drop(columns=["index"])

# Last expression of the cell: show the final feature columns.
df.columns

Test dataset shape 4000, 10


Index(['age', 'income', 'avbal', 'avtrans', 'sex_F', 'sex_M',
       'mstatus_divorced', 'mstatus_married', 'mstatus_single',
       'mstatus_widowed', 'occupation_IT', 'occupation_construct',
       'occupation_education', 'occupation_finance', 'occupation_government',
       'occupation_legal', 'occupation_manuf', 'occupation_medicine',
       'occupation_retired', 'education_postgrad', 'education_professional',
       'education_secondary', 'education_tertiary', 'children_0', 'children_1',
       'children_2', 'children_3', 'children_4'],
      dtype='object')

### Load the different models

## Neural network

In [None]:
from keras.models import load_model

# File name of the saved model; it is looked up inside the models/ folder.
model_to_use = "model-004-0.82.h5"
model = load_model("models/{0}".format(model_to_use))