## Generate predictions on the test data set

In [54]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import itertools
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
%matplotlib inline

In [55]:
# File name of the saved Keras model to load. It is resolved relative to the
# "models" folder, so the file must be placed there.
model_to_use = "model-004-0.82.h5"

In [56]:
from keras.models import load_model

# Build the location of the saved model inside the models folder, then load it.
model_path = 'models/' + model_to_use
model = load_model(model_path)

In [57]:
# Load the customer data once. `df` is transformed into model features below,
# while `df_copy` keeps the untouched rows so predictions can be attached to them.
# Copying the frame avoids parsing the same file a second time and guarantees
# both frames come from the same snapshot of the file.
df = pd.read_csv("original_data/custdatabase.csv")
df_copy = df.copy()
print('Test dataset shape {0}, {1}'.format(df.shape[0], df.shape[1]))

Test dataset shape 4000, 10


In [58]:
# Header names may carry stray leading/trailing whitespace; trim every column
# name so the lookups by name further down succeed.
df = df.rename(columns=str.strip)
df.columns.values

array(['index', 'sex', 'mstatus', 'age', 'children', 'occupation',
       'education', 'income', 'avbal', 'avtrans'], dtype=object)

In [59]:
categorical_features = ["sex", "mstatus", "occupation", "education", "children"]

# Cast "children" to str so it is handled like the other categorical columns
# and its values can be concatenated into the one-hot column names below.
df["children"] = df["children"].astype(str)

# Label-encode each categorical column into "<feature>Num" and remember the
# classes seen, which drive the one-hot expansion that follows.
# NOTE(review): the encoders are fitted on this data set, so the resulting columns
# depend on the categories present here — confirm they match what the model was
# trained on.
label_encoders = {}
label_mappings = {}
for categorical_feature in categorical_features:
    encoder = preprocessing.LabelEncoder()
    df[categorical_feature + "Num"] = encoder.fit_transform(df[categorical_feature])
    label_encoders[categorical_feature] = encoder
    label_mappings[categorical_feature] = encoder.classes_

# One-hot encode: one 0/1 column named "<feature>_<class>" per observed class.
for categorical_feature in categorical_features:
    for class_value in label_mappings[categorical_feature]:
        df[categorical_feature + "_" + class_value] = (df[categorical_feature] == class_value).astype(int)

# drop the categorical values
df = df.drop(categorical_features, axis=1)

df.head()

Unnamed: 0,index,age,income,avbal,avtrans,sexNum,mstatusNum,occupationNum,educationNum,childrenNum,...,occupation_retired,education_postgrad,education_professional,education_secondary,education_tertiary,children_0,children_1,children_2,children_3,children_4
0,1001,44.27,10245.73,53183.04,4069.67,1,0,5,0,2,...,0,1,0,0,0,0,0,1,0,0
1,1002,61.9,1942.57,18100.78,1522.42,1,1,5,2,1,...,0,0,0,1,0,0,1,0,0,0
2,1003,37.3,9896.16,24496.82,2723.35,1,0,0,3,1,...,0,0,0,0,1,0,1,0,0,0
3,1004,25.02,10136.44,26690.01,4143.04,0,0,5,0,1,...,0,1,0,0,0,0,1,0,0,0
4,1005,48.37,2521.11,13439.81,240.26,0,0,4,2,2,...,0,0,0,1,0,0,0,1,0,0


### Remove the "Num" columns only if required

In [60]:
# Toggle for dropping the label-encoded "<feature>Num" columns.
# Set to True for neural networks (remove the Num columns).
shouldRemoveNumColumns = True

In [61]:
# Either keep the label-encoded columns (and say so) or strip them from the frame.
if not shouldRemoveNumColumns:
    print("Not dropping the num columns")
else:
    num_columns = ["sexNum", "mstatusNum", "occupationNum", "educationNum", "childrenNum"]
    df = df.drop(columns=num_columns)
df.head()

Unnamed: 0,index,age,income,avbal,avtrans,sex_F,sex_M,mstatus_divorced,mstatus_married,mstatus_single,...,occupation_retired,education_postgrad,education_professional,education_secondary,education_tertiary,children_0,children_1,children_2,children_3,children_4
0,1001,44.27,10245.73,53183.04,4069.67,0,1,1,0,0,...,0,1,0,0,0,0,0,1,0,0
1,1002,61.9,1942.57,18100.78,1522.42,0,1,0,1,0,...,0,0,0,1,0,0,1,0,0,0
2,1003,37.3,9896.16,24496.82,2723.35,0,1,1,0,0,...,0,0,0,0,1,0,1,0,0,0
3,1004,25.02,10136.44,26690.01,4143.04,1,0,1,0,0,...,0,1,0,0,0,0,1,0,0,0
4,1005,48.37,2521.11,13439.81,240.26,1,0,1,0,0,...,0,0,0,1,0,0,0,1,0,0


## Get the normalizers based on the train data

In [62]:
train_df = pd.read_csv("working_data/trial_promo_training_original.csv")
features_to_scale = ["age", "income", "avbal", "avtrans"]
min_max_scaler = preprocessing.MinMaxScaler()

# Fit on the train data only. fit() returns the scaler itself, so its result
# must not be assigned back into train_df (that would overwrite the features).
min_max_scaler.fit(train_df[features_to_scale])

# transform the test data using the ranges learned from the train data
df[features_to_scale] = min_max_scaler.transform(df[features_to_scale])
df.head()

Unnamed: 0,index,age,income,avbal,avtrans,sex_F,sex_M,mstatus_divorced,mstatus_married,mstatus_single,...,occupation_retired,education_postgrad,education_professional,education_secondary,education_tertiary,children_0,children_1,children_2,children_3,children_4
0,1001,0.378355,0.510499,0.711262,0.527639,0,1,1,0,0,...,0,1,0,0,0,0,0,1,0,0
1,1002,0.632756,0.081311,0.234776,0.193628,0,1,0,1,0,...,0,0,0,1,0,0,1,0,0,0
2,1003,0.277778,0.49243,0.321646,0.351101,0,1,1,0,0,...,0,0,0,0,1,0,1,0,0,0
3,1004,0.100577,0.50485,0.351434,0.537259,1,0,1,0,0,...,0,1,0,0,0,0,1,0,0,0
4,1005,0.437518,0.111215,0.17147,0.025504,1,0,1,0,0,...,0,0,0,1,0,0,0,1,0,0


In [63]:
# Set the customer identifiers aside, then remove that column so only the
# feature columns remain in the frame handed to the model.
customer_ids = df["index"]
df = df.drop(columns="index")
df.head()

Unnamed: 0,age,income,avbal,avtrans,sex_F,sex_M,mstatus_divorced,mstatus_married,mstatus_single,mstatus_widowed,...,occupation_retired,education_postgrad,education_professional,education_secondary,education_tertiary,children_0,children_1,children_2,children_3,children_4
0,0.378355,0.510499,0.711262,0.527639,0,1,1,0,0,0,...,0,1,0,0,0,0,0,1,0,0
1,0.632756,0.081311,0.234776,0.193628,0,1,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,0.277778,0.49243,0.321646,0.351101,0,1,1,0,0,0,...,0,0,0,0,1,0,1,0,0,0
3,0.100577,0.50485,0.351434,0.537259,1,0,1,0,0,0,...,0,1,0,0,0,0,1,0,0,0
4,0.437518,0.111215,0.17147,0.025504,1,0,1,0,0,0,...,0,0,0,1,0,0,0,1,0,0


## Perform the predictions using the model

In [69]:
# Run the model over the prepared feature frame.
raw_outputs = model.predict(df)
print("Num predictions: {0}".format(len(raw_outputs)))

# Reduce each output row to the position of its largest value and attach that
# class index to the untouched copy of the customer data as an integer column.
model_predictions = np.argmax(raw_outputs, axis=1)
df_copy["status"] = model_predictions.astype(int)
df_copy["status"].head()

Num predictions: 4000


0    2
1    2
2    2
3    1
4    2
Name: status, dtype: int32

In [71]:
# Translate the predicted class indices into their labels in a single pass.
# NOTE(review): the label for class 2 is the literal string "None"; CSV readers
# that treat "None" as a missing-value marker may load it back as NaN — confirm
# with downstream consumers.
status_labels = {0: "A", 1: "B", 2: "None"}
df_copy["status"] = df_copy["status"].replace(status_labels)
df_copy.head()

Unnamed: 0,index,sex,mstatus,age,children,occupation,education,income,avbal,avtrans,status
0,1001,M,divorced,44.27,2,legal,postgrad,10245.73,53183.04,4069.67,
1,1002,M,married,61.9,1,legal,secondary,1942.57,18100.78,1522.42,
2,1003,M,divorced,37.3,1,IT,tertiary,9896.16,24496.82,2723.35,
3,1004,F,divorced,25.02,1,legal,postgrad,10136.44,26690.01,4143.04,B
4,1005,F,divorced,48.37,2,government,secondary,2521.11,13439.81,240.26,


In [72]:
# Write the original customer rows plus the predicted "status" column to disk;
# index=False leaves the DataFrame row index out of the file.
df_copy.to_csv("working_data/all_customer_predictions.csv", index=False)