In [15]:
import os
import logging

#Set absl logging level to suppress warnings
logging.getLogger('absl').setLevel(logging.ERROR)

# Standard Data Packages
import pandas as pd
import numpy as np

#Visualization Packages
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

# tf and keras
import tensorflow as tf
from keras import models

#Scikit
from sklearn import metrics
from sklearn import preprocessing

#Other Packages
import joblib

In [2]:
# Load the held-out test split from the working directory.
# NOTE(review): the file contains a saved index column that pandas reads as
# "Unnamed: 0" (visible in the head() preview) — consider index_col=0 if no
# later cell relies on that column.
test_df = pd.read_csv(filepath_or_buffer="test.csv")

In [3]:
# Dimensions of the loaded frame as (rows, columns).
test_df.shape

(110023, 13)

In [4]:
# Sanity check: evaluates to True if any cell in the frame is missing.
test_df.isna().to_numpy().any()

False

In [5]:
# Clean and normalise the string features: trim surrounding whitespace and
# lower-case every value so spelling variants collapse to one form.
# The vectorised .str accessor is used instead of .apply(str.lower): it handles
# the whole column in one call and propagates missing values instead of
# raising TypeError on a non-string entry.
for text_column in ("Surname", "Geography", "Gender"):
    test_df[text_column] = test_df[text_column].str.strip().str.lower()

In [6]:
# Preview the first rows to confirm the text columns are now trimmed and lower-cased.
# NOTE(review): the preview prints CustomerId and Surname values — confirm the
# data is safe to publish before sharing the notebook with outputs.
test_df.head()

Unnamed: 0,id,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,165034,15773898,lucchese,586,france,female,23.0,2,0.0,2,0.0,1.0,160976.75
1,165035,15782418,nott,683,france,female,46.0,2,0.0,1,1.0,0.0,72549.27
2,165036,15807120,k?,656,france,female,34.0,7,0.0,2,1.0,0.0,138882.09
3,165037,15808905,o'donnell,681,france,male,36.0,8,0.0,1,1.0,0.0,113931.57
4,165038,15607314,higgins,752,germany,male,38.0,10,121263.62,1,1.0,0.0,139431.0


In [8]:
# The ten model features, in the order the rest of the notebook expects.
# The identifier columns (id, CustomerId, Surname) are deliberately left out.
feature_columns = [
    "CreditScore", "Geography", "Gender", "Age", "Tenure",
    "Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary",
]

# Select first, then copy: only the needed columns are duplicated (the original
# deep-copied the whole frame before discarding columns), and test_set is an
# explicit independent frame rather than a slice of test_df.
test_set = test_df[feature_columns].copy()
test_set.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,586,france,female,23.0,2,0.0,2,0.0,1.0,160976.75
1,683,france,female,46.0,2,0.0,1,1.0,0.0,72549.27
2,656,france,female,34.0,7,0.0,2,1.0,0.0,138882.09
3,681,france,male,36.0,8,0.0,1,1.0,0.0,113931.57
4,752,germany,male,38.0,10,121263.62,1,1.0,0.0,139431.0


In [11]:
# Feature groups: the first list is rescaled, the second is label-encoded.
quantitative_columns = ["CreditScore", "Age", "Tenure", "Balance", "NumOfProducts", "EstimatedSalary"]
categorical_columns = ["Geography", "Gender", "HasCrCard", "IsActiveMember"]

# Preprocess a separate copy so test_set keeps the raw feature values.
test_norm = test_set.copy(deep=True)

# Rescale the quantitative test features with the persisted, already-fitted scaler.
# NOTE(review): joblib.load unpickles arbitrary objects — only load artifacts
# produced by this project's own training run.
# NOTE(review): the displayed values fall within [0, 1], which suggests
# min-max scaling — confirm against the training notebook.
scaler = joblib.load("scaler.joblib")
test_norm[quantitative_columns] = scaler.transform(test_norm[quantitative_columns])

# Every categorical feature has its own persisted encoder stored as
# "<column>_encoder.joblib"; apply each one to the matching test column.
for column in categorical_columns:
    label_encoder = joblib.load(f"{column}_encoder.joblib")
    test_norm[column] = label_encoder.transform(test_norm[column])

test_norm.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,0.472,0,0,0.067568,0.2,0.0,0.333333,0,1,0.804903
1,0.666,0,0,0.378378,0.2,0.0,0.0,1,0,0.362723
2,0.612,0,0,0.216216,0.7,0.0,0.333333,1,0,0.694419
3,0.662,0,1,0.243243,0.8,0.0,0.0,1,0,0.569654
4,0.804,1,1,0.27027,1.0,0.483318,0.0,1,0,0.697164


In [13]:
# Restore the persisted model from disk.
# NOTE(review): presumably a random-forest classifier fitted on the unbalanced
# training set, judging by the file name — confirm against the training notebook.
rf_un = joblib.load("RF_Best_Unbalanced.joblib")

#make predictions
# One prediction per row of the preprocessed test features.
# NOTE(review): predict_val is reassigned by the Keras cell further down, so
# these predictions are lost once that cell runs.
predict_val = rf_un.predict(test_norm)

predict_val

array([0, 1, 0, ..., 0, 0, 0])

In [19]:
# Restore the trained Keras network from its saved archive.
mnn_norm = models.load_model("MNN_norm.keras")

# The network is fed a dict keyed by feature name, each value a single-column
# DataFrame. Building it from a list of names replaces ten copy-pasted entries.
mnn_feature_names = [
    "CreditScore", "Geography", "Gender", "Age", "Tenure",
    "Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary",
]

#make predictions
# Keep the raw model outputs so they stay available after thresholding.
# NOTE(review): presumably sigmoid probabilities in [0, 1] — confirm against the model definition.
mnn_scores = mnn_norm.predict(
    {feature_name: test_norm[[feature_name]] for feature_name in mnn_feature_names}
)

# Turn scores into hard labels in one vectorised step: scores above 0.5 become 1,
# everything else 0. Casting back to the score dtype keeps predict_val's shape and
# dtype the same as the earlier in-place thresholding produced.
predict_val = (mnn_scores > 0.5).astype(mnn_scores.dtype)

predict_val



array([[0.],
       [1.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)