In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import scipy.stats as stats
import time

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder, KBinsDiscretizer, StandardScaler, MinMaxScaler, FunctionTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import roc_auc_score

from imblearn.over_sampling import RandomOverSampler, SMOTE, SMOTENC
from imblearn.pipeline import Pipeline as imb_pipeline
from category_encoders.target_encoder import TargetEncoder

import lightgbm as lgb
import xgboost as xgb
import optuna
from optuna.integration import LightGBMPruningCallback

import warnings
warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None

%matplotlib inline

  from pandas import MultiIndex, Int64Index


In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import Sequential
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.utils import to_categorical

In [3]:
from helper_codes.model_wrappers import ANNWrapper

# Read Data

In [4]:
# Load the training data frame from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code - only read pickles from a trusted source.
train_data = pd.read_pickle('./data/train_file_eng.pkl')

In [5]:
# Numeric inputs (scaled during preprocessing).
num_features = ['age', 'duration', 'campaign', 'previous']

# Categorical inputs (one-hot encoded during preprocessing).
cat_features = [
    'job', 'education', 'default', 'contact',
    'month', 'day_of_week', 'poutcome', 'quarter',
]

# Every model input, in the column order used to slice the training frame.
feature_set = [
    'age', 'job', 'education', 'default', 'contact', 'month',
    'day_of_week', 'duration', 'campaign', 'previous', 'poutcome', 'quarter',
]

In [6]:
# Store every categorical feature with pandas' 'category' dtype.
train_data = train_data.astype({cat_col: 'category' for cat_col in cat_features})

# Train Test Split

In [7]:
# Hold out 20% of the rows as the test split; the seed keeps the split repeatable.
# NOTE(review): no `stratify=` is passed, so class proportions may differ between the splits.
X_train, X_test, y_train, y_test = train_test_split(
    train_data[feature_set],
    train_data.y_encoded,
    test_size=0.2,
    random_state=24,
)

In [8]:
# Give every split a fresh 0..n-1 index (in place) so positional and
# label-based row selection agree further down.
for split_part in (X_train, X_test, y_train, y_test):
    split_part.reset_index(drop=True, inplace=True)

In [9]:
# Sanity check: (rows, columns) of the train and test feature frames after the split.
X_train.shape, X_test.shape

((26328, 12), (6582, 12))

# Hyperparameter Tuning for ANN

In [131]:
# No global preprocessing before tuning.
#
# ann_objective() performs oversampling, one-hot encoding and min-max scaling
# itself, separately inside every cross-validation fold, and it selects rows
# with `X.iloc[...]` and columns by name. It therefore needs the raw DataFrame
# produced by the train/test split. Transforming X_train here would
#   * turn it into a NumPy array, breaking `X.iloc` and the column lookups,
#   * oversample before the folds are created (duplicated rows would end up on
#     both sides of a split), and
#   * oversample a second time in the "Final Test" section, which applies the
#     same steps once more.
# The full-data preprocessing is done once, in the "Final Test" section.
assert isinstance(X_train, pd.DataFrame) and list(X_train.columns) == feature_set, \
    "X_train must still be the raw feature DataFrame when tuning starts"

In [10]:
# Shapes of X_train / X_test at this point in the notebook.
X_train.shape, X_test.shape

((26328, 12), (6582, 12))

In [12]:
# Candidate values for the "layers" hyperparameter, grouped by tuple length.
# Presumably each tuple gives the units per layer, ending in one output unit -
# the exact meaning is defined by ANNWrapper.
_two_entry_layouts = [(20, 1), (10, 1), (60, 1), (80, 1), (16, 1), (8, 1), (4, 1)]
_three_entry_layouts = [(8, 4, 1), (16, 8, 1), (16, 4, 1), (32, 8, 1), (20, 10, 1)]
_four_entry_layouts = [(60, 30, 15, 1), (32, 16, 4, 1), (20, 10, 5, 1), (64, 16, 4, 1), (16, 8, 4, 1)]

layer_list = _two_entry_layouts + _three_entry_layouts + _four_entry_layouts

In [30]:
def ann_objective(trial, X, y, layer_list):
    """Optuna objective: mean 5-fold ROC AUC of an ANN for one sampled configuration.

    For every stratified fold the training part is randomly oversampled, the
    numeric columns are min-max scaled and the categorical columns one-hot
    encoded (both fitted on the training part only), an ``ANNWrapper`` is
    trained, and the held-out part is scored with ROC AUC.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.
    X : pandas.DataFrame
        Raw (un-encoded, un-scaled) features containing the columns named in
        the module-level ``num_features`` and ``cat_features``.
    y : array-like
        Binary target, aligned with ``X`` by row position.
    layer_list : list of tuple
        Candidate values offered for the ``layers`` hyperparameter.

    Returns
    -------
    float
        Mean ROC AUC over the folds.
    """
    # Search space for the ANN
    param_grid = {
        "layers": trial.suggest_categorical("layers", layer_list),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.5, log=True),
        "layer_activation": trial.suggest_categorical("layer_activation", ['relu', 'sigmoid', 'tanh']),
        "kernel_init": trial.suggest_categorical("kernel_init", ['uniform', 'lecun_uniform', 'normal', 'glorot_uniform']),
        "optimizer": trial.suggest_categorical("optimizer", ['rmsprop', 'adam']),
        "batch_size": trial.suggest_categorical("batch_size", [32, 64, 128]),
        "epochs": trial.suggest_categorical("epochs", [50]),
        "dropout": trial.suggest_categorical("dropout", [0, 0.1, 0.2, 0.3, 0.4, 0.5]),
        # Regularisation strengths are not tuned yet:
        #"l1_reg": trial.suggest_float("l1_reg", 0, 1),
        #"l2_reg": trial.suggest_float("l2_reg", 0, 1),
    }

    # The fold indices are positions, so give the target a 0..n-1 index and
    # slice it with .iloc. Label-based `y[idx]` is only correct when the
    # caller happens to have reset the index beforehand.
    y = pd.Series(y).reset_index(drop=True)

    # Cross validation with StratifiedKFold
    scores = []
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
    for train_idx, test_idx in cv.split(X, y):
        X_train_fold, X_test_fold = X.iloc[train_idx], X.iloc[test_idx]
        y_train_fold, y_test_fold = y.iloc[train_idx], y.iloc[test_idx]

        # Oversample the training part only, so the held-out part keeps its
        # original class balance.
        sampler = RandomOverSampler(sampling_strategy='auto', random_state=24)
        X_train_fold2, y_train_fold2 = sampler.fit_resample(X_train_fold, y_train_fold)

        # One-hot encoding & min-max scaling, fitted on the training part.
        # handle_unknown='ignore' encodes a category that is absent from the
        # training part as all zeros instead of raising during transform().
        cat_transformer = Pipeline(steps=[('encoder', OneHotEncoder(sparse=False, handle_unknown='ignore'))])
        num_transformer = Pipeline(steps=[('scaler', MinMaxScaler())])
        preprocessor = ColumnTransformer(
            transformers=[
                ('num_trans', num_transformer, num_features),
                ('cat_trans', cat_transformer, cat_features),
            ],
            remainder='drop',
        )

        X_train_fold2 = pd.DataFrame(preprocessor.fit_transform(X_train_fold2))
        X_test_fold = pd.DataFrame(preprocessor.transform(X_test_fold))
        # Fresh indices so labels line up with the re-built feature frames.
        y_train_fold2 = pd.Series(y_train_fold2).reset_index(drop=True)
        y_test_fold = pd.Series(y_test_fold).reset_index(drop=True)

        # Drop Keras state left over from the previous fold / trial.
        tf.keras.backend.clear_session()
        # NOTE(review): the held-out part is also passed as validation_data
        # next to EarlyStopping. If ANNWrapper uses it to decide when to stop,
        # the fold score below is optimistically biased - confirm.
        ann_model = ANNWrapper(input_shape=X_train_fold2.shape[1],
                               loss='binary_crossentropy',
                               metrics=[tf.keras.metrics.AUC()],
                               out_activation='sigmoid',
                               callbacks=[EarlyStopping(patience=10)],
                               validation_data=(X_test_fold, y_test_fold),
                               model_type='classifier',
                               return_probability=True, **param_grid)

        ann_model.fit(X_train_fold2, y_train_fold2)

        # Column 1 of the prediction output is used as the positive-class score.
        y_preds_fold = ann_model.predict(X_test_fold)[:, 1]
        scores.append(roc_auc_score(y_test_fold, y_preds_fold))

    return np.mean(scores)

In [None]:
# Seed the sampler so the sequence of suggested configurations is repeatable,
# in line with random_state=24 used for the split, the folds and the oversampler.
# Network training itself is not seeded in the visible cells, so the objective
# values (and hence later suggestions) can still vary between runs.
study = optuna.create_study(
    direction="maximize",
    study_name="ANN",
    sampler=optuna.samplers.TPESampler(seed=24),
)


def func(trial):
    """Bind the training data and layer candidates to the Optuna objective."""
    return ann_objective(trial, X_train, y_train, layer_list)


study.optimize(func, n_trials=20)

In [32]:
# Five best trials, ranked by objective value (mean cross-validated ROC AUC), best first.
study.trials_dataframe().sort_values('value', ascending=False).head()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_dropout,params_epochs,params_kernel_init,params_layer_activation,params_layers,params_learning_rate,params_optimizer,state
12,12,0.919776,2022-04-07 20:03:39.294951,2022-04-07 20:05:08.530220,0 days 00:01:29.235269,32,0.4,50,uniform,tanh,"(16, 1)",0.010825,rmsprop,COMPLETE
19,19,0.918515,2022-04-07 20:13:45.756320,2022-04-07 20:15:22.359275,0 days 00:01:36.602955,32,0.5,50,glorot_uniform,sigmoid,"(60, 1)",0.012327,rmsprop,COMPLETE
7,7,0.917302,2022-04-07 19:56:20.246595,2022-04-07 19:58:02.954969,0 days 00:01:42.708374,32,0.5,50,glorot_uniform,relu,"(20, 1)",0.031384,adam,COMPLETE
17,17,0.916993,2022-04-07 20:10:31.068109,2022-04-07 20:11:51.583609,0 days 00:01:20.515500,32,0.3,50,glorot_uniform,relu,"(16, 8, 1)",0.017257,adam,COMPLETE
13,13,0.916469,2022-04-07 20:05:08.531371,2022-04-07 20:06:36.384383,0 days 00:01:27.853012,32,0.5,50,uniform,relu,"(16, 1)",0.010008,rmsprop,COMPLETE


# Final Test

In [33]:
# This cell overwrites X_train / X_test with NumPy arrays, so it can only run
# once per train/test split; fail with a clear message instead of an obscure
# error from fit_resample / get_dummies.
assert isinstance(X_train, pd.DataFrame) and isinstance(X_test, pd.DataFrame), \
    "X_train / X_test are already transformed - re-run the train/test split cells first"

# Random Over Sampling (training split only; the test split keeps its class balance)
sampler = RandomOverSampler(sampling_strategy='auto', random_state=24)
X_train, y_train = sampler.fit_resample(X_train, y_train)

# One Hot Encoding
X_train = pd.get_dummies(X_train, columns=cat_features, drop_first=True)
X_test = pd.get_dummies(X_test, columns=cat_features, drop_first=True)
# The two frames are encoded independently; force the test frame onto the
# training columns (same set, same order, missing dummies filled with 0) so
# the scaler and the network always see the layout they were fitted on.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# Min Max Scaling (fitted on the training split, applied to both)
sc = MinMaxScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [34]:
# Shapes after oversampling, one-hot encoding and scaling; both column counts must match.
X_train.shape, X_test.shape

((46720, 40), (6582, 40))

In [36]:
# Hyperparameters of the best trial in the tuning table above (trial 12).
ann_hyperparam_dict = dict(
    layers=(16, 1),
    learning_rate=0.010825,
    layer_activation="tanh",
    kernel_init="uniform",
    optimizer="rmsprop",
    batch_size=32,
    epochs=50,
    dropout=0.4,
)

In [37]:
# Drop Keras state left over from the tuning runs before building the final model.
tf.keras.backend.clear_session()

auc_metric = tf.keras.metrics.AUC()
early_stopping = EarlyStopping(patience=10)

# NOTE(review): the test split is passed as validation_data next to
# EarlyStopping. If ANNWrapper uses it to decide when to stop, the test score
# reported afterwards is not a fully independent estimate - confirm.
ann_model = ANNWrapper(
    input_shape=X_train.shape[1],
    model_type='classifier',
    out_activation='sigmoid',
    loss='binary_crossentropy',
    metrics=[auc_metric],
    callbacks=[early_stopping],
    validation_data=(X_test, y_test),
    return_probability=True,
    **ann_hyperparam_dict,
)

ann_model.fit(X_train, y_train)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50


In [38]:
# ROC AUC on the test split; column 1 of the prediction output is used as the positive-class score.
test_predictions = ann_model.predict(X_test)
y_preds = test_predictions[:, 1]
score = roc_auc_score(y_test, y_preds)
score

0.9154282202058972