In [1]:
import sys

# Add the parent directory to the import search path (presumably so that the
# `gaminet` package imported below resolves from the repository checkout --
# TODO confirm). The membership check keeps the cell idempotent: re-running it
# no longer piles duplicate entries onto sys.path.
PACKAGE_PARENT = '..'
if PACKAGE_PARENT not in sys.path:
    sys.path.append(PACKAGE_PARENT)

In [2]:
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler

from gaminet import GAMINet
from gaminet.utils import local_visualize
from gaminet.utils import global_visualize_density
from gaminet.utils import global_visualize_wo_density
from gaminet.utils import feature_importance_visualize
from gaminet.utils import plot_regularization
from gaminet.utils import plot_trajectory



In [3]:
task_type = "Classification"

# Load the raw table (semicolon-separated) and the per-column type metadata.
data = pd.read_csv("./bank.csv", sep=";")
with open("./data_types.json") as meta_file:  # context manager closes the handle
    meta_info = json.load(meta_file)

# Convert month abbreviations to their calendar number in a single pass.
month_to_number = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
                   'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}
data['month'] = data['month'].replace(month_to_number)

# Every column but the last is a feature; the last column is the target.
x, y = data.iloc[:, :-1].values, data.iloc[:, [-1]].values
xx = np.zeros((x.shape[0], x.shape[1]), dtype=np.float32)

# The loop index `i` is used as the column index into `x`, so this relies on
# the key order of `meta_info` matching the column order of the CSV.
for i, (key, item) in enumerate(meta_info.items()):
    if item['type'] == 'target':
        # Encode the target labels as ordinal codes and record the label order.
        enc = OrdinalEncoder()
        enc.fit(y)
        y = enc.transform(y)
        meta_info[key]['values'] = enc.categories_[0].tolist()
    elif item['type'] == 'categorical':
        enc = OrdinalEncoder()
        xx[:, [i]] = enc.fit_transform(x[:, [i]])
        meta_info[key]['values'] = []
        # `category` (rather than `item`) avoids shadowing the outer loop variable.
        for category in enc.categories_[0].tolist():
            # Integer-valued categories are stored without a decimal part
            # (e.g. 3.0 -> "3"); anything int() rejects is stringified as-is.
            try:
                is_integral = category == int(category)
            except ValueError:
                is_integral = False
            meta_info[key]['values'].append(str(int(category)) if is_integral else str(category))
    else:
        # Any other type is treated as numeric and rescaled to [0, 1]; the
        # fitted scaler is kept in meta_info.
        # NOTE(review): the scaler is fitted on the full data set before the
        # train/test split below -- confirm this is intended.
        sx = MinMaxScaler((0, 1))
        xx[:, [i]] = sx.fit_transform(x[:, [i]])
        meta_info[key]['scaler'] = sx

# Hold out 20% of the rows for testing with a fixed seed.
train_x, test_x, train_y, test_y = train_test_split(xx.astype(np.float32), y, test_size=0.2, random_state=0)

In [4]:
def metric_wrapper(metric, scaler):
    """Bind ``scaler`` to ``metric`` and return a two-argument scoring function.

    Args:
        metric: callable invoked as ``metric(label, pred, scaler=scaler)``.
        scaler: value forwarded to ``metric`` through its ``scaler`` keyword.

    Returns:
        A function ``(label, pred)`` that returns whatever ``metric`` returns.
    """
    def _bound_metric(label, pred):
        result = metric(label, pred, scaler=scaler)
        return result

    return _bound_metric

def auc(label, pred, scaler=None):
    """Score ``pred`` against ``label`` with ``roc_auc_score``.

    ``scaler`` is accepted so the signature matches what ``metric_wrapper``
    passes, but it is not used in the computation.
    """
    score = roc_auc_score(label, pred)
    return score

# Evaluation metric used at the end of the notebook: AUC with no scaler bound.
get_metric = metric_wrapper(metric=auc, scaler=None)

In [None]:
# Output directory for figures; exist_ok makes the cell safe to re-run and
# avoids the check-then-create race.
folder = "./results/"
os.makedirs(folder, exist_ok=True)

# Random per-sample weights in [0, 1), one row per training sample. They are
# drawn from a dedicated seeded generator so the fit does not depend on the
# state of the global NumPy RNG and is reproducible across kernel restarts.
sample_weight = np.random.RandomState(0).uniform(0, 1, size=(train_x.shape[0], 1))

# Configure the GAMI-Net model. The indices in the monotonicity lists refer to
# feature positions (presumably columns of train_x -- TODO confirm against the
# GAMINet documentation).
model_bank = GAMINet(meta_info=meta_info, interact_num=20,
            interact_arch=[40] * 1, subnet_arch=[40] * 2,
            batch_size=200, task_type=task_type, activation_func=tf.nn.relu,
            main_effect_epochs=5000, interaction_epochs=5000, tuning_epochs=500,
            lr_bp=[0.001, 0.001, 0.001], early_stop_thres=[50, 50, 50],
            heredity=True, loss_threshold=0.01, reg_clarity=0.1,
            mono_increasing_list=[11],
            mono_decreasing_list=[0],
            lattice_size=10,
            verbose=True, val_ratio=0.2, random_state=0)
model_bank.fit(train_x, train_y, sample_weight=sample_weight)

# Collect the training logs and plot the training trajectory and the
# regularization path, saving both PNG and EPS versions under `folder`.
data_dict_logs = model_bank.summary_logs(save_dict=False)
plot_trajectory(data_dict_logs, folder=folder, name="bank_traj", save_png=True, save_eps=True)
plot_regularization(data_dict_logs, folder=folder, name="bank_regu", save_png=True, save_eps=True)

####################GAMI-Net training start.####################
##########Stage 1: main effect training start.##########
Main effects training epoch: 1, train loss: 0.35058, val loss: 0.34717
Main effects training epoch: 2, train loss: 0.33487, val loss: 0.33082
Main effects training epoch: 3, train loss: 0.31394, val loss: 0.30984
Main effects training epoch: 4, train loss: 0.29304, val loss: 0.28919
Main effects training epoch: 5, train loss: 0.27602, val loss: 0.27282
Main effects training epoch: 6, train loss: 0.26421, val loss: 0.26122
Main effects training epoch: 7, train loss: 0.25449, val loss: 0.25210
Main effects training epoch: 8, train loss: 0.24848, val loss: 0.24627
Main effects training epoch: 9, train loss: 0.24289, val loss: 0.24108
Main effects training epoch: 10, train loss: 0.24015, val loss: 0.23892
Main effects training epoch: 11, train loss: 0.23687, val loss: 0.23550
Main effects training epoch: 12, train loss: 0.23457, val loss: 0.23350
Main effects training e

Main effects training epoch: 114, train loss: 0.22170, val loss: 0.22422
Main effects training epoch: 115, train loss: 0.22171, val loss: 0.22431
Main effects training epoch: 116, train loss: 0.22243, val loss: 0.22509
Main effects training epoch: 117, train loss: 0.22174, val loss: 0.22409
Main effects training epoch: 118, train loss: 0.22185, val loss: 0.22421
Main effects training epoch: 119, train loss: 0.22164, val loss: 0.22419
Main effects training epoch: 120, train loss: 0.22238, val loss: 0.22487
Main effects training epoch: 121, train loss: 0.22268, val loss: 0.22513
Main effects training epoch: 122, train loss: 0.22201, val loss: 0.22465
Main effects training epoch: 123, train loss: 0.22266, val loss: 0.22552
Main effects training epoch: 124, train loss: 0.22174, val loss: 0.22440
Main effects training epoch: 125, train loss: 0.22191, val loss: 0.22458
Main effects training epoch: 126, train loss: 0.22190, val loss: 0.22453
Main effects training epoch: 127, train loss: 0.222

Main effects training epoch: 227, train loss: 0.22057, val loss: 0.22400
Main effects training epoch: 228, train loss: 0.22087, val loss: 0.22414
Main effects training epoch: 229, train loss: 0.22063, val loss: 0.22399
Main effects training epoch: 230, train loss: 0.22111, val loss: 0.22465
Main effects training epoch: 231, train loss: 0.22088, val loss: 0.22421
Early stop at epoch 231, with validation loss: 0.22421
##########Stage 1: main effect training stop.##########
##########Stage 2: interaction training start.##########
Interaction training epoch: 1, train loss: 0.22202, val loss: 0.22394
Interaction training epoch: 2, train loss: 0.22183, val loss: 0.22384
Interaction training epoch: 3, train loss: 0.22170, val loss: 0.22373
Interaction training epoch: 4, train loss: 0.22155, val loss: 0.22361
Interaction training epoch: 5, train loss: 0.22136, val loss: 0.22339
Interaction training epoch: 6, train loss: 0.22097, val loss: 0.22306
Interaction training epoch: 7, train loss: 0.22

Interaction training epoch: 109, train loss: 0.20732, val loss: 0.21323
Interaction training epoch: 110, train loss: 0.20730, val loss: 0.21333
Interaction training epoch: 111, train loss: 0.20727, val loss: 0.21317
Interaction training epoch: 112, train loss: 0.20735, val loss: 0.21299
Interaction training epoch: 113, train loss: 0.20719, val loss: 0.21305
Interaction training epoch: 114, train loss: 0.20717, val loss: 0.21314
Interaction training epoch: 115, train loss: 0.20721, val loss: 0.21308
Interaction training epoch: 116, train loss: 0.20726, val loss: 0.21329
Interaction training epoch: 117, train loss: 0.20728, val loss: 0.21302
Interaction training epoch: 118, train loss: 0.20717, val loss: 0.21300
Interaction training epoch: 119, train loss: 0.20734, val loss: 0.21344
Interaction training epoch: 120, train loss: 0.20724, val loss: 0.21310
Interaction training epoch: 121, train loss: 0.20717, val loss: 0.21293
Interaction training epoch: 122, train loss: 0.20717, val loss: 

Global Interpretation

In [None]:
# Compute the global explanation of the fitted model, asking for the resulting
# dictionary to be saved (save_dict=True) under `folder`.
data_dict_global = model_bank.global_explain(
    save_dict=True,
    folder=folder,
    name="bank_global",
)

# Plot 8 main effects and 4 interactions, four panels per row, and save the
# figure as both PNG and EPS.
global_visualize_density(
    data_dict_global,
    folder=folder,
    name="bank_global",
    main_effect_num=8,
    interaction_num=4,
    cols_per_row=4,
    save_png=True,
    save_eps=True,
)

Feature Importance

In [None]:
# Plot feature importance from the global explanation dictionary computed above.
feature_importance_visualize(data_dict_global)

Interpret the prediction of a test sample

In [None]:
# Select the first test sample; indexing with a list ([[0]]) rather than a bare
# integer keeps the leading sample axis on both arrays.
first_x, first_y = test_x[[0]], test_y[[0]]

# Explain that single prediction and plot the first (only) explanation
# without writing a PNG.
data_dict_local = model_bank.local_explain(first_x, first_y, save_dict=False)
local_visualize(data_dict_local[0], save_png=False)

## Model save and load 

In [None]:
# Save the fitted model to the current directory under the name "model_saved".
model_bank.save(folder="./", name="model_saved")

In [None]:
# Construct a GAMINet instance with empty meta_info, then load into it the
# model stored as "model_saved" in the current directory (the same folder/name
# passed to model_bank.save).
# The reloaded model should not be refit.
modelnew = GAMINet(meta_info={})
modelnew.load(folder="./", name="model_saved")

In [None]:
# Predict on both splits with the reloaded model.
pred_train, pred_test = modelnew.predict(train_x), modelnew.predict(test_x)

# Score each split with the configured metric, rounded to 5 decimals.
train_score = np.round(get_metric(train_y, pred_train), 5)
test_score = np.round(get_metric(test_y, pred_test), 5)

# Report as [train, test].
gaminet_stat = np.hstack([train_score, test_score])
print(gaminet_stat)