### * \[important] This notebook is **only for Colab-execution**, please use colaboratory to test following codes.
### * \[important] Change runtime type to GPU first & execute following cells
### * Official Github repository & documents @ https://github.com/keras-team/keras-tuner
### * Keras-tuner Basic tutorial (TF official document) @ https://www.tensorflow.org/tutorials/keras/keras_tuner


<hr>

<br>

## 1. Install Keras-Tuner

In [1]:
!pip install keras-tuner==1.0.2

Collecting keras-tuner==1.0.2
  Downloading keras-tuner-1.0.2.tar.gz (62 kB)
[?25l[K     |█████▏                          | 10 kB 31.4 MB/s eta 0:00:01[K     |██████████▍                     | 20 kB 36.4 MB/s eta 0:00:01[K     |███████████████▋                | 30 kB 21.2 MB/s eta 0:00:01[K     |████████████████████▉           | 40 kB 17.4 MB/s eta 0:00:01[K     |██████████████████████████      | 51 kB 9.3 MB/s eta 0:00:01[K     |███████████████████████████████▎| 61 kB 9.2 MB/s eta 0:00:01[K     |████████████████████████████████| 62 kB 1.6 MB/s 
Collecting terminaltables
  Downloading terminaltables-3.1.0.tar.gz (12 kB)
Collecting colorama
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Building wheels for collected packages: keras-tuner, terminaltables
  Building wheel for keras-tuner (setup.py) ... [?25l[?25hdone
  Created wheel for keras-tuner: filename=keras_tuner-1.0.2-py3-none-any.whl size=78935 sha256=dedde08a7940edc84efa25d10bdd4aef5297f0bbb0201daf457

In [2]:
import tensorflow as tf
import kerastuner as kt

print(tf.__version__)
print(kt.__version__)

2.5.0
1.0.2


In [2]:
# from tensorflow.keras import datasets, Sequential, utils
# from tensorflow.keras.layers import Flatten, Conv2D, Dense, Dropout
# from tensorflow.keras.optimizers import Adam

<br>

## 3. Bayesian HPO with Keras-tuner

In [3]:
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from tensorflow.keras import datasets, Sequential, utils, layers, models, optimizers, losses
# from tensorflow.keras.layers import Flatten, Conv2D, Dense, Dropout
# from tensorflow.keras.optimizers import Adam

import kerastuner as kt
import numpy as np
import IPython

import copy

import warnings
warnings.filterwarnings(action='ignore') 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 시각화
import matplotlib as mpl
import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
import seaborn as sns
%matplotlib inline

# 결측치
import missingno as msno

import sklearn
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn import model_selection, linear_model
from sklearn.metrics import auc

# 파이프라인
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Import libraries for resampling
import sklearn.neighbors._base

In [5]:
data_df = pd.read_csv('filename.csv', index_col=0)
data_df.head(3)

Unnamed: 0,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio
0,1,45,1,3,3,1,2,0,39,5,1,3,12691.0,777,11914.0,1.335,1144,42,1.625,0.061
1,1,49,0,5,2,2,4,0,44,6,1,2,8256.0,864,7392.0,1.541,1291,33,3.714,0.105
2,1,51,1,3,2,1,3,0,36,4,1,0,3418.0,0,3418.0,2.594,1887,20,2.333,0.0


In [6]:
x_data = data_df.drop(['Attrition_Flag'], axis=1)
y_data = data_df['Attrition_Flag']

In [7]:
x_train, x_test, y_train, y_test =\
model_selection.train_test_split(x_data, y_data, test_size=0.3, random_state=0)

In [8]:
categorical_features = ['Card_Category', 'Education_Level', 'Gender', 'Income_Category', 'Marital_Status']
categorical_transformer = OneHotEncoder(categories='auto') # categories='auto' : just for ignoring warning messages

temp = list(data_df[data_df.columns.difference(categorical_features)].columns)
temp.remove('Attrition_Flag')

numeric_features = temp
numeric_transformer = StandardScaler()


preprocessor = ColumnTransformer(
    transformers=[ # List of (name, transformer, column(s))
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])


preprocessor_pipe = Pipeline(steps=[('preprocessor', preprocessor)])

preprocessor_pipe.fit(x_train)

x_train_transformed = preprocessor_pipe.transform(x_train)
x_test_transformed = preprocessor_pipe.transform(x_test)

pd.DataFrame(x_train_transformed)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33
0,-0.748406,1.867430,-0.396875,-0.643042,-0.031516,0.503334,0.661574,0.382646,-0.072588,-0.248166,-0.528565,1.176519,-0.226807,0.065913,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1,-0.465262,-0.136514,1.409288,-0.483890,0.465118,-1.042699,-1.355504,0.506545,-0.529711,-0.227189,1.413773,-0.203125,-0.602199,-0.686286,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,-0.645385,-1.004532,-0.396875,-0.773617,-0.528149,0.503334,0.661574,-0.732443,-0.309941,-0.877476,0.766327,-1.419165,-0.445185,-0.937019,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.661164,-0.843788,-0.396875,0.603356,-0.900624,-0.269682,1.670113,-0.236848,-0.274777,-0.206212,1.413773,-0.647541,0.097591,0.776322,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,-0.340494,-1.004532,0.506206,-0.468502,1.085910,-1.042699,1.670113,1.249938,0.498816,-0.420177,1.413773,-1.419165,-0.598166,-1.145963,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4951,1.505764,-0.697332,-0.396875,1.598057,-0.900624,-1.042699,-0.346965,-0.856342,-0.024239,-0.105522,1.413773,1.012916,-0.539970,-1.354907,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4952,-0.256693,-0.450858,0.506206,-0.283961,-0.155674,-0.269682,-0.346965,-1.351937,-1.290118,0.053903,-0.528565,-0.300799,-0.787158,-1.396696,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
4953,-0.630997,0.642202,-0.396875,-0.609849,-1.645575,-1.042699,0.661574,0.010950,-0.081379,-1.066269,0.118881,0.240071,-0.683154,-0.686286,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4954,1.285004,-1.004532,0.506206,1.158190,0.713435,0.503334,-1.355504,1.373837,-1.527470,-1.666211,1.413773,-1.419165,-0.766991,-0.937019,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [9]:
x_train = x_train_transformed.astype('float32')
x_test = x_test_transformed.astype('float32') 

In [61]:
x_train_transformed.shape

(4956, 34)

In [10]:
# 2) Build the hyper-model
# Available HyperParameter search spaces (https://j.mp/2IXPzh7) : Int, Float, Boolean, Choice, Fixed

def build_hyper_model(hp):
    
    model = models.Sequential()
    # model.add(layers.Dense(input_dim=34, units=64, activation='relu'))
        
    # Tune the number of hidden layer (Choose an optimal value between 1~3)
    for i in range(hp.Int('num_layers', min_value=1, max_value=3)): 
        # Tune the number of perceptrons in a dense layer (Choose an optimal value between 32~512) 
        hp_units = hp.Int('units_' + str(i), min_value=32, max_value=512, step=32) # 32:512 & step 32, all parameter names should be unique (we name the inner parameters 'units_' + str(i))
        hp_activations = hp.Choice('activation_' + str(i), values=['relu', 'elu'])
        model.add(layers.Dense(units = hp_units, activation = hp_activations))

    model.add(layers.Dense(10, activation='softmax')) # class 10 : 0~9

    # Tune the learning rate for the optimizer (Choose an optimal value from 0.01, 0.001, or 0.0001)
    hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3, 1e-4]) 
    
    model.compile(optimizer = optimizers.Adam(learning_rate = hp_learning_rate),
                loss = losses.SparseCategoricalCrossentropy(), # use sparse c.c when our labels are looks like "1" (single integer), not "[1,0,0]" (one-hot vector) (@ http://j.mp/2XS0jmv)
                metrics = ['accuracy'])
    
    return model

In [11]:
# 3) Select tuner and compile it
# Available tuners (https://j.mp/39cWz4n) : kt.BayesianOptimization / kt.Hyperband / kt.RandomSearch / kt.Sklearn (https://j.mp/3nSJn8O)

tuner = kt.BayesianOptimization(build_hyper_model,
                                objective = 'val_accuracy', # Hyper-params tuning을 위한 목적함수 설정 (metric to minimize or maximize)
                                max_trials = 10, # 서로 다른 Hyper-params 조합으로 시도할 총 Trial 횟수 설정
                                directory = 'test_prac_dir', # Path to the working directory
                                project_name = 'MNIST_hyper_1') # Name to use as directory name for files saved by this Tuner

# tuner = kt.Hyperband(build_hyper_model,
#                      objective = 'val_accuracy', # Hyper-params tuning을 위한 목적함수 설정 (metric to minimize or maximize)
#                      max_epochs = 5, # 최대 epoch 수 설정, epoch 수 자체도 지정한 최대 횟수 내에서 변화시켜가며 테스트를 진행함 (epochs to train one model) 
#                      directory = 'test_prac_dir', # Path to the working directory
#                      project_name = 'MNIST_hyper_1') # Name to use as directory name for files saved by this Tuner

tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
activation_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [12]:
# 4) Train the model

tuner.search(x_train, y_train, epochs=10, validation_data = (x_test, y_test)) # epochs == learning epoch for training a single model(epoch for each trial) 


# # 아래와 같이 별도의 클래스로 콜백을 정의하여 search 함수에서 활용하면 모든 학습 단계 종료 후 학습 중 발생한 출력 결과를 자동으로 지워낼 수 있습니다.
# class ClearTrainingOutput(tf.keras.callbacks.Callback):
#   def on_train_end(*args, **kwargs):
#     IPython.display.clear_output(wait = True)

# tuner.search(x_train, y_train, epochs = 7, validation_data = (x_test, y_test), callbacks = [ClearTrainingOutput()]) # epochs == learning epoch for training a single model 

Trial 10 Complete [00h 00m 05s]
val_accuracy: 0.9317647218704224

Best val_accuracy So Far: 0.9350588321685791
Total elapsed time: 00h 01m 05s


In [13]:
# 5) Check the result 

tuner.results_summary(num_trials=3) # Show "n" best trial results

Results summary
Results in test_prac_dir/MNIST_hyper_1
Showing 3 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
num_layers: 1
units_0: 32
activation_0: relu
learning_rate: 0.01
units_1: 32
activation_1: elu
units_2: 512
activation_2: elu
Score: 0.9350588321685791
Trial summary
Hyperparameters:
num_layers: 1
units_0: 512
activation_0: relu
learning_rate: 0.001
units_1: 320
activation_1: elu
units_2: 416
activation_2: elu
Score: 0.9345882534980774
Trial summary
Hyperparameters:
num_layers: 1
units_0: 512
activation_0: relu
learning_rate: 0.01
units_1: 512
activation_1: elu
units_2: 512
activation_2: relu
Score: 0.9336470365524292


In [14]:
# Check top-3 trials' hyper-params

top3_models = tuner.get_best_hyperparameters(num_trials=3)
# print(tuner.get_best_hyperparameters(num_trials=3)[0].space) # 특정 Trial의 Search-space 를 확인할 수 있음
# print(tuner.get_best_hyperparameters(num_trials=3)[0].values) # 특정 Trial에 적용된 Hyper-params를 확인할 수 있음

for idx, model in enumerate(top3_models):
    print('Model performance rank :', idx)
    print(model.values)
    print()


# Check the best trial's hyper-params

best_hps = top3_models[0]

print("""
The hyperparameter search is complete. 
* Optimal # of layers : {}
* Optimal value of the learning-rate : {}""".format(best_hps.get('num_layers'), best_hps.get('learning_rate')))

for layer_num in range(best_hps.get('num_layers')):
    print('Layer {} - # of Perceptrons :'.format(layer_num), best_hps.get('units_' + str(layer_num)))
    print('Layer {} - Applied activation function :'.format(layer_num), best_hps.get('activation_' + str(layer_num)))

Model performance rank : 0
{'num_layers': 1, 'units_0': 32, 'activation_0': 'relu', 'learning_rate': 0.01, 'units_1': 32, 'activation_1': 'elu', 'units_2': 512, 'activation_2': 'elu'}

Model performance rank : 1
{'num_layers': 1, 'units_0': 512, 'activation_0': 'relu', 'learning_rate': 0.001, 'units_1': 320, 'activation_1': 'elu', 'units_2': 416, 'activation_2': 'elu'}

Model performance rank : 2
{'num_layers': 1, 'units_0': 512, 'activation_0': 'relu', 'learning_rate': 0.01, 'units_1': 512, 'activation_1': 'elu', 'units_2': 512, 'activation_2': 'relu'}


The hyperparameter search is complete. 
* Optimal # of layers : 1
* Optimal value of the learning-rate : 0.01
Layer 0 - # of Perceptrons : 32
Layer 0 - Applied activation function : relu


In [18]:
print(best_models[0])

<tensorflow.python.keras.engine.sequential.Sequential object at 0x7f7510280c90>


In [16]:
# We can retrain the model with the optimal hyperparameters from the search.
best_hps = top3_models[0]

# Build the model with the optimal hyperparameters and train it on the data.
model = tuner.hypermodel.build(best_hps)
model.fit(x_train_transformed, y_train, epochs=10, validation_data=(x_test_transformed, y_test))

results = model.evaluate(x_test, y_test)
print('Cross-entropy :', results[0])
print('Accuracy :', results[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Cross-entropy : 0.17171938717365265
Accuracy : 0.929411768913269


In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                1120      
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 1,450
Trainable params: 1,450
Non-trainable params: 0
_________________________________________________________________


In [17]:
# We can also find detailed logs, checkpoints, etc, in the folder "directory/project_name".

# The [test_prac_dir/MNIST_hyper_1] directory contains detailed logs and checkpoints for every trial (model configuration) run during the hyperparameter search. 
# If you re-run the hyperparameter search, the Keras Tuner uses the existing state from these logs to resume the search. 
# To disable this behavior, pass an additional [overwrite = True] argument while instantiating the tuner.

for trial in tuner.oracle.get_best_trials(num_trials=3):
    print('Trial-score is :', trial.score)
    print('Trial-directory(trial_id) is :', trial.trial_id)
    print()

# tuner.oracle.trials -> get all trial_id 

Trial-score is : 0.9350588321685791
Trial-directory(trial_id) is : c4d62457511638f3107db517044a3cf5

Trial-score is : 0.9345882534980774
Trial-directory(trial_id) is : 986a0a7674e5f3a01883604341d7431e

Trial-score is : 0.9336470365524292
Trial-directory(trial_id) is : 0217c25a4f2de0f6606b11cb844759d9

