In [1]:

import json
import shutil
import os
import pickle
from callback import MultipleClassAUROC, MultiGPUModelCheckpoint
from configparser import ConfigParser
from generator import AugmentedImageSequence
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.optimizers import SGD
from keras.utils import multi_gpu_model
from utility import get_sample_counts
from weights import get_class_weights
from augmenter import augmenter
from keras import backend as K
import tensorflow as tf
import pandas as pd
import numpy as np
import time
import tensorflow.keras.initializers
import statistics
import tensorflow.keras
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, InputLayer, Flatten, Input, GaussianNoise
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from keras_radam import RAdam

%load_ext tensorboard

from datetime import datetime
from packaging import version

import tensorflow as tf
from tensorflow import keras

print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."


Using TensorFlow backend.


TensorFlow version:  2.1.0


In [2]:
import tensorboard
tensorboard.__version__

'2.1.1'

In [3]:
config_file = "./config.ini"
cp = ConfigParser()
cp.read(config_file)

    # default config
output_dir = cp["DEFAULT"].get("output_dir")
image_source_dir = cp["DEFAULT"].get("image_source_dir")
base_model_name = cp["DEFAULT"].get("base_model_name")
class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
use_trained_model_weights = cp["TRAIN"].getboolean("use_trained_model_weights")
use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
output_weights_name = cp["TRAIN"].get("output_weights_name")
epochs = cp["TRAIN"].getint("epochs")
batch_size = cp["TRAIN"].getint("batch_size")
initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
generator_workers = cp["TRAIN"].getint("generator_workers")
image_dimension = cp["TRAIN"].getint("image_dimension")
train_steps = cp["TRAIN"].get("train_steps")
patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
min_lr = cp["TRAIN"].getfloat("min_lr")
validation_steps = cp["TRAIN"].get("validation_steps")
positive_weights_multiply = cp["TRAIN"].getfloat("positive_weights_multiply")
dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")

In [4]:
def focal_loss(gamma=1.0, alpha=0.5):
    gamma = float(gamma)
    alpha = float(alpha)
    def focal_loss_fixed(y_true, y_pred):
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1.0-epsilon)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed

In [5]:
def construct_network(dropout, neuronPct, neuronShrink,noisePct):
    # We start with some percent of 5000 starting neurons on the first hidden layer.
    neuronCount = int(neuronPct * 5000)
    # Construct neural network
    neuronCount = neuronCount * neuronShrink
    model = Sequential()
    model.add(Input(shape=(1,1536)))
    model.add(Flatten())
    model.add(Dense(neuronCount))
    model.add(GaussianNoise(noisePct))
    model.add(Activation('relu'))
    model.add(Dropout(dropout))
    model.add(Dense(14, activation='sigmoid')) # Output
    return model

In [6]:
# compute steps
train_counts, train_pos_counts = get_sample_counts(output_dir, "train", class_names)
dev_counts, _ = get_sample_counts(output_dir, "dev", class_names)
    
if train_steps == "auto":
    train_steps = int(train_counts / batch_size)
else:
    try:
        train_steps = int(train_steps)
    except ValueError:
        raise ValueError(f"""train_steps: {train_steps} is invalid,please use 'auto' or integer.""")
    print(f"** train_steps: {train_steps} **")

if validation_steps == "auto":
    validation_steps = int(dev_counts / batch_size)
else:
    try:
        validation_steps = int(validation_steps)
    except ValueError:
        raise ValueError(f"""validation_steps: {validation_steps} is invalid,please use 'auto' or integer.""")
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
print("** compute class weights from training data **")
class_weights = get_class_weights(train_counts,train_pos_counts,multiply=positive_weights_multiply,)
print("** class_weights **")
print(class_weights)

** compute class weights from training data **
** class_weights **
[{0: 0.976060692178489, 1: 0.023939307821511}, {0: 0.9379028967906056, 1: 0.06209710320939444}, {0: 0.977507900874183, 1: 0.02249209912581691}, {0: 0.9801862148908839, 1: 0.01981378510911613}, {0: 0.9642020357560434, 1: 0.03579796424395662}, {0: 0.9663727015263743, 1: 0.033627298473625666}, {0: 0.8859702012473223, 1: 0.11402979875267771}, {0: 0.9586866934982315, 1: 0.04131330650176841}, {0: 0.9623146440112557, 1: 0.03768535598874437}, {0: 0.9298929992036218, 1: 0.07010700079637826}, {0: 0.9335352709009039, 1: 0.06646472909909606}, {0: 0.9021976306069932, 1: 0.09780236939300682}, {0: 0.9453965277787032, 1: 0.05460347222129675}, {1: 0.720226409263611, 0: 0.27977359073638897}]


In [7]:
test_steps = cp["TEST"].get("test_steps")
test_counts, _ = get_sample_counts(output_dir, "test", class_names)

if test_steps == "auto":
    test_steps = int(test_counts / batch_size)
else:
    try:
        test_steps = int(test_steps)
    except ValueError:
        raise ValueError(f"""test_steps: {test_steps} is invalid,please use 'auto' or integer.""")
        
print(f"** test_steps: {test_steps} **")

** test_steps: 21 **


In [8]:
train_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
validation_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "dev.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(output_dir, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )

In [9]:

def optimize_network(dropout,lr,neuronPct,neuronShrink,alpha,gamma,noisePct):
    # Define the Keras TensorBoard callback.
    logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")


    #lookahead = Lookahead(k=5, alpha=0.5) # Initialize Lookahead
    #lookahead.inject(model) # add into model
    output_weights_path = os.path.join(output_dir,  str(dropout)+"_"+str(lr)+"_"+"_"+str(neuronPct)+"_"+str(neuronShrink)+"_"+str(noisePct)+"_"+output_weights_name)
    print(f"** set output weights path to: {output_weights_path} **")
    checkpoint = ModelCheckpoint(
                 output_weights_path,
                 save_weights_only=True,
                 save_best_only=True,
                 verbose=1,
            )
    start_time = time.time()
    model = construct_network(dropout, neuronPct, neuronShrink,noisePct)
    
    #model.compile(loss=focal_loss(gamma=gamma,alpha=alpha), optimizer=SGD(lr=lr))
    optimizer = SGD(lr=initial_learning_rate)
    model.compile(optimizer=optimizer,loss=focal_loss(gamma=gamma,alpha=alpha))
    #lookahead = Lookahead(k=5, alpha=0.5) # Initialize Lookahead
    #lookahead.inject(model) # add into model
    callbacks = [
            checkpoint,
            keras.callbacks.TensorBoard(log_dir=logdir),
            #TensorBoard(log_dir=os.path.join(output_dir, "logs"), batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=patience_reduce_lr,
                              verbose=1, mode="min", min_lr=min_lr), 
            EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
        ]
    
    model.summary()
    
    history = model.fit_generator(
            generator=train_sequence,
            steps_per_epoch=train_steps,
            epochs=epochs,

            validation_data=validation_sequence,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()
    
    test_log_path = os.path.join(output_dir, str(dropout)+"_"+str(lr)+"_"+"_"+str(neuronPct)+"_"+str(neuronShrink)+"_"+str(noisePct)+"_"+"test.log")
    print(f"** write log to {test_log_path} **")
    aurocs = []
    
    with open(test_log_path, "w") as f:
        for i in range(len(class_names)):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                score = 0
            f.write(f"{class_names[i]}: {score}\n")
        mean_auroc = float(np.mean(aurocs))
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")
    

    print("iteration|auroc|alpha|dropout|gamma|learning_rate|neuronPct|neuronShrink|noisePct")
    tensorflow.keras.backend.clear_session()
    time_took = time.time() - start_time
    return mean_auroc

        
    
    model.summary()
    


In [10]:
from new_bayes_opt.bayesian_optimization import BayesianOptimization
import time

# Supress NaN warnings
import warnings
warnings.filterwarnings("ignore",category=RuntimeWarning)

# Bounded region of parameter space
pbounds = { 'gamma':(1.0, 4.0),
            'alpha':(0.5, 2.0),
            'dropout': (0.05, 0.2),
           'lr': (0.009, 0.015),
           'neuronPct': (0.1900 , 0.1950),
           'neuronShrink': (0.350, 0.360),
           'noisePct':(0.1,0.4)
          }

#print(bounds.values())


In [11]:
optimizer = BayesianOptimization(
    f=optimize_network,
    pbounds=pbounds,
    verbose=2,  # verbose = 1 prints only when a maximum 
    # is observed, verbose = 0 is silent
    random_state=1,
)

Kunci ['alpha', 'dropout', 'gamma', 'lr', 'neuronPct', 'neuronShrink', 'noisePct']
Bound  [[0.5   2.   ]
 [0.05  0.2  ]
 [1.    4.   ]
 [0.009 0.015]
 [0.19  0.195]
 [0.35  0.36 ]
 [0.1   0.4  ]]


In [12]:
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
logger = JSONLogger(path="./noise12_1.json")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [13]:
def convert_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60))
    m = int((sec_elapsed % (60 * 60)) / 60)
    s = sec_elapsed % 60
    return "{}:{:>02}:{:>05.2f}".format(h, m, s)

In [14]:
start_time = time.time()
optimizer.maximize(init_points=5,acq="weightedei", n_iter=2, omega=0.9)
time_took = time.time() - start_time

Isine x [1.12553301 0.15804867 1.00034312 0.010814   0.19073378 0.35092339
 0.15587806]
Konten (1.125533007053861, 0.15804867401632372, 1.0003431244520347, 0.010813995435791039, 0.19073377945408557, 0.35092338594768796, 0.1558780634133013)
Masuk
** set output weights path to: ./experiments/0.15804867401632372_0.010813995435791039__0.19073377945408557_0.35092338594768796_0.1558780634133013_weights.h5 **
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 1536)              0         
_________________________________________________________________
dense (Dense)                (None, 334)               513358    
_________________________________________________________________
gaussian_noise (GaussianNois (None, 334)               0         
_________________________________________________________________
activation (Activation)      (None, 334)      

Epoch 6/7
Epoch 00006: val_loss did not improve from -3978.69521

Epoch 00006: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Restoring model weights from the end of the best epoch.
Epoch 00006: early stopping
** write log to ./experiments/0.1095151211346005_0.011515167086419769__0.1934260975019838_0.35204452249731516_0.3634352309172837_test.log **
mean auroc: 0.49996247070602956
iteration|auroc|alpha|dropout|gamma|learning_rate|neuronPct|neuronShrink|noisePct
Konten (1.0183410905645716, 0.1095151211346005, 2.616450202010071, 0.011515167086419769, 0.1934260975019838, 0.35204452249731516, 0.3634352309172837)
Konten (1.0183410905645716, 0.1095151211346005, 2.616450202010071, 0.011515167086419769, 0.1934260975019838, 0.35204452249731516, 0.3634352309172837)
Isine x [0.54108139 0.15057013 2.25191441 0.01235214 0.19070193 0.35198101
 0.34022337]
Konten (0.5410813897968892, 0.15057012652676036, 2.251914407101381, 0.01235213897067451, 0.19070193469297617, 0.351981014890848

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 76 steps, validate for 10 steps
Epoch 1/7
Epoch 00001: val_loss improved from inf to 163.84670, saving model to ./experiments/0.19504139478869603_0.013490444119825869__0.19416043034372363_0.3530768597732787_0.2663398686246416_weights.h5
Epoch 2/7
Epoch 00002: val_loss improved from 163.84670 to 162.77279, saving model to ./experiments/0.19504139478869603_0.013490444119825869__0.19416043034372363_0.3530768597732787_0.2663398686246416_weights.h5
Epoch 3/7
Epoch 00003: val_loss improved from 162.77279 to 162.33005, saving model to ./experiments/0.19504139478869603_0.013490444119825869__0.19416043034372363_0.3530768597732787_0.2663398686246416_weights.h5
Epoch 4/7
Epoch 00004: val_loss improved from 162.33005 to 162.03192, saving model to ./experiments/0.19504139478869603_0.013490444119825869__0.19416043034372363_0.3530768597732787_0.2663398686246416_weights.h5
Epoch 5/7
Epoch 00005: val_loss improved from 162.03192 to 161.94445, 

In [15]:
print(f"Total runtime: {convert_string(time_took)}")
print(optimizer.max)

Total runtime: 0:17:41.41
{'target': 0.8576525167957695, 'params': {'alpha': 0.7547456293468533, 'dropout': 0.18172137551441198, 'gamma': 1.2950405014991504, 'lr': 0.011526645750030312, 'neuronPct': 0.1947894476507525, 'neuronShrink': 0.3553316528497302, 'noisePct': 0.30756313418514203}}


In [16]:
import gc
gc.collect()

5352

In [17]:
new_params = {} 
for t in optimizer.max.keys():
    print(t)
    new_params = optimizer.max.get(t)    

target
params


In [18]:
a = new_params.get('alpha')
d = new_params.get('dropout')
g = new_params.get('gamma')
l = new_params.get('lr')
np = new_params.get('neuronPct')
ns = new_params.get('neuronShrink')
noiP = new_params.get('noisePct')
new_params.update({'alpha':(float(a-0.025),float(a+0.025)),
                   'dropout':(float(d-0.025),float(d+0.025)),
                   'gamma':(float(g-0.025),float(g+0.025)),
                   'lr':(float(l),float(l+0.025)),                   
                   'neuronPct':(float(np-0.025),float(np+0.025)),
                   'neuronShrink':(float(ns-0.025),float(ns+0.025)),
                   'noisePct':(float(noiP-0.025),float(noiP+0.025))
                  }
                 )

In [19]:
new_params

{'alpha': (0.7297456293468533, 0.7797456293468533),
 'dropout': (0.15672137551441198, 0.20672137551441197),
 'gamma': (1.2700405014991505, 1.3200405014991503),
 'lr': (0.011526645750030312, 0.03652664575003031),
 'neuronPct': (0.16978944765075252, 0.2197894476507525),
 'neuronShrink': (0.33033165284973015, 0.3803316528497302),
 'noisePct': (0.282563134185142, 0.33256313418514205)}

In [20]:
import csv
w = csv.writer(open("new_params.csv", "w"))
for key, val in new_params.items():
    w.writerow([key, val])