#### BayesianOptimization을 이용한 CNN 모델 하이퍼파라미터 튜닝
* tensorflow를 사용하기 때문에 `t212p39` anaconda env 사용

#### 라이브러리 불러오기

In [34]:
from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction

import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

from datetime import datetime as dt, timedelta as td

In [3]:
# Load the MNIST dataset shipped with Keras; load_data() returns a
# (images, labels) pair for the training split and one for the test split.
(train_x, train_y), (test_x, test_y) = mnist.load_data()

In [4]:
# Divide every pixel by 255.0 so the inputs become floats
# (presumably in [0, 1], assuming the raw images are 8-bit — confirm).
train_x = train_x / 255.0
test_x = test_x / 255.0

In [5]:
# Append a trailing axis of size 1 (the channel axis Conv2D expects).
# tf.expand_dims returns TensorFlow tensors, so from here on train_x/test_x
# are tensors rather than the arrays returned by the loader.
train_x = tf.expand_dims(train_x, axis=-1)
test_x = tf.expand_dims(test_x, axis=-1)

2023-08-22 08:52:34.312652: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22787 MB memory:  -> device: 0, name: NVIDIA TITAN RTX, pci bus id: 0000:5e:00.0, compute capability: 7.5
2023-08-22 08:52:34.314889: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22338 MB memory:  -> device: 1, name: NVIDIA TITAN RTX, pci bus id: 0000:af:00.0, compute capability: 7.5


In [6]:
# One-hot encode the labels into 10 classes, matching the 10-unit softmax
# output layer and the categorical cross-entropy loss used by the model.
train_y = to_categorical(train_y, num_classes=10)
test_y = to_categorical(test_y, num_classes=10)

In [7]:
def create_cnn_model(filters, kernel_size, pool_size, dense_units):
    """Build and compile a single-convolution CNN classifier.

    The network is Conv2D -> MaxPooling2D -> Flatten -> Dense(relu) ->
    Dense(10, softmax). The input shape is taken from the module-level
    ``train_x`` (everything after its first axis).

    Args:
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size passed to ``Conv2D``.
        pool_size: Pooling window size passed to ``MaxPooling2D``.
        dense_units: Width of the hidden fully connected layer.

    Returns:
        A Keras ``Sequential`` model compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    image_shape = train_x.shape[1:]

    model = Sequential()
    model.add(
        Conv2D(filters, kernel_size, activation='relu', input_shape=image_shape)
    )
    model.add(MaxPooling2D(pool_size))
    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    # Ten output units: one per one-hot encoded class.
    model.add(Dense(10, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

In [8]:
# Ad-hoc baseline run with hand-picked hyperparameters, timed end to end.
# The printed labels mean "accuracy" and "elapsed time".
_start = dt.now()
model = create_cnn_model(filters=256, kernel_size=3, pool_size=2, dense_units=64)
model.fit(x=train_x, y=train_y, epochs=10, verbose=0)
# evaluate() returns [loss, accuracy]; only the accuracy is reported.
_, accuracy = model.evaluate(x=test_x, y=test_y, verbose=0)
print(f'정확도: {accuracy}')
print(f'처리시간: {dt.now()-_start}')

2023-08-22 08:55:10.145531: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600
2023-08-22 08:55:11.712450: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-08-22 08:55:11.852176: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7f2665ac6050 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-08-22 08:55:11.852246: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA TITAN RTX, Compute Capability 7.5
2023-08-22 08:55:11.852272: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (1): NVIDIA TITAN RTX, Compute Capability 7.5
2023-08-22 08:55:11.888516: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-08-22 08:55:12.049818: I tensorflow/tsl/platform/default/subpr

정확도: 0.987500011920929
처리시간: 0:02:00.534160


In [9]:
# Search box for the automatic optimisation run: (lower, upper) bounds per
# hyperparameter. The optimizer samples continuous values inside each range.
# NOTE(review): target_func truncates every sampled value with int(), so an
# upper bound such as pool_size=3 or kernel_size=5 is only used when the
# sample equals the bound exactly.
pbounds = dict(
    filters=(16, 256),
    kernel_size=(3, 5),
    pool_size=(2, 3),
    dense_units=(32, 512),
    epochs=(5, 20),
)

In [10]:
def target_func(filters, kernel_size, pool_size, dense_units, epochs):
    """Objective for the Bayesian optimizer: train one CNN and score it.

    The optimizer proposes continuous values, so every hyperparameter is
    truncated to an integer with ``int()`` before use.

    Args:
        filters: Proposed number of convolution filters.
        kernel_size: Proposed convolution kernel size.
        pool_size: Proposed max-pooling window size.
        dense_units: Proposed width of the hidden dense layer.
        epochs: Proposed number of training epochs.

    Returns:
        Accuracy of the trained model on ``test_x`` / ``test_y``.

    Note:
        The score is measured on the test split, so the hyperparameters are
        effectively selected on test data; the reported best accuracy is an
        optimistic estimate rather than a held-out result.
    """
    # Every call builds a brand-new model. Drop the Keras global state left
    # behind by earlier trials so memory does not keep growing over a search.
    tf.keras.backend.clear_session()

    model = create_cnn_model(
        int(filters), int(kernel_size), int(pool_size), int(dense_units)
    )
    model.fit(train_x, train_y, epochs=int(epochs), verbose=0)
    # evaluate() returns [loss, accuracy]; only the accuracy is the target.
    _, accuracy = model.evaluate(test_x, test_y, verbose=0)
    return accuracy

In [11]:
# Optimizer that calls target_func itself while searching inside pbounds.
# A fixed random_state keeps the sequence of proposed points reproducible;
# verbose=2 prints one table row per evaluated point.
optimizer = BayesianOptimization(
    target_func, pbounds, random_state=123, verbose=2
)

In [12]:
# Run the search: 2 initial points followed by 10 optimisation iterations,
# then print the total elapsed time (label means "elapsed time").
_start = dt.now()
optimizer.maximize(n_iter=10, init_points=2)
print(f'처리시간: {dt.now()-_start}')

|   iter    |  target   | dense_... |  epochs   |  filters  | kernel... | pool_size |
-------------------------------------------------------------------------------------
| [0m1        [0m | [0m0.9879   [0m | [0m366.3    [0m | [0m9.292    [0m | [0m70.44    [0m | [0m4.103    [0m | [0m2.719    [0m |
| [0m2        [0m | [0m0.985    [0m | [0m235.1    [0m | [0m19.71    [0m | [0m180.4    [0m | [0m3.962    [0m | [0m2.392    [0m |
| [0m3        [0m | [0m0.9866   [0m | [0m366.1    [0m | [0m10.53    [0m | [0m71.74    [0m | [0m3.38     [0m | [0m2.321    [0m |
| [0m4        [0m | [0m0.9857   [0m | [0m368.6    [0m | [0m9.899    [0m | [0m72.95    [0m | [0m3.186    [0m | [0m2.804    [0m |
| [0m5        [0m | [0m0.9876   [0m | [0m364.5    [0m | [0m13.81    [0m | [0m69.45    [0m | [0m4.861    [0m | [0m2.237    [0m |
| [0m6        [0m | [0m0.9879   [0m | [0m362.8    [0m | [0m11.41    [0m | [0m72.16    [0m | [0m3.345    [0

In [46]:
# Report the best point registered so far. optimizer.max is a dict with the
# keys "params" and "target".
# NOTE(review): the label says "validation", but target_func scores on
# test_x/test_y, so this is the best test accuracy.
best_trial = optimizer.max
print(f'Best parameters: {best_trial["params"]}')
print(f'Best validation accuracy: {best_trial["target"]}')

Best parameters: {'dense_units': 92.0243546296183, 'epochs': 8.738983549393662, 'filters': 69.83388714960446, 'kernel_size': 5.359160271287903, 'pool_size': 4.569730940341101}
Best validation accuracy: 0.9909999966621399


#### 제안-평가-등록 (suggest → evaluate → register) 방식으로 최적화 루프 직접 구성

In [18]:
# Search box for the manual suggest/evaluate/register loop: wider kernel and
# pooling ranges, narrower dense-layer and epoch ranges than the first run.
# NOTE(review): target_func truncates every sampled value with int(), so each
# upper bound is only used when the sample equals the bound exactly.
pbounds = dict(
    filters=(16, 256),
    kernel_size=(3, 10),
    pool_size=(2, 10),
    dense_units=(32, 128),
    epochs=(5, 15),
)

In [19]:
# Optimizer without an objective (f=None): it only proposes points and
# records results; the evaluation is done by hand via suggest()/register().
optimizer = BayesianOptimization(
    f=None, pbounds=pbounds, random_state=123, verbose=2
)

In [20]:
utility = UtilityFunction(
    kind='ucb', # Upper confidence bound: balances exploration and exploitation, favouring uncertain points
    kappa=2.5,  # How much uncertainty the UCB strategy takes into account;
                # higher values favour exploration, lower values exploitation
    xi=0.0      # Improvement margin: higher values look for larger improvements, lower for smaller ones
                # NOTE(review): xi presumably only affects the 'ei'/'poi' kinds, not 'ucb' — confirm in bayes_opt docs
)

In [21]:
# Ask the optimizer for the next point to try; the result is a dict keyed by
# the hyperparameter names in pbounds, with continuous values.
next_point = optimizer.suggest(utility)
print(next_point)

{'dense_units': 98.86104181739472, 'epochs': 7.861393349503794, 'filters': 70.44434885540875, 'kernel_size': 6.859203383580239, 'pool_size': 7.755751758284505}


In [22]:
# Evaluate the suggested point by hand: target_func trains a model with these
# hyperparameters and returns its accuracy on the test split.
_start = dt.now()
target = target_func(**next_point)
print(target)
# Label means "elapsed time".
print(f'처리시간: {dt.now()-_start}')

0.9896000027656555
처리시간: 0:01:19.731189


In [24]:
# Feed the observed (point, score) pair back so later suggestions use it.
optimizer.register(params=next_point, target=target)

In [39]:
# Manual optimisation loop: 10 rounds of suggest -> evaluate -> register,
# printing one table row per round plus the best point and the total time.
_start = dt.now()
table_rule = '|------------------------------------------------------------------------------|'
print('|  Index |  Target | Epochs | Filter | Kernel |  Pool  |  Dense |     Time     |')
print(table_rule)
for trial in range(1, 11):
    trial_start = dt.now()
    next_point = optimizer.suggest(utility)
    target = target_func(**next_point)
    optimizer.register(params=next_point, target=target)

    # Show the truncated integers, i.e. the values target_func actually used.
    shown = [
        int(next_point[key])
        for key in ("epochs", "filters", "kernel_size", "pool_size", "dense_units")
    ]
    cells = [
        f'{trial:>7} ',
        f'{target:8.5f} ',
        *(f'{value:>7} ' for value in shown),
        # Drop the last three digits so the time is shown in milliseconds.
        f'  {str(dt.now()-trial_start)[:-3]} ',
    ]
    print('|' + '|'.join(cells) + '|')

print(table_rule)
print(optimizer.max)
print(f'처리시간: {dt.now()-_start}')

|  Index |  Target | Epochs | Filter | Kernel |  Pool  |  Dense |     Time     |
|------------------------------------------------------------------------------|
|      1 | 0.98980 |     14 |     73 |      7 |      2 |     94 |  0:02:39.573 |
|      2 | 0.98910 |     13 |     77 |      5 |      7 |     91 |  0:02:26.309 |
|      3 | 0.99100 |      8 |     69 |      5 |      4 |     92 |  0:01:32.250 |
|      4 | 0.99100 |     13 |     74 |      9 |      3 |     94 |  0:02:27.965 |
|      5 | 0.99020 |     10 |     71 |      9 |      5 |     89 |  0:01:54.508 |
|      6 | 0.98810 |     10 |     67 |      3 |      3 |     89 |  0:01:54.046 |
|      7 | 0.98890 |      9 |     71 |      7 |      5 |     93 |  0:01:43.162 |
|      8 | 0.99090 |     10 |     74 |      7 |      2 |     94 |  0:01:54.560 |
|      9 | 0.98380 |      6 |     70 |      6 |      4 |     91 |  0:01:09.665 |
|     10 | 0.96650 |      6 |     20 |      7 |      8 |     83 |  0:01:10.798 |
|---------------------------

In [44]:
_start = dt.now()
# Re-train with the best hyperparameters found. They are read from the
# optimizer instead of being hand-copied from printed output, and truncated
# with int() exactly as target_func does, so the model matches the best trial.
best_params = optimizer.max["params"]
model = create_cnn_model(
    int(best_params["filters"]),
    int(best_params["kernel_size"]),
    int(best_params["pool_size"]),
    int(best_params["dense_units"]),
)
model.fit(train_x, train_y, epochs=int(best_params["epochs"]), verbose=0)
# evaluate() returns [loss, accuracy]; only the accuracy is reported.
_, accuracy = model.evaluate(test_x, test_y, verbose=0)
print(f'정확도: {accuracy}')
print(f'처리시간: {dt.now()-_start}')
# Observation: re-training does not reproduce the best accuracy exactly
# (presumably because training is not seeded, so runs differ — confirm).

정확도: 0.9898999929428101
처리시간: 0:01:32.072386


In [43]:
# Number of test samples, and the number of misclassified samples implied by
# the best accuracy found (0.9909999966621399, copied from the printed result).
test_x.shape[0], test_x.shape[0] * (1-0.9909999966621399)

(10000, 90.00003337860107)