In [1]:
# 구글 드라이브 연동
from google.colab import drive
drive.mount('cnontent/')

Mounted at cnontent/


In [2]:
cd /content/cnontent/MyDrive/SSU/nural_nework/project

/content/cnontent/MyDrive/SSU/nural_nework/project


In [3]:
# wandb install
!pip install -q wandb

[K     |████████████████████████████████| 1.8 MB 6.4 MB/s 
[K     |████████████████████████████████| 181 kB 46.1 MB/s 
[K     |████████████████████████████████| 145 kB 45.3 MB/s 
[K     |████████████████████████████████| 63 kB 1.8 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [4]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [9]:
# 필요한 라이브러리 import

import tqdm
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import wandb
from wandb.keras import WandbCallback
import os

import random
%matplotlib inline

In [6]:
# pandas 통해 읽은 csv data numpy 형태로 변경
def convert_data_csv_to_numpy(data, sorting = False):
  if sorting == True:
    data = data.sort_values(by=[0], axis=0)

  label = np.array(data[0]) # csv file 에서 0번째 colum은 index임
  only_data = np.array(data.drop([0], axis = 1)).reshape((-1, 28, 28, 1)) # csv file에서 0번 째 colum 탈락 -> data만 남게 됨

  return only_data, label

In [7]:
# 효율적인 실험 관리를 위해 balanced data 사용
def balanced_data(data, label, border, n_train=1000, val_rate = 0.2):
  i, start=0,0
  d_train = l_train = d_val = l_val= None
  n_data_per_class = n_train
  n_val_per_class = int(n_data_per_class*val_rate)


  for idx, class_end in enumerate(border):
    if i ==0: # 정의한 border만큼 iteration 하면서 data를 추가
      val_end = n_data_per_class+n_val_per_class

      d_train = data[:n_data_per_class].copy()
      l_train = label[:n_data_per_class].copy()

      d_val = data[n_data_per_class:val_end].copy()
      l_val = label[n_data_per_class:val_end].copy()

      i+=1
    else: # border index까지 data 추가했으면 다음 class로 이동
      train_end = start+n_data_per_class
      val_end = train_end+n_val_per_class

      now_d_data = data[start:train_end].copy()
      now_l_train = label[start:train_end].copy()
      d_train = np.concatenate((d_train, now_d_data))
      l_train = np.concatenate((l_train, now_l_train))

      now_d_val = data[train_end:val_end].copy()
      now_l_val = label[train_end:val_end].copy()
      d_val = np.concatenate((d_val, now_d_val))
      l_val = np.concatenate((l_val, now_l_val))
    start += class_end
  return (d_train, l_train, d_val, l_val)

# Data Prepare

In [None]:
train_csv, test_csv = [], []
train_csv = pd.read_csv("./emnist-byclass-train.csv", header=None) # csv 파일 읽음

In [None]:
train, train_label = convert_data_csv_to_numpy(train_csv, sorting = True) # numpy로 변경 -> balanced data를 생성하기 위해 dataset 오름차순 정렬(label 기준)

In [None]:
border = np.unique(train_label, return_counts = True)[1]
print(border)
train_data, train_label, val_data, val_label = balanced_data(train.copy(), train_label.copy(), border)

In [10]:
# 추후 aux resnet 모델의 label 정의를 위해 sparse label값 저장
sparse_train_label = train_label.copy()
sparse_val_label = val_label.copy()

In [11]:
# ndarray type의 데이터 tensor 형식으로 변경
train_data = tf.convert_to_tensor(train_data, dtype=tf.float32)
val_data = tf.convert_to_tensor(val_data, dtype=tf.float32)

# categorical loss 적용을 위한 one-hot 인코딩
train_label = keras.utils.to_categorical(train_label)
val_label = keras.utils.to_categorical(val_label)

In [12]:
print(train_data.shape)
print(train_label.shape)
print(val_data.shape)
print(val_label.shape)

(62000, 28, 28, 1)
(62000, 62)
(12400, 28, 28, 1)
(12400, 62)


# Lenet wandb

In [None]:
# Lnet 파라미터 세팅 실험을 위한 학습 함수
def sweep_train(config_defaults=None):
    # Set default values
    config_defaults = {
      'batch_size': 64,
      'optimizer' : 'adam',
      'learning_rate' : 0.01,
      'n_first_node' : 6,
      'n_first_kernel_size' : 5,
      'n_second_node' : 16,
      'n_second_kernel_size' : 5,
      'n_third_node' : 120,
      'n_third_kernel_size' : 5,
    }
    # Initialize wandb with a sample project name
    wandb.init(config=config_defaults)  # this gets over-written in the Sweep

    # initialize model
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(wandb.config.n_first_node, kernel_size=wandb.config.n_first_kernel_size, strides=1,  activation="selu", kernel_initializer="lecun_normal", input_shape=train_data[0].shape, padding='same'))
    model.add(keras.layers.AveragePooling2D())
    model.add(keras.layers.Conv2D(wandb.config.n_second_node, kernel_size=wandb.config.n_second_kernel_size, strides=1, activation="selu", kernel_initializer="lecun_normal", padding='valid'))
    model.add(keras.layers.AveragePooling2D())
    model.add(keras.layers.Conv2D(wandb.config.n_third_node, kernel_size=wandb.config.n_third_kernel_size, strides=1, activation="selu", kernel_initializer="lecun_normal", padding='valid'))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(84, activation="selu", kernel_initializer="lecun_normal"))
    model.add(keras.layers.Dense(62, activation='softmax'))

    print(model.summary())

    # Instantiate an optimizer to train the model.
    optimizer = keras.optimizers.Adam(learning_rate=wandb.config.learning_rate, beta_1=0.9, beta_2=0.999)
    # Instantiate a loss function.
    # loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)


    model.compile(loss="categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])
    hist = model.fit(train_data, train_label, epochs=2, validation_data=(val_data, val_label), callbacks=[WandbCallback()])

In [None]:
# 변경할 파라미터
sweep_config = {
  'method': 'random', 
  'parameters': {
      'learning_rate':{
          'values' : [0.05, 0.01, 0.005]
      },
      'n_first_node' : {
          'values' : [6, 12]
      },
      'n_second_node' : {
          'values' : [16, 32]
      },
      'n_third_node' : {
          'values' : [120, 60]
      },
      'n_first_kernel_size' : {
          'values' : [5, 3]
      },
      'n_second_kernel_size' : {
          'values' : [5, 3]
      },
      'n_third_kernel_size' : {
          'values' : [5, 3]
      }
  }
}

# wandb 프로젝트 생성 및 실행
sweep_id = wandb.sweep(sweep_config, project="Lenet_for_report")
wandb.agent(sweep_id, function=sweep_train, count=3)

Create sweep with ID: dsidt7j3
Sweep URL: https://wandb.ai/chanin-eom/Lenet_for_report/sweeps/dsidt7j3


[34m[1mwandb[0m: Agent Starting Run: f5tzsloh with config:
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	n_first_kernel_size: 5
[34m[1mwandb[0m: 	n_first_node: 6
[34m[1mwandb[0m: 	n_second_kernel_size: 5
[34m[1mwandb[0m: 	n_second_node: 16
[34m[1mwandb[0m: 	n_third_kernel_size: 3
[34m[1mwandb[0m: 	n_third_node: 60


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 6)         156       
                                                                 
 average_pooling2d (AverageP  (None, 14, 14, 6)        0         
 ooling2D)                                                       
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 16)        2416      
                                                                 
 average_pooling2d_1 (Averag  (None, 5, 5, 16)         0         
 ePooling2D)                                                     
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 60)          8700      
                                                                 
 flatten (Flatten)           (None, 540)               0

VBox(children=(Label(value='0.762 MB of 0.762 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁█
epoch,▁█
loss,█▁
val_accuracy,▁█
val_loss,█▁

0,1
accuracy,0.68753
best_epoch,1.0
best_val_loss,0.79632
epoch,1.0
loss,0.8131
val_accuracy,0.68565
val_loss,0.79632


[34m[1mwandb[0m: Agent Starting Run: g2d1htxa with config:
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	n_first_kernel_size: 3
[34m[1mwandb[0m: 	n_first_node: 6
[34m[1mwandb[0m: 	n_second_kernel_size: 5
[34m[1mwandb[0m: 	n_second_node: 16
[34m[1mwandb[0m: 	n_third_kernel_size: 3
[34m[1mwandb[0m: 	n_third_node: 60


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 6)         60        
                                                                 
 average_pooling2d (AverageP  (None, 14, 14, 6)        0         
 ooling2D)                                                       
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 16)        2416      
                                                                 
 average_pooling2d_1 (Averag  (None, 5, 5, 16)         0         
 ePooling2D)                                                     
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 60)          8700      
                                                                 
 flatten (Flatten)           (None, 540)               0

VBox(children=(Label(value='0.761 MB of 0.761 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁█
epoch,▁█
loss,█▁
val_accuracy,▁█
val_loss,█▁

0,1
accuracy,0.68806
best_epoch,1.0
best_val_loss,0.85498
epoch,1.0
loss,0.80229
val_accuracy,0.6775
val_loss,0.85498


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: se265a3i with config:
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	n_first_kernel_size: 3
[34m[1mwandb[0m: 	n_first_node: 12
[34m[1mwandb[0m: 	n_second_kernel_size: 5
[34m[1mwandb[0m: 	n_second_node: 16
[34m[1mwandb[0m: 	n_third_kernel_size: 3
[34m[1mwandb[0m: 	n_third_node: 120


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 12)        120       
                                                                 
 average_pooling2d (AverageP  (None, 14, 14, 12)       0         
 ooling2D)                                                       
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 16)        4816      
                                                                 
 average_pooling2d_1 (Averag  (None, 5, 5, 16)         0         
 ePooling2D)                                                     
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 120)         17400     
                                                                 
 flatten (Flatten)           (None, 1080)              0

VBox(children=(Label(value='1.407 MB of 1.407 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁█
epoch,▁█
loss,█▁
val_accuracy,▁█
val_loss,█▁

0,1
accuracy,0.6884
best_epoch,1.0
best_val_loss,0.79794
epoch,1.0
loss,0.80328
val_accuracy,0.69435
val_loss,0.79794


# MobileNetV2 wandb

In [None]:
# 3채널 영상을 다루는 MobileNet을 위해 data를 3채널 color 영상으로 변경
rgb_train_data = tf.image.grayscale_to_rgb(train_data)
rgb_val_data = tf.image.grayscale_to_rgb(val_data)

In [None]:
print(rgb_train_data.shape)
print(train_label.shape)
print(rgb_val_data.shape)
print(val_label.shape)

(62000, 28, 28, 3)
(62000, 62)
(12400, 28, 28, 3)
(12400, 62)


In [14]:
# wandb 중 optimizer 변경을 위한 opt 선택 함수
def select_opt(opt_name, lr):
  if opt_name == 'adam':
    optimizer = keras.optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999)
  elif opt_name == 'rmsprop':
    optimizer = keras.optimizers.RMSprop(learning_rate=lr, rho=0.9)
  elif opt_name == 'sgd':
    optimizer = keras.optimizers.SGD(learning_rate = lr)
  elif opt_name == 'momentum':
    optimizer = keras.optimizers.SGD(learning_rate = lr, momentum=0.9)
  elif opt_name == 'nesterov':
    optimizer = keras.optimizers.SGD(learning_rate=lr, momentum=0.9, nesterov=True)
  elif opt_name == 'adagrad':
    optimizer = keras.optimizers.Adagrad(learning_rate=lr)
  elif opt_name == 'adamax':
    optimizer = keras.optimizers.Adamax(learning_rate=lr, beta_1=0.9, beta_2=0.999)
  elif opt_name == 'nadam':
    optimizer = keras.optimizers.Nadam(learning_rate=lr, beta_1=0.9, beta_2=0.999)

  return optimizer

In [15]:
# wandb activation function 변경을 위한 선택 함수
def select_activation(function):
  if function == 'relu':
    activation = keras.layers.ReLU()
  elif function == 'prelu':
    activation = keras.layers.PReLU()
  elif function == 'leacky_relu':
    activation = keras.layers.LeakyReLU()
  elif function == 'elu':
    activation = keras.layers.ELU()

  return activation

In [None]:
# mobileV2 다운로드
mobile2 = tf.keras.applications.mobilenet_v2.MobileNetV2(input_shape = (224, 224, 3), include_top=False)

In [None]:
# MobilenetV2 파라미터 세팅 실험을 위한 학습 함수
def sweep_for_mobile(config_defaults=None):
    # Set default values
    config_defaults = {
      'batch_size': 64,
      'optimizer' : 'adam',
      'learning_rate' : 0.01,
      'activation' : 'relu',
      'n_Dense_layer' : 1,
      'first_layer_node' : 1000,
      'second_layer_node' : 100
    }
    # Initialize wandb with a sample project name
    wandb.init(config=config_defaults)  # this gets over-written in the Sweep

    # initialize model
    global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
    prediction_layer = tf.keras.layers.Dense(62, activation = 'softmax')
    node_per_layer = [wandb.config.first_layer_node, wandb.config.second_layer_node]
    

    model = keras.models.Sequential()
    model.add(tf.keras.layers.Normalization( axis=-1, mean=44.412914, variance=84.77896, input_shape = (28, 28, 3)))
    model.add(tf.keras.layers.Resizing(height = 224, width = 224))
    model.add(mobile2)
    model.add(global_average_layer)
    for i in range(wandb.config.n_Dense_layer):
      model.add(tf.keras.layers.Dense(node_per_layer[i]))
      model.add(select_activation(wandb.config.activation))
    model.add(prediction_layer)
    
    print(model.summary())

    # Instantiate an optimizer to train the model.
    optimizer = select_opt(wandb.config.optimizer, wandb.config.learning_rate)
    # Instantiate a loss function.
    # loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)


    model.compile(loss="categorical_crossentropy",
                optimizer=optimizer,
                metrics=["accuracy"])
    hist = model.fit(rgb_train_data, train_label, epochs=2, validation_data=(rgb_val_data, val_label))

In [None]:
# 변경할 파라미터
sweep_config = {
  'method': 'random', 
  'parameters': {
      
      'optimizer': {
          'values' : ['adam', 'rmsprop', 'sgd', 'momentum', 'nesterov', 'adagrad', 'adamax', 'nadam']
      },
      'learning_rate':{
          'values' : [0.1, 0.05, 0.01, 0.005]
      },
      'activation' : {
          'values' : ['relu', 'leacky_relu', 'elu']
      },
      'n_Dense_layer' : {
          'values' : [1, 2]
      },
      'first_layer_node' : {
          'values' : [1000, 800, 500]
      },
      'second_layer_node' : {
          'values' : [100, 120]
      }
  }
}

# wandb 프로젝트 생성 및 실행
sweep_id = wandb.sweep(sweep_config, project="mobilenetv2_for_report")
wandb.agent(sweep_id, function=sweep_for_mobile, count=3)

Create sweep with ID: 980vasdg
Sweep URL: https://wandb.ai/chanin-eom/mobilenetv2_for_report/sweeps/980vasdg


[34m[1mwandb[0m: Agent Starting Run: ko737kpf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	first_layer_node: 500
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	n_Dense_layer: 1
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	second_layer_node: 120
[34m[1mwandb[0m: Currently logged in as: [33mchanin-eom[0m. Use [1m`wandb login --relogin`[0m to force relogin


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 28, 28, 3)        0         
 n)                                                              
                                                                 
 resizing (Resizing)         (None, 224, 224, 3)       0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense_1 (Dense)             (None, 500)               640500    
                                                        

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: swojc8ef with config:
[34m[1mwandb[0m: 	activation: leacky_relu
[34m[1mwandb[0m: 	first_layer_node: 800
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	n_Dense_layer: 2
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	second_layer_node: 100


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 28, 28, 3)        0         
 n)                                                              
                                                                 
 resizing (Resizing)         (None, 224, 224, 3)       0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense_1 (Dense)             (None, 800)               1024800   
                                                        

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: wwq7vcqe with config:
[34m[1mwandb[0m: 	activation: leacky_relu
[34m[1mwandb[0m: 	first_layer_node: 1000
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	n_Dense_layer: 1
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	second_layer_node: 100


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 28, 28, 3)        0         
 n)                                                              
                                                                 
 resizing (Resizing)         (None, 224, 224, 3)       0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense_1 (Dense)             (None, 1000)              1281000   
                                                        

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

# ResNet_ResNet wandb

In [13]:
# Residual block class
class ResnetBlock(keras.models.Model):
    """
    A standard resnet block.
    """

    def __init__(self, channels: int, down_sample=False):
        """
        channels: same as number of convolution kernels
        """
        super().__init__()

        self.__channels = channels
        self.__down_sample = down_sample
        self.__strides = [2, 1] if down_sample else [1, 1]

        KERNEL_SIZE = (3, 3)
        # use He initialization, instead of Xavier (a.k.a 'glorot_uniform' in Keras), as suggested in [2]
        INIT_SCHEME = "he_normal"

        self.conv_1 = keras.layers.Conv2D(self.__channels, strides=self.__strides[0],
                             kernel_size=KERNEL_SIZE, padding="same", kernel_initializer=INIT_SCHEME)
        self.bn_1 = keras.layers.BatchNormalization()
        self.conv_2 = keras.layers.Conv2D(self.__channels, strides=self.__strides[1],
                             kernel_size=KERNEL_SIZE, padding="same", kernel_initializer=INIT_SCHEME)
        self.bn_2 = keras.layers.BatchNormalization()
        self.merge = keras.layers.Add()

        if self.__down_sample:
            # perform down sampling using stride of 2, according to [1].
            self.res_conv = keras.layers.Conv2D(
                self.__channels, strides=2, kernel_size=(1, 1), kernel_initializer=INIT_SCHEME, padding="same")
            self.res_bn = keras.layers.BatchNormalization()

    def call(self, inputs):
        res = inputs

        x = self.conv_1(inputs)
        x = self.bn_1(x)
        x = tf.nn.relu(x)
        x = self.conv_2(x)
        x = self.bn_2(x)

        if self.__down_sample:
            res = self.res_conv(res)
            res = self.res_bn(res)

        # if not perform down sample, then add a shortcut directly
        x = self.merge([x, res])
        out = tf.nn.relu(x)
        return out

In [None]:
# Res(Feature detection)_Res(Domain_detection) 구조 모델 layer 설정 class
# wandb에 의해 선택적으로 각 layer를 변경할 수 있도록 설계
class Res_5x5_v4(keras.models.Model):

    def __init__(self, num_classes, config_list, **kwargs):
        """
            num_classes: number of classes in specific classification task.
        """
        super().__init__(**kwargs)
        self.feature_layer_depth, self.num_layer_depth, \
        self.upper_layer_depth, self.lower_layer_depth = config_list

        self.input_ch = 32
        self.layer_ch = 32 * self.feature_layer_depth
        self.Batch_Norm_Layer = keras.layers.BatchNormalization()
        self.Global_Average_Pool_Layer = keras.layers.GlobalAveragePooling2D()
        self.Flatten_Layer = keras.layers.Flatten()

        self.Feature_Detection_Layer = self.feature_detection_layer()

        self.Number_Layer = self.number_layer()
        self.Upper_Letter_Layer = self.upper_letter_layer()
        self.Lower_Letter_Layer = self.lower_letter_layer()


        self.Prediction_Layer = keras.layers.Dense(num_classes, activation="softmax")

    def feature_detection_layer(self):
      feature_dec_layer = keras.models.Sequential()
      feature_dec_layer.add(self.Batch_Norm_Layer)
      feature_dec_layer.add(keras.layers.Conv2D(self.input_ch, (5, 5), strides=1, padding="same", kernel_initializer="he_normal"))
      feature_dec_layer.add(self.Batch_Norm_Layer)
      feature_dec_layer.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=1, padding="same"))
      feature_dec_layer.add(ResnetBlock(self.input_ch))
      feature_dec_layer.add(ResnetBlock(self.input_ch))
      
      for i in range(self.feature_layer_depth-1):
        feature_dec_layer.add(ResnetBlock(self.input_ch*(2*(i+1)), down_sample=True))
        feature_dec_layer.add(ResnetBlock(self.input_ch*(2*(i+1))))


      return feature_dec_layer

    def number_layer(self):
      num_layer = keras.models.Sequential()
      for i in range(self.num_layer_depth):
        num_layer.add(ResnetBlock(self.layer_ch*(2*(i+1)), down_sample=True))
        num_layer.add(ResnetBlock(self.layer_ch*(2*(i+1))))
      num_layer.add(self.Global_Average_Pool_Layer)
      num_layer.add(self.Flatten_Layer)

      return num_layer

    def upper_letter_layer(self):
      upper_layer = keras.models.Sequential()
      for i in range(self.upper_layer_depth):
        upper_layer.add(ResnetBlock(self.layer_ch*(2*(i+1)), down_sample=True))
        upper_layer.add(ResnetBlock(self.layer_ch*(2*(i+1))))
      upper_layer.add(self.Global_Average_Pool_Layer)
      upper_layer.add(self.Flatten_Layer)
      return upper_layer

    def lower_letter_layer(self):
      lower_layer = keras.models.Sequential()

      for i in range(self.lower_layer_depth):
        lower_layer.add(ResnetBlock(self.layer_ch*(2*(i+1)), down_sample=True))
        lower_layer.add(ResnetBlock(self.layer_ch*(2*(i+1))))
      lower_layer.add(self.Global_Average_Pool_Layer)
      lower_layer.add(self.Flatten_Layer)
      return lower_layer

In [None]:
# wandb에서 선택된 hyper parameter들의 배열을 입력으로 받아서
# 적절한 model을 선택하는 함수

def select_model(config_list):
  model_class = Res_5x5_v4(62, config_list[:-2])
  dense_layer_depth = config_list[-2]
  num_nueran = config_list[-1]

  input = keras.layers.Input(shape=(28, 28, 1))

  feature_out = model_class.Feature_Detection_Layer(input)

  num_out = model_class.Number_Layer(feature_out)
  upper_out = model_class.Upper_Letter_Layer(feature_out)
  lower_out = model_class.Lower_Letter_Layer(feature_out)

  final_out = keras.layers.concatenate([num_out, upper_out, lower_out])
  for i in range(dense_layer_depth):
    final_out = tf.keras.layers.Dense(num_nueran, activation = 'relu')(final_out)

  result = model_class.Prediction_Layer(final_out)

  model = keras.models.Model(inputs=[input], outputs=[result])
  return model

In [None]:
# Res_Res 구조 모델 파라미터 세팅 실험을 위한 학습 함수
def sweep_for_res_res(config_defaults=None):
    # Set default values
    config_defaults = {
      'batch_size': 64,
      'optimizer' : 'adam',
      'learning_rate' : 0.01,
      'feature_layer_depth' : 2,
      'num_layer_depth' : 2,
      'upper_layer_depth' : 2,
      'lower_layer_depth' : 2,
      'prediction_layer' : 1,
      'dense_nueran' : 100 
    }
    # Initialize wandb with a sample project name
    wandb.init(config=config_defaults)  # this gets over-written in the Sweep

    # initialize model
    early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    config_list = [wandb.config.feature_layer_depth, wandb.config.num_layer_depth, 
                   wandb.config.upper_layer_depth, wandb.config.lower_layer_depth, wandb.config.prediction_layer, wandb.config.dense_nueran]
    model = select_model(config_list)
    
    print(model.summary())

    # Instantiate an optimizer to train the model.
    optimizer = select_opt(wandb.config.optimizer, wandb.config.learning_rate)


    model.compile(loss="categorical_crossentropy",
                optimizer=optimizer,
                metrics=["accuracy"])
    hist = model.fit(train_data, train_label, epochs=2, validation_data=(val_data, val_label), callbacks=[early_stopping_cb])

In [None]:
# 변경할 파라미터
sweep_config_res_res = {
  'method': 'random', 
  'parameters': {
      
      'optimizer': {
          'values' : ['adam', 'rmsprop', 'sgd', 'momentum', 'nesterov', 'adagrad', 'adamax', 'nadam']
      },
      'learning_rate':{
          'values' : [0.1, 0.05, 0.01, 0.005]
      },
      'feature_layer_depth' : {
          'values' : [1, 2]
      },
      'num_layer_depth' : {
          'values' : [1, 2]
      },
      'upper_layer_depth' : {
          'values' : [1, 2]
      },
      'lower_layer_depth' : {
          'values' : [1, 2]
      },
      'prediction_layer' : {
          'values' : [0, 1]
      },
      'dense_nueran' : {
          'values' : [200, 150, 120, 100]
      }
  }
}

In [None]:
# wandb 프로젝트 생성 및 실행
sweep_id = wandb.sweep(sweep_config_res_res, project="sweep_for_res_res")
wandb.agent(sweep_id, function=sweep_for_res_res, count=3)

Create sweep with ID: irozrjcj
Sweep URL: https://wandb.ai/chanin-eom/sweep_for_res_res/sweeps/irozrjcj


[34m[1mwandb[0m: Agent Starting Run: hbz7avvb with config:
[34m[1mwandb[0m: 	dense_nueran: 100
[34m[1mwandb[0m: 	feature_layer_depth: 2
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lower_layer_depth: 2
[34m[1mwandb[0m: 	num_layer_depth: 2
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	prediction_layer: 0
[34m[1mwandb[0m: 	upper_layer_depth: 1


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 14, 14, 64)   171012      ['input_1[0][0]']                
                                                                                                  
 sequential_1 (Sequential)      (None, 256)          2631040     ['sequential[0][0]']             
                                                                                                  
 sequential_2 (Sequential)      (None, 128)          527488      ['sequential[0][0]']             
                                                                                              

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m0rv8vei with config:
[34m[1mwandb[0m: 	dense_nueran: 200
[34m[1mwandb[0m: 	feature_layer_depth: 1
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lower_layer_depth: 2
[34m[1mwandb[0m: 	num_layer_depth: 1
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	prediction_layer: 0
[34m[1mwandb[0m: 	upper_layer_depth: 1


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 28, 28, 32)   38340       ['input_1[0][0]']                
                                                                                                  
 sequential_1 (Sequential)      (None, 64)           132672      ['sequential[0][0]']             
                                                                                                  
 sequential_2 (Sequential)      (None, 64)           132672      ['sequential[0][0]']             
                                                                                              

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: h1hk78p3 with config:
[34m[1mwandb[0m: 	dense_nueran: 120
[34m[1mwandb[0m: 	feature_layer_depth: 2
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lower_layer_depth: 1
[34m[1mwandb[0m: 	num_layer_depth: 2
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	prediction_layer: 1
[34m[1mwandb[0m: 	upper_layer_depth: 2


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 14, 14, 64)   171012      ['input_1[0][0]']                
                                                                                                  
 sequential_1 (Sequential)      (None, 256)          2631040     ['sequential[0][0]']             
                                                                                                  
 sequential_2 (Sequential)      (None, 256)          2631040     ['sequential[0][0]']             
                                                                                              

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

# ResNet-Dense wandb

Feature detection : 입력 이미지의 feature detect -> Resnet 구조

Result_predictor : 총 62개의 class(0-9, A-Z, a-z) 예측 -> 3개의 path로 분기 된 후 concatenate -> Dense 구조

In [None]:
# Res(Feature detection)_Dense(Domain_detection) 구조 모델 layer 설정 class
# wandb에 의해 선택적으로 각 layer를 변경할 수 있도록 설계
 class Res_5x5_v4(keras.models.Model):

    def __init__(self, num_classes, config_list, **kwargs):
        """
            num_classes: number of classes in specific classification task.
        """
        super().__init__(**kwargs)

        self.input_ch, self.activation, self.feature_layer_depth, self.num_layer_nueron, self.upper_layer_nueron, self.lower_layer_nueron = config_list


        self.Global_Average_Pool_Layer = keras.layers.GlobalAveragePooling2D()
        self.Flatten_Layer = keras.layers.Flatten()

        self.Feature_Detection_Layer = self.feature_detection_layer()

        self.Number_Layer = self.number_layer()
        self.Upper_Letter_Layer = self.upper_letter_layer()
        self.Lower_Letter_Layer = self.lower_letter_layer()


        self.Prediction_Layer = keras.layers.Dense(num_classes, activation="softmax")

    def feature_detection_layer(self):
      feature_dec_layer = keras.models.Sequential()
      # feature_dec_layer.add(keras.layers.experimental.preprocessing.RandomRotation(0.2))
      feature_dec_layer.add(keras.layers.BatchNormalization())
      feature_dec_layer.add(keras.layers.Conv2D(self.input_ch, (5, 5), strides=1, padding="same", kernel_initializer="he_normal"))
      feature_dec_layer.add(keras.layers.BatchNormalization())
      feature_dec_layer.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=1, padding="same"))
      feature_dec_layer.add(ResnetBlock(self.input_ch))
      feature_dec_layer.add(ResnetBlock(self.input_ch))
      
      for i in range(self.feature_layer_depth-1):
        feature_dec_layer.add(ResnetBlock(self.input_ch*(2*(i+1)), down_sample=True))
        feature_dec_layer.add(ResnetBlock(self.input_ch*(2*(i+1))))

      feature_dec_layer.add(self.Global_Average_Pool_Layer)
      feature_dec_layer.add(self.Flatten_Layer)


      return feature_dec_layer

    def number_layer(self):
      num_layer = keras.models.Sequential()
      for i, num_nueron in enumerate(self.num_layer_nueron):
        num_layer.add(tf.keras.layers.Dense(num_nueron))
        num_layer.add(self.select_activation(self.activation))
        num_layer.add(keras.layers.BatchNormalization())
    
      num_layer.add(tf.keras.layers.Dense(10, activation = None))

      return num_layer

    def upper_letter_layer(self):
      upper_layer = keras.models.Sequential()
      for i, num_nueron in enumerate(self.upper_layer_nueron):
        upper_layer.add(tf.keras.layers.Dense(num_nueron))
        upper_layer.add(self.select_activation(self.activation))
        upper_layer.add(keras.layers.BatchNormalization())

      upper_layer.add(tf.keras.layers.Dense(26, activation = None))
      return upper_layer

    def lower_letter_layer(self):
      lower_layer = keras.models.Sequential()

      for i, num_nueron in enumerate(self.lower_layer_nueron):
        lower_layer.add(tf.keras.layers.Dense(num_nueron))
        lower_layer.add(self.select_activation(self.activation))
        lower_layer.add(keras.layers.BatchNormalization())

      lower_layer.add(tf.keras.layers.Dense(26, activation = None))
      return lower_layer

    def select_activation(self, function):
      if function == 'relu':
        activation = keras.layers.ReLU()
      elif function == 'prelu':
        activation = keras.layers.PReLU()
      elif function == 'leacky_relu':
        activation = keras.layers.LeakyReLU()
      elif function == 'elu':
        activation = keras.layers.ELU()
      else:
        activation = keras.layers.ReLU()
      return activation

In [None]:
# wandb에서 선택된 hyper parameter들의 배열을 입력으로 받아서
# 적절한 model을 선택하는 함수
def select_model(config_list):
  model_class = Res_5x5_v4(64, config_list)

  input = keras.layers.Input(shape=(28, 28, 1))

  feature_out = model_class.Feature_Detection_Layer(input)

  num_out = model_class.Number_Layer(feature_out)
  upper_out = model_class.Upper_Letter_Layer(feature_out)
  lower_out = model_class.Lower_Letter_Layer(feature_out)

  final_out = keras.layers.concatenate([num_out, upper_out, lower_out])
  result = tf.nn.softmax(final_out)

  model = keras.models.Model(inputs=[input], outputs=[result])
  return model

In [None]:
# Res_Dense 파라미터 세팅 실험을 위한 학습 함수
def sweep_for_res_dense(config_defaults=None):
    # Set default values
    test = ['relu', 2, [100, 50], [200, 100 , 50], [100, 50]]

    config_defaults = {
      'channel' : 32,
      'batch_size': 64,
      'activation': 'relu',
      'optimizer' : 'adam',
      'learning_rate' : 0.01,
      'feature_layer_depth' : 2,
      'num_layer_depth' : 2,
      'upper_layer_depth' : 2,
      'lower_layer_depth' : 2,
    }
    # Initialize wandb with a sample project name
    wandb.init(config=config_defaults)  # this gets over-written in the Sweep

    last_neuron_perlayer = [10, 26, 26]
    num_dense_per_layer = [wandb.config.num_layer_depth, wandb.config.upper_layer_depth, wandb.config.lower_layer_depth]
    num_flatten_layer = wandb.config.channel * wandb.config.feature_layer_depth
    nueron_list = []

    for last_nueron, num_dense in zip(last_neuron_perlayer, num_dense_per_layer):
      now_layer_nueron = []
      boundary = (num_flatten_layer - last_nueron)//(num_dense+1)
      now_nueron = num_flatten_layer
      for i in range(num_dense):
        now_nueron -= boundary
        now_layer_nueron.append(now_nueron)
      nueron_list.append(now_layer_nueron)

    print(nueron_list)

    # initialize model
    config_list = [
                    wandb.config.channel, wandb.config.activation, wandb.config.feature_layer_depth, 
                    nueron_list[0], nueron_list[1], nueron_list[2]
                   ]
    model = select_model(config_list)
    
    print(model.summary())

    # Instantiate an optimizer to train the model.
    optimizer = select_opt(wandb.config.optimizer, wandb.config.learning_rate)


    model.compile(loss="categorical_crossentropy",
                optimizer=optimizer,
                steps_per_execution=1000,
                metrics=["accuracy"])
    hist = model.fit(train_data, train_label, epochs=2, validation_data=(val_data, val_label))

In [None]:
# 변경할 파라미터
sweep_config = {
  'method': 'random', 
  'parameters': {
      
      'channel': {
          'values' : [16, 32, 64]
      },
      'optimizer': {
          'values' : ['adam', 'rmsprop', 'sgd', 'momentum', 'nesterov', 'adagrad', 'adamax', 'nadam']
      },
      'learning_rate':{
          'values' : [0.1, 0.05, 0.01, 0.005]
      },
      'feature_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'num_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'upper_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'lower_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'activation' : {
          'values' : ['relu', 'leacky_relu', 'elu']
      }
  }
}

In [None]:
# wandb 프로젝트 생성 및 실행
sweep_id = wandb.sweep(sweep_config, project="sweep_for_res_dense")
wandb.agent(sweep_id, function=sweep_for_res_dense, count=3)

Create sweep with ID: 8n9h3wc2
Sweep URL: https://wandb.ai/chanin-eom/sweep_for_res_dense/sweeps/8n9h3wc2


[34m[1mwandb[0m: Agent Starting Run: 6ijp4rbt with config:
[34m[1mwandb[0m: 	activation: leacky_relu
[34m[1mwandb[0m: 	channel: 16
[34m[1mwandb[0m: 	feature_layer_depth: 1
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lower_layer_depth: 3
[34m[1mwandb[0m: 	num_layer_depth: 1
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	upper_layer_depth: 2


[[13], [20, 24], [19, 22, 25]]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 16)           10020       ['input_1[0][0]']                
                                                                                                  
 sequential_1 (Sequential)      (None, 10)           413         ['sequential[0][0]']             
                                                                                                  
 sequential_2 (Sequential)      (None, 26)           1670        ['sequential[0][0]']             
                                                               

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: dhnvn2j0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	channel: 64
[34m[1mwandb[0m: 	feature_layer_depth: 3
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lower_layer_depth: 1
[34m[1mwandb[0m: 	num_layer_depth: 1
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	upper_layer_depth: 2


[[101], [137, 82], [109]]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 256)          2781700     ['input_1[0][0]']                
                                                                                                  
 sequential_1 (Sequential)      (None, 10)           27381       ['sequential[0][0]']             
                                                                                                  
 sequential_2 (Sequential)      (None, 26)           49559       ['sequential[0][0]']             
                                                                    

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: tv3ihpc9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	channel: 16
[34m[1mwandb[0m: 	feature_layer_depth: 2
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	lower_layer_depth: 3
[34m[1mwandb[0m: 	num_layer_depth: 1
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	upper_layer_depth: 1


[[21], [29], [31, 30, 29]]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 32)           43588       ['input_1[0][0]']                
                                                                                                  
 sequential_1 (Sequential)      (None, 10)           997         ['sequential[0][0]']             
                                                                                                  
 sequential_2 (Sequential)      (None, 26)           1853        ['sequential[0][0]']             
                                                                   

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

# Aux output(Domain prediction) + ResNet + Dense

Feature detection : 입력 이미지의 feature detect -> Resnet 구조

Domain predictor : 이미지의 도메인 (숫자, 대문자, 소문자) 예측

Result_predictor : 총 62개의 class(0-9, A-Z, a-z) 예측 -> 3개의 path로 분기 된 후 concatenate

In [17]:
# Aux output을 위한 label 값 생성
domain_train_label = np.array(sparse_train_label.copy())
domain_val_label = np.array(sparse_val_label.copy())

domain_train_label[domain_train_label<10] = 0
domain_train_label[(10<=domain_train_label)&(domain_train_label<36)] = 1
domain_train_label[36<=domain_train_label] = 2

domain_val_label[domain_val_label<10] = 0
domain_val_label[(10<=domain_val_label)&(domain_val_label<36)] = 1
domain_val_label[36<=domain_val_label] = 2

In [18]:
print(np.unique(domain_train_label))
print(np.unique(domain_val_label))

[0 1 2]
[0 1 2]


In [19]:
# domain label one hot 인코딩
domain_train_label = keras.utils.to_categorical(domain_train_label)
domain_val_label = keras.utils.to_categorical(domain_val_label)

In [20]:
print(domain_train_label.shape)
print(domain_val_label.shape)

(62000, 3)
(12400, 3)


In [21]:
# Aux_output(Domain_predict), Res(Feature detection)_Res(Domain_detection) 구조 모델 layer 설정 class
# wandb에 의해 선택적으로 각 layer를 변경할 수 있도록 설계
class Res_5x5_v4(keras.models.Model):

    def __init__(self, num_classes, config_list, **kwargs):
        """
            num_classes: number of classes in specific classification task.
        """
        super().__init__(**kwargs)

        self.input_ch, self.activation, self.feature_layer_depth, self.domain_nueron, self.dropout_rate,\
        self.num_layer_nueron, self.upper_layer_nueron, self.lower_layer_nueron, self.avgpool = config_list


        self.Global_Average_Pool_Layer = keras.layers.GlobalAveragePooling2D()
        self.Flatten_Layer = keras.layers.Flatten()

        self.Feature_Detection_Layer = self.feature_detection_layer()
        
        self.Domain_Layer = self.domain_layer()
        self.Number_Layer = self.number_layer()
        self.Upper_Letter_Layer = self.upper_letter_layer()
        self.Lower_Letter_Layer = self.lower_letter_layer()


        self.Prediction_Layer = keras.layers.Dense(num_classes, activation="softmax")

    def feature_detection_layer(self):
      feature_dec_layer = keras.models.Sequential()
      feature_dec_layer.add(keras.layers.BatchNormalization())
      feature_dec_layer.add(keras.layers.Conv2D(self.input_ch, (5, 5), strides=1, padding="same", kernel_initializer="he_normal"))
      feature_dec_layer.add(keras.layers.BatchNormalization())

      if self.avgpool == 'T':
        feature_dec_layer.add(keras.layers.AvgPool2D(pool_size=2, padding = 'same'))
      else:
        feature_dec_layer.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=1, padding="same"))

      feature_dec_layer.add(ResnetBlock(self.input_ch))
      feature_dec_layer.add(ResnetBlock(self.input_ch))
      
      for i in range(self.feature_layer_depth-1):
        feature_dec_layer.add(ResnetBlock(self.input_ch*(2*(i+1)), down_sample=True))
        feature_dec_layer.add(ResnetBlock(self.input_ch*(2*(i+1))))

      feature_dec_layer.add(self.Global_Average_Pool_Layer)
      feature_dec_layer.add(self.Flatten_Layer)

      return feature_dec_layer

    def domain_layer(self):
      num_layer = keras.models.Sequential(name = "domain_out")
      for i, num_nueron in enumerate(self.domain_nueron):
        num_layer.add(tf.keras.layers.Dense(num_nueron))
        num_layer.add(self.select_activation(self.activation))
        num_layer.add(keras.layers.BatchNormalization())
      
      num_layer.add(tf.keras.layers.Dropout(self.dropout_rate))
      num_layer.add(tf.keras.layers.Dense(3, activation = 'softmax'))

      return num_layer

    def number_layer(self):
      num_layer = keras.models.Sequential(name = "number_layer")
      for i, num_nueron in enumerate(self.num_layer_nueron):
        num_layer.add(tf.keras.layers.Dense(num_nueron))
        num_layer.add(self.select_activation(self.activation))
        num_layer.add(keras.layers.BatchNormalization())

      num_layer.add(tf.keras.layers.Dropout(self.dropout_rate))
      num_layer.add(tf.keras.layers.Dense(10, activation = None))

      return num_layer

    def upper_letter_layer(self):
      upper_layer = keras.models.Sequential(name = "upper_layer")
      for i, num_nueron in enumerate(self.upper_layer_nueron):
        upper_layer.add(tf.keras.layers.Dense(num_nueron))
        upper_layer.add(self.select_activation(self.activation))
        upper_layer.add(keras.layers.BatchNormalization())

      upper_layer.add(tf.keras.layers.Dropout(self.dropout_rate))
      upper_layer.add(tf.keras.layers.Dense(26, activation = None))
      
      return upper_layer

    def lower_letter_layer(self):
      lower_layer = keras.models.Sequential(name = "lower_layer")

      for i, num_nueron in enumerate(self.lower_layer_nueron):
        lower_layer.add(tf.keras.layers.Dense(num_nueron))
        lower_layer.add(self.select_activation(self.activation))
        lower_layer.add(keras.layers.BatchNormalization())

      lower_layer.add(tf.keras.layers.Dropout(self.dropout_rate))
      lower_layer.add(tf.keras.layers.Dense(26, activation = None))

      return lower_layer

    def select_activation(self, function):
      if function == 'relu':
        activation = keras.layers.ReLU()
      elif function == 'prelu':
        activation = keras.layers.PReLU()
      elif function == 'leacky_relu':
        activation = keras.layers.LeakyReLU()
      elif function == 'elu':
        activation = keras.layers.ELU()
      else:
        activation = keras.layers.ReLU()
      return activation

In [22]:
# wandb에서 선택된 hyper parameter들의 배열을 입력으로 받아서
# 적절한 model을 선택하는 함수
def select_model(config_list):
  model_class = Res_5x5_v4(64, config_list)

  input = keras.layers.Input(shape=(28, 28, 1), name = "input_layer")

  feature_out = model_class.Feature_Detection_Layer(input)

  domain_result = model_class.Domain_Layer(feature_out)

  num_out = model_class.Number_Layer(feature_out)
  upper_out = model_class.Upper_Letter_Layer(feature_out)
  lower_out = model_class.Lower_Letter_Layer(feature_out)


  final_out = keras.layers.concatenate([num_out, upper_out, lower_out])
  result = tf.keras.layers.Softmax(name = 'main_out')(final_out)

  model = keras.models.Model(inputs=[input], outputs=[domain_result, result])
  return model

In [24]:
# aux_res_dense 모델의 파라미터 세팅 실험을 위한 학습 함수
def sweep_for_aux_res_dense(config_defaults=None):
    # Set default values

    early_stopping_cb = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

    config_defaults = {
      'channel' : 32,
      'batch_size': 64,
      'activation': 'relu',
      'optimizer' : 'adam',
      'learning_rate' : 0.01,
      'feature_layer_depth' : 2,
      'domain_layer_depth' : 2,
      'num_layer_depth' : 2,
      'upper_layer_depth' : 2,
      'lower_layer_depth' : 2,
      'domain_loss_rate' : 0.5,
      'avg_pool' : 'T',
    }
    # Initialize wandb with a sample project name
    wandb.init(config=config_defaults)  # this gets over-written in the Sweep

    last_neuron_perlayer = [3, 10, 26, 26]
    num_dense_per_layer = [wandb.config.domain_layer_depth, wandb.config.num_layer_depth, wandb.config.upper_layer_depth, wandb.config.lower_layer_depth]
    num_flatten_layer = wandb.config.channel * wandb.config.feature_layer_depth
    nueron_list = []

    for last_nueron, num_dense in zip(last_neuron_perlayer, num_dense_per_layer):
      now_layer_nueron = []
      boundary = (num_flatten_layer - last_nueron)//(num_dense+1)
      now_nueron = num_flatten_layer
      for i in range(num_dense):
        now_nueron -= boundary
        now_layer_nueron.append(now_nueron)
      nueron_list.append(now_layer_nueron)

    print(nueron_list)

    # initialize model

    # test = [64, 'relu', 2, [50], 0.5, [100, 50], [200, 100 , 50], [100, 50]]

    config_list = [
                    wandb.config.channel, wandb.config.activation, wandb.config.feature_layer_depth, 
                    nueron_list[0], wandb.config.dropout_rate, nueron_list[1], nueron_list[2], nueron_list[3], wandb.config.avg_pool
                   ]
              
    model = select_model(config_list)
    optimizer = select_opt(wandb.config.optimizer, wandb.config.learning_rate)
    loss_rate = [wandb.config.domain_loss_rate , 1-wandb.config.domain_loss_rate]
    print(loss_rate)
    
    print(model.summary())

    # Instantiate an optimizer to train the model.


    model.compile(loss=["categorical_crossentropy", "categorical_crossentropy"], loss_weights=loss_rate,
                
                  optimizer=optimizer,

                  metrics=["accuracy",],
                   run_eagerly=True
                  )
    
    hist = model.fit(train_data, [domain_train_label, train_label], epochs=2, 
                     validation_data=(val_data, [domain_val_label, val_label]))

In [25]:
# 변경할 파라미터
sweep_config = {
  'method': 'random', 
  'parameters': {
      
      'channel': {
          'values' : [16, 32, 64]
      },
      'optimizer': {
          'values' : ['adam', 'rmsprop', 'sgd', 'momentum', 'nadam']
      },
      'learning_rate':{
          'values' : [0.05, 0.01, 0.005]
      },
      'feature_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'domain_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'num_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'upper_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'lower_layer_depth' : {
          'values' : [1, 2, 3]
      },
      'activation' : {
          'values' : ['relu', 'leacky_relu', 'elu']
      },
      'domain_loss_rate' : {
          'values' : [0.3, 0.4, 0.5, 0.6]
      },
      'dropout_rate' : {
          'values' : [0.4, 0.5, 0.6]
      },
      'avg_pool' : {
          'values' : ['T','F']
      }
  }
}

In [27]:
# wandb 프로젝트 생성 및 실행
sweep_id = wandb.sweep(sweep_config, project="sweep_for_aux_res_dense")
wandb.agent(sweep_id, function=sweep_for_aux_res_dense, count=3)

Create sweep with ID: heagnmi9
Sweep URL: https://wandb.ai/chanin-eom/sweep_for_aux_res_dense/sweeps/heagnmi9


[34m[1mwandb[0m: Agent Starting Run: agkptw9i with config:
[34m[1mwandb[0m: 	activation: leacky_relu
[34m[1mwandb[0m: 	avg_pool: T
[34m[1mwandb[0m: 	channel: 32
[34m[1mwandb[0m: 	domain_layer_depth: 2
[34m[1mwandb[0m: 	domain_loss_rate: 0.5
[34m[1mwandb[0m: 	dropout_rate: 0.6
[34m[1mwandb[0m: 	feature_layer_depth: 2
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	lower_layer_depth: 1
[34m[1mwandb[0m: 	num_layer_depth: 2
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	upper_layer_depth: 2
[34m[1mwandb[0m: Currently logged in as: [33mchanin-eom[0m. Use [1m`wandb login --relogin`[0m to force relogin


[[44, 24], [46, 28], [52, 40], [45]]
Cause: mangled names are not yet supported
Cause: mangled names are not yet supported
[0.5, 0.5]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 64)           171140      ['input_layer[0][0]']            
                                                                                                  
 number_layer (Sequential)      (None, 10)           4892        ['sequential[0][0]']             
                                                                                                  
 upper_layer (Sequential)       (None, 26)           6934  

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: der2pial with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	avg_pool: F
[34m[1mwandb[0m: 	channel: 64
[34m[1mwandb[0m: 	domain_layer_depth: 2
[34m[1mwandb[0m: 	domain_loss_rate: 0.6
[34m[1mwandb[0m: 	dropout_rate: 0.6
[34m[1mwandb[0m: 	feature_layer_depth: 2
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lower_layer_depth: 2
[34m[1mwandb[0m: 	num_layer_depth: 1
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	upper_layer_depth: 1


[[87, 46], [69], [77], [94, 60]]
[0.6, 0.4]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 128)          678148      ['input_layer[0][0]']            
                                                                                                  
 number_layer (Sequential)      (None, 10)           9877        ['sequential[0][0]']             
                                                                                                  
 upper_layer (Sequential)       (None, 26)           12269       ['sequential[0][0]']             
                                                  

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: dz5g2uwe with config:
[34m[1mwandb[0m: 	activation: elu
[34m[1mwandb[0m: 	avg_pool: T
[34m[1mwandb[0m: 	channel: 64
[34m[1mwandb[0m: 	domain_layer_depth: 1
[34m[1mwandb[0m: 	domain_loss_rate: 0.6
[34m[1mwandb[0m: 	dropout_rate: 0.6
[34m[1mwandb[0m: 	feature_layer_depth: 2
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	lower_layer_depth: 1
[34m[1mwandb[0m: 	num_layer_depth: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	upper_layer_depth: 1


[[66], [99, 70, 41], [77], [77]]
[0.6, 0.4]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 sequential (Sequential)        (None, 128)          678148      ['input_layer[0][0]']            
                                                                                                  
 number_layer (Sequential)      (None, 10)           23942       ['sequential[0][0]']             
                                                                                                  
 upper_layer (Sequential)       (None, 26)           12269       ['sequential[0][0]']             
                                                  

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…