# 무작위 탐색을 통한 하이퍼파라미터 튜닝

## 데이터 만들기

In [18]:
import tensorflow as tf
import numpy as np

In [19]:
x = np.random.normal(size=(1000, 20))

In [20]:
x.shape

(1000, 20)

In [21]:
y = np.where(np.abs(x.sum(axis=1)) > 1, 1, 0)

## 데이터 분할

In [22]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1234)


In [23]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(16, activation = 'relu')) # 은닉층
model.add(tf.keras.layers.Dense(1, activation = 'sigmoid'))

In [24]:
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001), loss = 'binary_crossentropy', metrics = ['accuracy'])


In [26]:
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='model', monitor='val_accuracy', save_best_only=True)
history = model.fit(x_train, y_train, epochs=10, validation_split=0.2, callbacks=[checkpoint])


Epoch 1/10
INFO:tensorflow:Assets written to: model\assets
Epoch 2/10
INFO:tensorflow:Assets written to: model\assets
Epoch 3/10
INFO:tensorflow:Assets written to: model\assets
Epoch 4/10
INFO:tensorflow:Assets written to: model\assets
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
max(history.history['val_accuracy'])

0.824999988079071

## 모형

- 모형을 만들고 학습시키는 과정을 함수로 작성

- f'model{i:02d}': 항상 두자리가 되도록 00, 01, 02

In [34]:
def experiment(i, h_size, num_hidden, activation, lr):
    model = tf.keras.Sequential()
    for _ in range(num_hidden):
        model.add(tf.keras.layers.Dense(h_size, activation=activation))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss='binary_crossentropy', metrics=['accuracy'])
    
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=f'model{i:02d}', monitor='val_accuracy', save_best_only=True)
    history = model.fit(x_train, y_train, epochs=10, validation_split=0.2, callbacks=[checkpoint]) # , verbose=0
    
    return max(history.history['val_accuracy'])



## 랜덤 탐색

In [35]:
results = []
for i in range(3):
    h_sizes = [2, 4, 8, 16]
    h_size = np.random.choice(h_sizes)
    num_hidden = np.random.randint(0, 3)
    activation = np.random.choice(['relu', 'tanh'])
    lr = 10 ** np.random.randint(-5, 0)

    val_accuracy = experiment(i, h_size, num_hidden, activation, lr)
    results.append({
        'val_accuracy': val_accuracy,
        'h_size': h_size,
        'num_hidden': num_hidden,
        'activation': activation,
        'lr': lr
    })

Epoch 1/10
INFO:tensorflow:Assets written to: model00\assets
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
INFO:tensorflow:Assets written to: model01\assets
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
INFO:tensorflow:Assets written to: model01\assets
Epoch 6/10
INFO:tensorflow:Assets written to: model01\assets
Epoch 7/10
Epoch 8/10
INFO:tensorflow:Assets written to: model01\assets
Epoch 9/10
INFO:tensorflow:Assets written to: model01\assets
Epoch 10/10
Epoch 1/10
INFO:tensorflow:Assets written to: model02\assets
Epoch 2/10
Epoch 3/10
Epoch 4/10
INFO:tensorflow:Assets written to: model02\assets
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: model02\assets


In [36]:
results

[{'val_accuracy': 0.824999988079071,
  'h_size': 4,
  'num_hidden': 2,
  'activation': 'tanh',
  'lr': 0.1},
 {'val_accuracy': 0.9312499761581421,
  'h_size': 4,
  'num_hidden': 1,
  'activation': 'relu',
  'lr': 0.1},
 {'val_accuracy': 0.824999988079071,
  'h_size': 8,
  'num_hidden': 1,
  'activation': 'relu',
  'lr': 0.001}]

## 가장 좋은 모형 불러오기

In [37]:
model = tf.keras.models.load_model('model02')

## 테스트

In [38]:
model.evaluate(x_test, y_test)



[0.49019360542297363, 0.8399999737739563]

# TF에서 레이블 스무딩

- 레이블 스무딩을 하려면 먼저 y_test는 one-hot encoding이 되어 있어야 한다. 그 후에 다음 2가지 방법 중에 하나를 사용한다.

- 직접 계산
- CategoricalCrossentropy에 옵션 설정

## 직접 계산

- 직접 계산을 하는 방법은 먼저 스무딩할 정도를 정한다.

In [39]:
e = 0.2

- 다음으로 카테고리의 수를 변수에 할당한다.

In [40]:
n = 20

- 마지막으로 아래 공식에 따라 y를 직접 계산한다.

In [41]:
y_smooth = y_train * (1 - e) + (e / n)

학습을 할 때는 y_train 대신 y_smooth를 사용한다.

## 옵션 설정

model.compile에서 loss를 다음과 같이 설정한다.

In [43]:
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.2),
    ...)

SyntaxError: positional argument follows keyword argument (<ipython-input-43-fa8215f457f5>, line 3)

y_train으로 학습을 하면 손실을 계산할 때 레이블 스무딩이 적용된다.

# 다항분류 전처리

- 20newsgroups 데이터를 이용해 문서분류 실습을 진행한다. 
- 이 데이터는 20개의 뉴스그룹(일종의 주제별 게시판)의 글들을 모은 것이다.

## 데이터 불러들이기

- scikit-learn 패키지의 fetch_20newsgroups 함수를 이용해 데이터를 받을 수 있다. 
- 데이터는 미리 훈련용과 테스트용이 나뉘어져 있다.

In [45]:
from sklearn.datasets import fetch_20newsgroups

In [46]:
train = fetch_20newsgroups(subset='train')
test = fetch_20newsgroups(subset='test')

데이터를 확인한다.

In [47]:
print(train.data[0])

From: lerxst@wam.umd.edu (where's my thing)
Subject: WHAT car is this!?
Nntp-Posting-Host: rac3.wam.umd.edu
Organization: University of Maryland, College Park
Lines: 15

 I was wondering if anyone out there could enlighten me on this car I saw
the other day. It was a 2-door sports car, looked to be from the late 60s/
early 70s. It was called a Bricklin. The doors were really small. In addition,
the front bumper was separate from the rest of the body. This is 
all I know. If anyone can tellme a model name, engine specs, years
of production, where this car is made, history, or whatever info you
have on this funky looking car, please e-mail.

Thanks,
- IL
   ---- brought to you by your neighborhood Lerxst ----







레이블을 확인한다.

In [48]:
train.target[0]

7

레이블의 번호가 어떤 뉴스그룹을 가리키는지는 train.target_names에서 볼 수 있다.

In [49]:
train.target_names

['alt.atheism',
 'comp.graphics',
 'comp.os.ms-windows.misc',
 'comp.sys.ibm.pc.hardware',
 'comp.sys.mac.hardware',
 'comp.windows.x',
 'misc.forsale',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'rec.sport.hockey',
 'sci.crypt',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'soc.religion.christian',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc']

## TF-IDF

텍스트를 TF-IDF 단어문서행렬로 만든다.

In [50]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [51]:
tfidf = TfidfVectorizer(stop_words='english', max_features=10000)

In [52]:
x_train = tfidf.fit_transform(train.data)

데이터의 모양을 확인한다.

In [53]:
x_train.shape

(11314, 10000)

train.target을 one-hot encoding한다.

In [54]:
from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()
y_train = lb.fit_transform(train.target) 

원래 값과 one-hot encoding된 결과를 비교해본다.

In [55]:
train.target[:5]

array([ 7,  4,  4,  1, 14])

In [56]:
y_train[:5] # 20개 중에 하나이므로, 19개가 0, 7번만 1

array([[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]])

테스트 데이터에도 동일한 처리를 한다.

In [57]:
x_test = tfidf.transform(test.data)
y_test = lb.transform(test.target)

## 저장

In [58]:
import joblib

joblib.dump(
    {
        'tfidf': tfidf,
        'target_names': train.target_names,
        'x_train': x_train,
        'y_train': y_train,    
        'x_test': x_test,
        'y_test': y_test,
    },
    'newsgroup.pkl')

['newsgroup.pkl']

# 다항분류 및 Dropout과 BN 실습

In [59]:
import joblib
data = joblib.load('newsgroup.pkl')
locals().update(data)

## 단층 신경망 모형

In [112]:
import tensorflow as tf

In [113]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(20, activation='softmax')
]) # 뉴스그룹이 20개중 하나, activation은 softmax로
# 입력의 형태가 정해지지 않았으므로, model.summary는 에러바생

In [114]:
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(lr=0.01),
    metrics=['accuracy'])

In [115]:
model.fit(
    x_train.toarray(),
    y_train,
    epochs=30,
    validation_split=.1,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy')],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


<tensorflow.python.keras.callbacks.History at 0x20f1327de50>

In [116]:
model.summary()

Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_57 (Dense)             (None, 20)                200020    
Total params: 200,020
Trainable params: 200,020
Non-trainable params: 0
_________________________________________________________________


## 계수

In [117]:
import pandas as pd

In [118]:
w, _ = model.weights

In [119]:
w.numpy()

array([[-2.9104254 , -0.94404   , -1.1166897 , ..., -0.6063509 ,
        -1.1134286 , -2.1443493 ],
       [ 0.24162254, -1.2161368 , -0.49712718, ...,  0.3653656 ,
         0.23280773, -0.6512136 ],
       [-0.4498055 , -0.7232601 , -0.5132127 , ..., -0.5773479 ,
        -0.6466029 , -0.5361678 ],
       ...,
       [-0.5223099 , -0.6723426 ,  0.18309747, ..., -0.60862654,
        -0.5157819 , -0.571886  ],
       [-1.119206  , -1.2560188 , -1.1056676 , ..., -0.94855416,
        -1.1914417 , -1.0675484 ],
       [-1.005452  , -0.93737483, -0.38409838, ..., -0.7879447 ,
        -0.8972243 , -0.8602842 ]], dtype=float32)

In [120]:
weights = pd.DataFrame(w.numpy(), columns=target_names, index=tfidf.get_feature_names())


In [121]:
weights.head() # +는 가능성이 크다, -는 가능성이 작다.

Unnamed: 0,alt.atheism,comp.graphics,comp.os.ms-windows.misc,comp.sys.ibm.pc.hardware,comp.sys.mac.hardware,comp.windows.x,misc.forsale,rec.autos,rec.motorcycles,rec.sport.baseball,rec.sport.hockey,sci.crypt,sci.electronics,sci.med,sci.space,soc.religion.christian,talk.politics.guns,talk.politics.mideast,talk.politics.misc,talk.religion.misc
0,-2.910425,-0.94404,-1.11669,-0.434633,-1.820769,0.067174,1.811589,-0.943654,-0.783432,0.59644,-0.280682,-1.94408,-0.973372,-1.693,-1.059113,-0.425391,-0.278748,-0.606351,-1.113429,-2.144349
0,0.241623,-1.216137,-0.497127,-3.914483,-2.7018,-1.433206,0.630437,0.064944,-1.190411,0.629433,-1.295463,-1.224286,-0.207217,-1.461632,0.630078,-2.266793,0.354327,0.365366,0.232808,-0.651214
5,-0.449805,-0.72326,-0.513213,-0.566408,-0.578435,-0.378389,-0.535255,-0.491466,-0.693897,-0.740143,0.512901,-0.560832,0.284254,-0.785443,-0.768814,-0.702017,-0.397881,-0.577348,-0.646603,-0.536168
1,0.126227,0.359794,-3.075186,-0.047741,-0.319145,-1.551994,-0.899924,-1.05717,-0.994673,0.681395,-2.20718,0.63103,-0.951239,-1.231813,-0.769745,1.244427,-0.252249,-0.905358,-1.898649,-0.350859
2,0.033861,-1.590259,-0.151439,0.09533,-1.287629,0.087044,-0.504054,-0.416667,-0.860595,0.091072,-1.749259,-0.486391,0.467781,-0.852261,-0.134095,0.805853,-2.355234,-1.302598,0.126834,0.608218


In [122]:
target_names

['alt.atheism',
 'comp.graphics',
 'comp.os.ms-windows.misc',
 'comp.sys.ibm.pc.hardware',
 'comp.sys.mac.hardware',
 'comp.windows.x',
 'misc.forsale',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'rec.sport.hockey',
 'sci.crypt',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'soc.religion.christian',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc']

In [125]:
column = 'sci.space' # 과학, 우주에 관련된 게시판
weights.sort_values(column)[[column]].tail(10)

Unnamed: 0,sci.space
nasa,1.811197
orbital,1.939048
prb,1.973615
shuttle,1.998683
launch,2.160031
moon,2.19886
sci,2.289251
earth,2.333266
orbit,2.7691
space,3.624901


In [124]:
weights.sort_values(column, ascending=False)[[column]]

Unnamed: 0,sci.space
space,3.624901
orbit,2.769100
earth,2.333266
sci,2.289251
moon,2.198860
...,...
cars,-3.734363
bought,-3.752234
hp,-3.838088
windows,-4.166755


## 다층 신경망 모형

Dropout 레이어를 넣은 다층신경망 모형이다.

In [126]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, 'relu'), # 은닉층 64개의 출력을 받음
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, 'relu'),    
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(20, activation='softmax') # 최종 20개의 출력
])

Batch Normalization 레이어를 넣은 다층신경망 모형이다.

In [127]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(64, 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(20, activation='softmax')
])

만약 활성화 함수를 적용하기 전에 batch normalization을 적용하려면 다음과 같이 한다.

In [97]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'), # activation만 하는 layer
    tf.keras.layers.Dense(64),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(20, activation='softmax')
])

## sparse_categorical_crossentropy

텐서플로에서는 원핫인코딩을 하지 않아도 다항분류를 할 수 있다. 먼저 원핫인코딩 되기 이전의 형태로 되돌린다.

In [98]:
import numpy as np

벡터 
(
0
,
1
,
2
,
⋯
,
18
,
19
)
를 곱하면 각 행에서 1이 있는 위치의 값만 남는다.

In [99]:
train_target = y_train @ np.arange(20)

또는 각 행에서 가장 큰 값의 위치를 찾아도 된다.

In [100]:
train_target = np.argmax(y_train, axis=1)

원핫인코딩 이전의 형태로 되돌렸다.

In [101]:
train_target[:5]

array([ 7,  4,  4,  1, 14], dtype=int64)

손실 함수를 sparse_categorical_crossentropy로 바꾼다.

In [102]:
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(lr=0.1),
    metrics=['accuracy'])

y_train 대신 train_target으로 학습을 진행한다.

In [103]:
model.fit(
    x_train.toarray(),
    train_target,
    epochs=30,
    validation_split=.1,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy')],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30


<tensorflow.python.keras.callbacks.History at 0x20f0d55e130>

## label smoothing

레이블 스무딩을 하는 방법에는 두 가지가 있다. 하나는 데이터를 스무딩된 형태로 바꾸는 것이다.

In [104]:
e = 0.2

In [105]:
y_smooth = y_train * (1 - e) + (e / 20)
y_smooth

array([[0.01, 0.01, 0.01, ..., 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, ..., 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, ..., 0.01, 0.01, 0.01],
       ...,
       [0.01, 0.01, 0.01, ..., 0.01, 0.01, 0.01],
       [0.01, 0.81, 0.01, ..., 0.01, 0.01, 0.01],
       [0.01, 0.01, 0.01, ..., 0.01, 0.01, 0.01]])

학습을 시킬 때 y_train 대신 y_smooth를 사용하면 된다.

또는 손실 함수 CategoricalCrossentropy에서 label_smoothing을 설정해주어도 된다.

In [106]:
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.2),
    optimizer='adam',
    metrics=['accuracy'])