-----

# 0. 기존 작업

In [1]:
import tensorflow as tf
tf.random.set_seed(42)

In [2]:
from tensorflow.keras.datasets import fashion_mnist

(x, y), (x_test, y_test) = fashion_mnist.load_data()


from tensorflow.keras.utils import to_categorical

x = x.astype('float32') 
x_test = x_test.astype('float32')

x /= 255
x_test /= 255

y = to_categorical(y, 10)
y_test = to_categorical(y_test, 10)


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD

-----

In [3]:
lr = 0.01
momentum = 0.9

---

In [4]:
def create_model(learning_rate=lr, momentum = momentum):
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(64, 'relu'))
    model.add(Dense(128, 'relu'))
    model.add(Dense(units=10, activation='softmax'))
    
    sgd = SGD(learning_rate=learning_rate, momentum=momentum, nesterov=False)
    model.compile(optimizer=sgd, 
                  loss='categorical_crossentropy',
                 metrics=['accuracy'])

    return model

model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 32)                25120     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total params: 36,842
Trainable params: 36,842
Non-trainable params: 0
_________________________________________________________________


----

# 1. schedule 함수 만들기

epoch의 값에 따라 learning_rate가 변하는 함수를 만들면 됩니다.

In [5]:
def my_schedule(epoch, learning_rate=lr):
    if epoch < 5:
        return lr
    else:
        return float(lr * tf.math.exp(0.1 * (5- epoch)))

In [6]:
from tensorflow.keras.callbacks import LearningRateScheduler

In [7]:
lr_schedule_custom = LearningRateScheduler(my_schedule)

In [8]:
model = create_model()

model.fit(x, y,  epochs = 20, validation_split = 1/6, callbacks = [lr_schedule_custom], batch_size=512, shuffle=False)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0xc8ab79d6d8>

# 2. tensorflow의 scheduler 사용

https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules

**def** decayed_learning_rate(step):

  > return initial_learning_rate * decay_rate ^ (step / decay_steps)
  
  
`decay_steps` 마다 `decay_rate`의 비율로 감소

In [9]:
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [10]:
lr_scheduler_exp = ExponentialDecay(lr, decay_steps=10000, decay_rate=0.96, staircase=False, name=None)

In [11]:
def exp_model(learning_rate=lr, momentum = momentum):
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(64, 'relu'))
    model.add(Dense(128, 'relu'))
    model.add(Dense(units=10, activation='softmax'))

    return model

In [12]:
model = exp_model()
sgd = SGD(learning_rate=lr_scheduler_exp, momentum=momentum, nesterov=False)
model.compile(optimizer=sgd, 
                  loss='categorical_crossentropy',
                 metrics=['accuracy'])


model.fit(x, y,  epochs = 20, validation_split = 1/6, batch_size=512)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0xc8ac2f6a90>

In [13]:
model.optimizer.lr(100000) # 0.96^10

<tf.Tensor: shape=(), dtype=float32, numpy=0.0066483244>

# 3. ReduceLRonPlateau

In [14]:
#plateau: 학습개선이 없는 상태 
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [15]:
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1)

#factor: Plateau 상태일때 0.1씩 decrease

In [16]:
model = create_model()

model.fit(x, y,  epochs = 20, validation_split = 1/6, callbacks = [lr_schedule_custom, reduce_lr], batch_size=512, shuffle=False)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0006065306719392539.
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x24a8e276548>