In [1]:
from tensorflow import keras
# Load Fashion-MNIST through Keras; the loader returns two (inputs, targets)
# pairs, one for the training set and one for the test set.
(train_input, train_target), (test_input, test_target) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [2]:
from sklearn.model_selection import train_test_split

# Rescale the raw pixel values by 1/255 and flatten every 28x28 image into a
# single 784-element row so it can feed a Dense layer directly.
train_scaled = (train_input / 255.0).reshape(-1, 28 * 28)

# Hold out 20% of the training data as a validation split (fixed seed).
# NOTE: this reassigns train_target, so re-running the cell without reloading
# the data would try to split arrays of different lengths.
train_scaled, val_scaled, train_target, val_target = train_test_split(
    train_scaled,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Hidden layer: 100 sigmoid units fed by the 784-element flattened image.
dense1 = keras.layers.Dense(
    units=100,
    activation='sigmoid',
    input_shape=(784,),
)
# Output layer: 10 units with softmax.
dense2 = keras.layers.Dense(units=10, activation='softmax')

신경망 모델 생성

In [5]:
# Stack the two prepared layers, hidden first and output last, into a Sequential model.
model = keras.Sequential(layers=[dense1, dense2])

In [6]:
# Print the model's layer table: layer names/types, output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               78500     
                                                                 
 dense_1 (Dense)             (None, 10)                1010      
                                                                 
Total params: 79,510
Trainable params: 79,510
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Same two-layer network, but built inline with explicit layer names and a
# model name so that summary() shows readable identifiers.
# NOTE(review): the model name contains spaces; some TensorFlow/Keras versions
# may reject such names when they are used as a scope name - confirm, or use
# underscores as in 'MNIST_Model'.
model = keras.Sequential(
    layers=[
        keras.layers.Dense(units=100, activation='sigmoid', input_shape=(784,), name='hidden'),
        keras.layers.Dense(units=10, activation='softmax', name='output'),
    ],
    name='Fashion MNIST Model',
)

In [8]:
# Print the layer table again; the custom model and layer names now appear in it.
model.summary()

Model: "Fashion MNIST Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 hidden (Dense)              (None, 100)               78500     
                                                                 
 output (Dense)              (None, 10)                1010      
                                                                 
Total params: 79,510
Trainable params: 79,510
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Third construction style: start from an empty, named Sequential model and
# append the layers one at a time with add().
model = keras.Sequential(name='MNIST_Model')
model.add(
    keras.layers.Dense(
        units=100,
        activation='sigmoid',
        input_shape=(784,),
        name='hidden',
    )
)
model.add(
    keras.layers.Dense(
        units=10,
        activation='softmax',
        name='output',
    )
)

In [17]:
# Print the layer table for the model that was assembled with add().
model.summary()

Model: "MNIST_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 hidden (Dense)              (None, 100)               78500     
                                                                 
 output (Dense)              (None, 10)                1010      
                                                                 
Total params: 79,510
Trainable params: 79,510
Non-trainable params: 0
_________________________________________________________________


모델 compile 하기

In [18]:
# Configure training. The sparse variant of categorical cross-entropy is used
# with the integer class targets as-is (no one-hot encoding step). No optimizer
# is given, so Keras falls back to its default.
# `metrics` is passed as a list, which is the documented form; newer Keras
# releases raise a ValueError for a bare string.
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])

모델 훈련하기

In [19]:
# Train on the training split for 5 epochs (batch size left at the Keras default).
# fit() returns a History object, which the notebook echoes as the cell output.
model.fit(train_scaled, train_target, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f926b53a7d0>

ReLU함수

In [20]:
# ReLU variant of the network. A Flatten layer takes the 28x28 images directly,
# so the input no longer has to be reshaped by hand before training.
model = keras.Sequential()
model.add(
    keras.layers.Flatten(input_shape=(28, 28))
)
model.add(
    keras.layers.Dense(units=100, activation='relu')
)
model.add(
    keras.layers.Dense(units=10, activation='softmax')
)

In [25]:
# Reload the raw data: the earlier preparation cell flattened the images and
# overwrote train_target with the split, while this model expects 28x28 inputs.
(train_input, train_target), (test_input, test_target) = (
    keras.datasets.fashion_mnist.load_data()
)

# Rescale by 1/255 but keep the 2-D image shape (the Flatten layer handles it),
# then hold out 20% as a validation split with a fixed seed.
train_scaled = train_input / 255.0
train_scaled, val_scaled, train_target, val_target = train_test_split(
    train_scaled,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [26]:
# Compile with the default optimizer and train the ReLU model for 5 epochs.
# `metrics` is given as a list (the documented form); newer Keras releases
# raise a ValueError for a bare string.
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_scaled, train_target, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f926cfe6390>

In [28]:
# Score the trained model on the held-out validation split.
# The result is a list: [loss, accuracy].
model.evaluate(val_scaled, val_target)



[0.35471245646476746, 0.8791666626930237]

In [29]:
# Select the optimizer by its string alias: 'sgd' uses mini-batch gradient
# descent with the optimizer's default settings.
# `metrics` is given as a list (the documented form); newer Keras releases
# raise a ValueError for a bare string.
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [31]:
# Equivalent to optimizer='sgd', but with an explicit optimizer object so its
# hyperparameters can be customised.
# `metrics` is given as a list (the documented form); newer Keras releases
# raise a ValueError for a bare string.
sgd = keras.optimizers.SGD()
model.compile(optimizer=sgd, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [32]:
# SGD with an explicit learning rate of 0.1.
# Note: this cell only constructs the optimizer; it is not passed to
# model.compile() here.
sgd = keras.optimizers.SGD(learning_rate=0.1)

In [34]:
# SGD with momentum 0.9.
sgd = keras.optimizers.SGD(momentum=0.9, nesterov=True) # nesterov=True enables Nesterov momentum optimization (Nesterov accelerated gradient)

- 네스테로프 모멘텀은 모멘텀 최적화를 2번 반복하여 구현
- 대부분의 경우 네스테로프 모멘텀 최적화가 기본 확률적 경사 하강법보다 더 나은 성능을 제공

모델이 최적점에 가까이 갈수록 학습률을 낮출 수 있고 이렇게 하면 안정적으로 최적점에 수렴할 가능성이 높아진다. 

이러한 학습률을 **`적응적 학습률`**이라고 한다.
- 학습률 매개변수를 튜닝하는 수고를 덜 수 있다.

### Adagrad & RMSprop
- 적응적 학습률을 사용하는 대표적인 옵티마이저
- 기본값으로 learning_rate = 0.001

In [35]:
# Adagrad: an adaptive-learning-rate optimizer, created with default settings.
# The metric name was misspelled ('accuraccy'), which Keras cannot resolve to
# a known metric; it is corrected to 'accuracy' and passed as a list, the
# documented form of the `metrics` argument.
adagrad = keras.optimizers.Adagrad()
model.compile(optimizer=adagrad, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [36]:
# RMSprop: another adaptive-learning-rate optimizer, created with default settings.
# `metrics` is given as a list (the documented form); newer Keras releases
# raise a ValueError for a bare string.
rmsprop = keras.optimizers.RMSprop()
model.compile(optimizer=rmsprop, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Adam
- 모멘텀 최적화와 RMSprop의 장점을 접목한 옵티마이저
- 기본 학습률 0.001

In [37]:
# Build a fresh, untrained copy of the Flatten -> Dense(relu) -> Dense(softmax)
# network so the Adam run does not start from previously trained weights.
model = keras.Sequential()
model.add(
    keras.layers.Flatten(input_shape=(28, 28))
)
model.add(
    keras.layers.Dense(units=100, activation='relu')
)
model.add(
    keras.layers.Dense(units=10, activation='softmax')
)

In [40]:
# Compile with the Adam optimizer (selected by its string alias, default
# settings) and train for 5 epochs.
# `metrics` is given as a list (the documented form); newer Keras releases
# raise a ValueError for a bare string.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_scaled, train_target, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f926a3f2810>

In [41]:
# Score the Adam-trained model on the held-out validation split.
# The result is a list: [loss, accuracy].
model.evaluate(val_scaled, val_target)



[0.3470974564552307, 0.875]