# MLP(다층 퍼셉트론), 딥러닝, 심층신경망(DNN)에서 활성화 함수로서의 시그모이드 함수]

## 오차 역전파 과정에서 기울기 소실 문제 발생

#**RELU 함수 사용**

#은닉층을 추가한 MLP

In [1]:
from tensorflow import keras
import numpy as np

## 데이터 수집
(train_input,train_target), (test_input,test_target) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [2]:
print(train_input.shape)
print(np.unique(train_target, return_counts=True))

(60000, 28, 28)
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8), array([6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]))


In [3]:
# 데이터 전처리(정규화)
train_scaled = train_input / 255
train_scaled = train_scaled.reshape(-1,28*28)

print(train_scaled.shape)

(60000, 784)


In [4]:
from sklearn.model_selection import train_test_split
# 데이터 전처리( 데이터 분할 )
train_scaled, val_scaled, train_target, val_target = train_test_split(
    train_scaled, train_target, test_size = 0.2, random_state=42
)

print(train_target.shape)
print(train_scaled.shape)

print(val_target.shape)
print(val_scaled.shape)

(48000,)
(48000, 784)
(12000,)
(12000, 784)


In [5]:
# dense1 layer: 활성함수: sigmoid, 입력층을 포함,, 노드 100개로 구성한다.
dense1 = keras.layers.Dense(100,activation="sigmoid",input_shape = (784,)) #은닉층

# dense2 layer: 활성함수: softmax, 출력 노드 10개로 구성
dense2 = keras.layers.Dense(10,activation='softmax')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# 심층 신경망 구축(DNN)

In [6]:
model = keras.Sequential([dense1, dense2])
 # keras.Sequential 은 keras.models.Sequential과 같다(모듈간 재노출)

In [7]:
model.summary()

In [8]:
## 모델 컴파일
model.compile(optimizer= 'rmsprop', loss = "sparse_categorical_crossentropy",metrics=['accuracy'])

In [9]:
## 모델 훈련
model.fit(train_scaled, train_target, epochs=5, batch_size = 32)

Epoch 1/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.7592 - loss: 0.7631
Epoch 2/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.8489 - loss: 0.4219
Epoch 3/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.8603 - loss: 0.3830
Epoch 4/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8677 - loss: 0.3620
Epoch 5/5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8781 - loss: 0.3366


<keras.src.callbacks.history.History at 0x7a05ad0472b0>

In [10]:
# 검증 세트로 성능 확인
model.evaluate(val_scaled, val_target)

[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8739 - loss: 0.3493


[0.35282188653945923, 0.871833324432373]

# Fatten층과 relu 활성함수를 사용한 버전
## Flatten은 퍼셉트론 층은 아니다
# 모델 파라미터를 학습하는 층이 퍼셉트론 층으로 인정

In [11]:
(train_input ,train_target), (test_input, test_target) = keras.datasets.fashion_mnist.load_data()

train_scaled = train_input / 255

train_scaled_nflat, val_scaled, train_target_nflat, val_target = train_test_split(
    train_scaled, train_target, test_size=0.2, random_state = 42
)

In [12]:
model = keras.Sequential()

model.add(keras.Input(shape=(28,28)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10,activation='softmax'))

In [13]:
model.summary()

In [14]:
model.compile(optimizer = 'rmsprop', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [15]:
# 모델 훈련 에포크와 배치 사이즈 결정
model.fit(train_scaled, train_target,epochs=5)

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.7758 - loss: 0.6440
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8577 - loss: 0.3945
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.8728 - loss: 0.3550
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8815 - loss: 0.3283
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.8848 - loss: 0.3162


<keras.src.callbacks.history.History at 0x7a059131b760>

In [16]:
# 모델 검증
model.evaluate(val_scaled, val_target)

[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8889 - loss: 0.3020


[0.3059512674808502, 0.8879166841506958]

# optimizer를 adam으로 사용한 버전 (momentum+rmsprop)

In [17]:
model.compile(optimizer='adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

model.fit(train_scaled, train_target, epochs=5)

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8908 - loss: 0.2996
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8963 - loss: 0.2859
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9004 - loss: 0.2690
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9044 - loss: 0.2578
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9092 - loss: 0.2485


<keras.src.callbacks.history.History at 0x7a05920169e0>

In [18]:
model.evaluate(val_scaled, val_target)

[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9085 - loss: 0.2345


[0.23843277990818024, 0.909416675567627]

#  모델을 구축하는 다른 방법

In [20]:
# 방법 2.

model = keras.Sequential([
    keras.layers.Dense(100,activation='sigmoid',input_shape = (784,),name='hidden'),
    keras.layers.Dense(10, activation = 'softmax', name = 'output')
], name = "FashionMNIST model")

In [21]:
model.summary()

In [22]:
from keras import layers, models
model = models.Sequential()
model.add(layers.Dense(100,activation='sigmoid',input_shape=(784,)))
model.add(layers.Dense(10,activation='softmax'))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [23]:
model.summary()

In [26]:
flatten = keras.layers.Flatten(input_shape = (28,28)) # 입력층을 합친다

dense1 = keras.layers.Dense(100, activation = 'relu')

dense2 = keras.layers.Dense(10, activation = 'softmax')

model = keras.Sequential([flatten,dense1,dense2])

  super().__init__(**kwargs)


In [25]:
model.summary()