In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import fashion_mnist

In [None]:
# Load the Fashion-MNIST dataset through Keras, then unpack each split
# into its images (x_*) and its labels (t_*).
train_split, test_split = fashion_mnist.load_data()
x_train, t_train = train_split
x_test, t_test = test_split

# Report the array shapes of both splits.
print('\n train shape = ', x_train.shape, ', train label shape = ', t_train.shape)
print(' test shape = ', x_test.shape, ', test label shape =', t_test.shape)

# Show the label arrays themselves.
print('\n train label = ', t_train)  # training labels
print(' test label  = ', t_test)     # test labels

In [None]:
import matplotlib.pyplot as plt

# Preview the first training images in a grid, each titled with its label.
# The number of images is derived from the grid size so the loop bound and the
# subplot layout cannot drift apart.
n_rows, n_cols = 5, 5

plt.figure(figsize=(6, 8))

for index in range(n_rows * n_cols):  # 25 images in total
    plt.subplot(n_rows, n_cols, index + 1)  # subplot positions are 1-based
    plt.imshow(x_train[index], cmap='gray')
    plt.axis('off')
    plt.title(str(t_train[index]))
plt.show()

- 0: T-shirt/top
- 1: Trouser
- 2: Pullover
- 3: Dress
- 4: Coat
- 5: Sandal
- 6: Shirt
- 7: Sneaker
- 8: Bag
- 9: Ankle Boot

In [None]:
# Print the raw pixel array of test image 1 (this runs before the normalization cell).
print(x_test[1])

In [None]:
# Render test image 1 in grayscale, with a colorbar for the pixel-value scale.
test_image = plt.imshow(x_test[1], cmap='gray')
plt.colorbar(test_image)
plt.show()

In [None]:
# Histogram of the integer training labels (classes 0-9).
# Bin edges sit at -0.5, 0.5, ..., 9.5 so each bar is centred on its class
# label; with `bins=10` the edges fall at multiples of 0.9 and the bars drift
# away from the integer tick positions.
plt.title('train label distribution')
plt.grid()
plt.xlabel('label')
plt.hist(t_train, bins=np.arange(11) - 0.5, rwidth=0.8)
plt.xticks(range(10))  # one tick per class label
plt.show()

In [None]:
# Check the class distribution of the training labels.
# np.bincount tallies the integer labels in one vectorized pass instead of a
# per-sample Python loop; minlength=10 keeps one slot for each of the 10
# classes even if a class has no samples. The counts are integers.
label_distribution = np.bincount(t_train, minlength=10)

print(label_distribution)

In [None]:
# Normalize the training / test images by dividing by 255 (min-max scaling
# with min = 0 and max = 255, i.e. raw pixel values are assumed to span 0-255).
# The max() check makes this cell safe to re-run: once the data has been
# scaled its maximum is at most 1.0, so a second run does not divide by 255
# again and silently shrink the inputs.
PIXEL_MAX = 255.0

if x_train.max() > 1.0:
    x_train = x_train / PIXEL_MAX
if x_test.max() > 1.0:
    x_test = x_test / PIXEL_MAX
print(x_train[0])

In [None]:
# One-hot encode the class labels,
# e.g. label 5 -> [0, 0, 0, 0, 0, 1, 0, 0, 0, 0].
# Only 1-D label arrays are converted, so re-running this cell does not try to
# encode labels that have already been one-hot encoded.
if t_train.ndim == 1:
    t_train = tf.keras.utils.to_categorical(t_train, num_classes=10)
if t_test.ndim == 1:
    t_test = tf.keras.utils.to_categorical(t_test, num_classes=10)

In [None]:
# Inspect the encoded label of the first training sample and the first test sample.
for first_label in (t_train[0], t_test[0]):
    print(first_label)

In [None]:
# Fully connected classifier: 28x28 input -> flatten -> 100 ReLU units -> 10 softmax outputs.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),    # input layer
    tf.keras.layers.Dense(100, activation='relu'),    # hidden layer
    tf.keras.layers.Dense(10, activation='softmax'),  # output layer
])

In [None]:
# Configure training: Adam optimizer with a 1e-3 learning rate, accuracy as the reported metric.
adam = tf.keras.optimizers.Adam(learning_rate=1e-3)

model.compile(
    optimizer=adam,
    # categorical_crossentropy is used when there are 3 or more classes to classify
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Part of the training data is set aside as validation data so that
# overfitting can be checked from the validation metrics.
hist = model.fit(
    x_train,
    t_train,
    epochs=30,
    validation_split=0.3,
)

In [None]:
# Evaluate the trained model on the test data; the returned value is shown as the cell output.
model.evaluate(x_test, t_test)

In [None]:
# Run the model on the test images and keep its outputs for the inspection cells that follow.
predictions=model.predict(x_test)

In [None]:
# Display the model output for test sample 5.
predictions[5]

In [None]:
# argmax gives the index of the largest value in predictions[5], i.e. the predicted class.
label = predictions[5].argmax()
label

In [None]:
# Render test image 5 in grayscale, with a colorbar for the pixel-value scale.
test_image = plt.imshow(x_test[5], cmap='gray')
plt.colorbar(test_image)
plt.show()

In [None]:
# Training vs. validation loss per epoch, read from the history recorded by model.fit().
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'validation loss')):
    plt.plot(hist.history[history_key], label=curve_label)

plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.grid()
plt.legend(loc='best')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch, read from the history recorded by model.fit().
for history_key, curve_label in (('accuracy', 'train accuracy'), ('val_accuracy', 'validation accuracy')):
    plt.plot(hist.history[history_key], label=curve_label)

plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.grid()
plt.legend(loc='best')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
plt.figure(figsize=(6, 6))

predicted_value = model.predict(x_test)

# The confusion matrix shows exactly how many samples were classified into each class.
# Both the targets and the model outputs are reduced to class indices with argmax
# over the last axis before being compared.
true_labels = np.argmax(t_test, axis=-1)
predicted_labels = np.argmax(predicted_value, axis=-1)
cm = confusion_matrix(true_labels, predicted_labels)

# confusion_matrix(y_true, y_pred) puts true classes on the rows and predicted
# classes on the columns, so the axes are labelled to match.
ax = sns.heatmap(cm, annot=True, fmt='d')
ax.set_xlabel('predicted label')
ax.set_ylabel('true label')
ax.set_title('confusion matrix')
plt.show()