# 1. import
데이터 분석 및 딥러닝에 사용될 라이브러리를 import 한다.

In [None]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.datasets import fashion_mnist

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

## 2. Data 가져오기
분석할 데이터(Fashion MNIST)를 가져온다.

In [None]:
# Load Fashion-MNIST through the Keras dataset helper, then unpack each
# split into its image array and its label array.
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Report the shape of every loaded array, in the order:
# train images, train labels, test images, test labels.
for split_array in (x_train, y_train, x_test, y_test):
    print(split_array.shape)

In [None]:
# Show the raw pixel grid of the training image at index 1 as a table.
second_image = x_train[1]
print(pd.DataFrame(second_image))

In [None]:
# Human-readable class names keyed by integer label (0-9); the list
# position of each name is its label.
item = dict(enumerate([
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]))

## 3. EDA(Exploratory Data Analysis)
Fashion MNIST 데이터에 대한 분석을 진행합니다.

Label Count

In [None]:
# Compare how many samples each class has in the training and test splits.
plt.figure(figsize=(10, 3))

# Left panel: training labels, coloured with a grey ramp (#000000 .. #999999).
plt.subplot(1, 2, 1)
# The labels are passed as the keyword ``x``: in current seaborn releases the
# first positional parameter is ``data``, so a positional label array is not
# interpreted as the variable to count.
sns.countplot(x=y_train, palette=['#' + str(i) * 6 for i in range(10)])
plt.title('train labels count')

# Right panel: test labels, coloured with a red ramp (#fb0000 .. #fb9999).
plt.subplot(1, 2, 2)
sns.countplot(x=y_test, palette=['#fb' + str(i) * 4 for i in range(10)])
plt.title('test labels count')

# Render the figure (the original called the no-op ``plt.plot()`` here).
plt.show()

Train Image

In [None]:
# Show the first 36 training images in a 6x6 grid, each titled with the
# class name looked up from its label.
plt.figure(figsize=(12, 12))
# The figure-level title is identical for every panel, so set it once
# instead of on every loop iteration.
plt.suptitle('Train Images', fontsize=20)
for position, (image, label) in enumerate(zip(x_train[:36], y_train[:36]), start=1):
    plt.subplot(6, 6, position)
    plt.title(item[label])
    plt.imshow(image, cmap=plt.cm.gray)
    plt.axis("off")

plt.show()

Test Image

In [None]:
# Show the first 36 test images in a 6x6 grid, each titled with the
# class name looked up from its label.
plt.figure(figsize=(12, 12))
# The figure-level title is identical for every panel, so set it once
# instead of on every loop iteration.
plt.suptitle('Test Images', fontsize=20)
for position, (image, label) in enumerate(zip(x_test[:36], y_test[:36]), start=1):
    plt.subplot(6, 6, position)
    plt.title(item[label])
    plt.imshow(image, cmap=plt.cm.gray)
    plt.axis("off")

plt.show()

## 4. 전처리(Preprocessing)
CNN(Convolutional Neural Network)에 데이터를 입력하기 전, 전처리를 진행합니다.

In [None]:
# Give every image an explicit single-channel axis (N, 28, 28, 1), cast to
# float32 and divide by 255 (presumably mapping 8-bit pixel values to [0, 1]).
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255

In [None]:
# One-hot encode the integer class labels of both splits into 10 classes.
y_onehot_train, y_onehot_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

In [None]:
# Confirm the shapes of the preprocessed inputs and one-hot targets.
for prepared in (x_train, y_onehot_train, x_test, y_onehot_test):
    print(prepared.shape)

## 5. 모델 생성
케라스를 사용하여 신경망을 구성한다. 

Model Parameters

In [None]:
# --- Network dimensions ---
INPUT_SHAPE = (28, 28, 1)  # height, width, channels of one input image
OUTPUT_SHAPE = 10          # number of target classes

# --- Training settings ---
EPOCHS = 10        # passes over the training data
BATCH_SIZE = 128   # samples per gradient update
VERBOSE = 1        # verbosity level handed to Keras

Model

In [None]:
# Small CNN classifier: one convolution + pooling stage for feature
# extraction, followed by two dropout-regularised dense layers and a
# softmax output with one unit per class.
model = Sequential([
    # Feature extraction: 32 filters of size 3x3, then 2x2 max pooling.
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=INPUT_SHAPE),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the feature maps into a vector for the dense layers.
    Flatten(),

    Dense(128, activation='relu'),
    Dropout(0.25),

    Dense(64, activation='relu'),
    Dropout(0.25),

    # Use the shared OUTPUT_SHAPE constant rather than a hard-coded 10 so the
    # class count is defined in a single place.
    Dense(OUTPUT_SHAPE, activation='softmax'),
])

Compile Model

In [None]:
# Configure training: categorical cross-entropy loss (targets are one-hot),
# the Adam optimizer, and accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model Summary

In [None]:
# Print Keras' textual summary of the model.
model.summary()

Model Fitting

In [None]:
# Train the network on the one-hot training data; a 0.3 fraction of the
# training set is passed as the validation split. The returned object is
# kept so its per-epoch metrics can be read later.
history = model.fit(
    x=x_train,
    y=y_onehot_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.3,
    verbose=VERBOSE,
)

Accuracy and Loss

In [None]:
# Plot the training and validation curves side by side:
# accuracy in the left panel, loss in the right panel.
plt.figure(figsize=(14, 5))

# (history key, display name, legend position) for each panel.
curve_specs = (
    ('accuracy', 'Accuracy', 'lower right'),
    ('loss', 'Loss', 'upper right'),
)
for position, (metric, name, legend_loc) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f'Training {name}')
    # Validation metrics are stored under the same key with a 'val_' prefix.
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {name}')
    plt.legend(loc=legend_loc)
    plt.title(f'Training and Validation {name}')

plt.show()

Evaluating Model

In [None]:
# Evaluate the trained model on the held-out test images and one-hot labels.
model.evaluate(x=x_test, y=y_onehot_test)

Predicting on Test

In [None]:
# Per-class scores produced by the model's softmax output for each test image.
y_pred_enc = model.predict(x_test)
# Predicted label = index of the highest score in each row. A single
# vectorised argmax over axis 1 replaces the per-row Python loop.
y_pred = np.argmax(y_pred_enc, axis=1)

In [None]:
# Show the first eight test images in a 2x4 grid, each titled with the
# class name the model predicted for it.
# The grid is created up front: mixing a single-Axes plt.subplots() call with
# later plt.subplot(2, 4, ...) calls stacks the panels on top of a
# full-figure Axes that current Matplotlib no longer removes automatically.
fig, axes = plt.subplots(2, 4, figsize=(18, 8))
fig.suptitle("Predicted values", fontsize=20)
# zip stops after the eight available axes, so only the first eight
# images/predictions are drawn.
for ax, image, predicted_label in zip(axes.flat, x_test, y_pred):
    ax.set_title(item[predicted_label])
    # Drop the channel axis so imshow receives a plain 28x28 grayscale image.
    ax.imshow(image.reshape(28, 28), cmap=plt.cm.gray)
    ax.axis('off')

plt.show()

In [None]:
# Print the classification report comparing the true test labels with the
# predicted ones.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Draw the confusion matrix of true vs. predicted test labels as an
# annotated heatmap, with the class names on both axes.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    confusion_matrix(y_test, y_pred),
    ax=ax,
    cmap='Blues',
    fmt='3d',
    annot=True,
    cbar=False,
)
ax.set_title('Confusion Matrix', loc='left', fontsize=16)
# y_test is passed first, so rows are the actual classes and columns the
# predicted ones.
ax.set(xlabel='Predicted', ylabel='Actual')
ax.set_xticklabels(item.values())
ax.set_yticklabels(item.values(), rotation=0)

plt.show()