# 텐서플로우 2.x 내 이미지 분류

## 빅데이터

### 이영석, 문현수

#### munhyunsu@cs-cnu.org

#### 참고자료
- [파이썬 3 표준 문서](https://docs.python.org/3/index.html)
- [텐서플로우 이미지 분류](https://www.tensorflow.org/tutorials/keras/classification)

### 1. 도구 불러오기 및 버전 확인

In [None]:
# 도구 준비
import os
import shutil
import pickle

import tensorflow as tf # 텐서플로우
import matplotlib.pyplot as plt # 시각화 도구
%matplotlib inline
import numpy as np

import PIL.Image as Image

In [None]:
# Report the installed TensorFlow version so the notebook run can be reproduced.
tf_version = tf.__version__
print(f'Tensorflow 버전을 확인합니다: {tf_version}')

### Optional. 학습데이터 처리
- CIFAR 10 이미지를 위해서는 불필요함.
- 자신의 데이터를 이용해서 분류하는 방법을 익히기 위하여 처리.
- Python3 dict(pickle)를 이미지 파일(png)로 변환

In [None]:
# pickle_root = 'cifar-10-batches-py'
# png_root = 'dataset'

In [None]:
# def unpickle(file):
#     with open(file, 'rb') as fo:
#         data = pickle.load(fo, encoding='bytes')
#     return data

In [None]:
# meta = unpickle(os.path.join(pickle_root, 'batches.meta'))

In [None]:
# label_names = []
# for label in meta[b'label_names']:
#     label_names.append(label.decode('utf-8'))
# label_names

In [None]:
# for bpath in ['data_batch_1', 'data_batch_2', 'data_batch_3',
#               'data_batch_4', 'data_batch_5', 'test_batch']:
#     data = unpickle(os.path.join(pickle_root, bpath))
#     for raw, label, filename in zip(data[b'data'], data[b'labels'], data[b'filenames']):
#         raw = raw.reshape(32, 32, 3, order='F').swapaxes(0, 1)
#         label_name = label_names[label]
#         odir = os.path.join(png_root, label_name)
#         os.makedirs(odir, exist_ok=True)
#         opath = os.path.join(png_root, label_name, filename.decode('utf-8'))
#         image = Image.fromarray(raw)
#         image.save(opath)
#     print(f'Done {bpath}')

### 2. 학습 데이터 불러오기

In [None]:
# Load the image dataset from disk.
# The directory is expected to contain one sub-directory per class; the
# generator rescales pixel values by 1/255 and holds out 20% of the files
# as the validation subset.
png_root = 'dataset'
dataset_root = os.path.abspath(os.path.expanduser(png_root))
print(f'Dataset root: {dataset_root}')

IMAGE_SHAPE = (32, 32)
BATCH_SIZE = 1000

image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255,
    validation_split=0.2,
)

# Both subsets are read with identical options; only `subset` differs.
flow_options = dict(target_size=IMAGE_SHAPE, batch_size=BATCH_SIZE)
train_data = image_generator.flow_from_directory(
    dataset_root, subset='training', **flow_options)
validation_data = image_generator.flow_from_directory(
    dataset_root, subset='validation', **flow_options)

# Order the class names by their integer index so that label_names[i] is the
# display name of class i.
class_indices = validation_data.class_indices
label_names = np.array(
    [name.title() for name in sorted(class_indices, key=class_indices.get)])

# Pull a single validation batch; later cells reuse image_batch/label_batch.
image_batch, label_batch = next(iter(validation_data))
print(f'Image batch shape: {image_batch.shape}')
print(f'Label batch shape: {label_batch.shape}')

### 3. 학습 데이터 살펴보기

In [None]:
# Preview N images of the current batch, starting at index IDX, in a grid
# that is COLS images wide. Each title shows the batch index and class name.
# NOTE: image_batch/label_batch were drawn from `validation_data` above, even
# though the figure title says "train image".
IDX = 0
N = 30
COLS = 5

# Clamp the window so it never runs past the end of the batch.
stop = min(IDX + N, len(image_batch))
shown = max(stop - IDX, 0)
# Ceiling division: a partially filled last row still needs its own row.
rows = max(-(-shown // COLS), 1)

fig = plt.figure(figsize=(12, 2.75*rows))
fig.suptitle('Example of train image', fontsize=24)
# Subplot positions are 1-based and relative to the window, not to the batch
# index; using n+1 directly breaks as soon as IDX > 0.
for position, n in enumerate(range(IDX, stop), start=1):
    ax = fig.add_subplot(rows, COLS, position)
    ax.imshow(image_batch[n])
    ax.set_title(f'{n}-{label_names[np.argmax(label_batch[n])]}', fontsize=16)
    ax.grid(False)

### 4. 모델 생성

In [None]:
# Small convolutional classifier: two conv layers with one max-pool between
# them, followed by a dense head. The last Dense layer has no activation, so
# the model outputs one raw score per class.
layers = tf.keras.layers
input_shape = image_batch.shape[1:]   # per-image shape, batch axis dropped
num_classes = train_data.num_classes

layer_stack = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes),
]
model = tf.keras.Sequential(layer_stack)
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy computed on
# raw logits (from_logits=True), and accuracy as the reported metric.
learning_rate = 0.001

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [None]:
# Train for a fixed number of epochs, evaluating on the validation subset
# after each one; `history` keeps the object returned by fit().
EPOCHS = 10

fit_options = dict(epochs=EPOCHS, validation_data=validation_data)
history = model.fit(train_data, **fit_options)

In [None]:
# Predict the classes of the cached batch and compare them with the labels.
predicted_batch = model.predict(image_batch)
predicted_id = np.argmax(predicted_batch, axis=-1)   # most likely class per image
predicted_label_batch = label_names[predicted_id]

label_id = np.argmax(label_batch, axis=-1)           # one-hot label -> class index

# Show the first 30 images in a 6x5 grid; the title colour marks whether the
# prediction matches the label.
fig = plt.figure(figsize=(10, 10.5))
for n in range(30):
    if predicted_id[n] == label_id[n]:
        color = 'green'
    else:
        color = 'red'
    ax = fig.add_subplot(6, 5, n+1)
    ax.imshow(image_batch[n])
    ax.set_title(predicted_label_batch[n].title(), color=color)
    ax.axis('off')
_ = fig.suptitle('Model predictions (green: correct, red: incorrect)')