# 05_tensorflow_image_cnn

## CNN CIFAR 10 Example 2 (학습데이터 보강)

데이터 로드 및 정규화 --> 데이터 증강(ImageDataGenerator 사용) --> CNN 모델링
  
(7 conv. layer, 5 pooling layer)

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import cifar10

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the CIFAR-10 training and test splits (images and labels).
(xtr, ytr), (xt, yt) = cifar10.load_data()

# Normalize: cast the image arrays to float32 and divide every pixel by 255.
xtr, xt = (images.astype(np.float32) / 255.0 for images in (xtr, xt))

# Show the array shapes: images on the first line, labels on the second.
print(xtr.shape, xt.shape)
print(ytr.shape, yt.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
(50000, 32, 32, 3) (10000, 32, 32, 3)
(50000, 1) (10000, 1)


In [2]:
# Configure the random transformations used to create augmented images:
# rotation, shear, horizontal/vertical shifts and horizontal flips.
gen = ImageDataGenerator(
    rotation_range=20,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)

# Number of augmented samples, expressed as a fraction of the original
# training set (1.5 -> 150% of the original data).
augment_ratio = 1.5
augment_size = int(augment_ratio * xtr.shape[0])

# Draw the source indices independently at random, so the same original
# image can be picked more than once.
randidx = np.random.randint(xtr.shape[0], size=augment_size)

# Indexing with an integer array already returns new arrays, so no extra
# .copy() is needed; the originals in xtr/ytr are left untouched.
x_aug = xtr[randidx]
y_aug = ytr[randidx]

# Generate the augmented images as a single batch of augment_size samples.
# shuffle=False is passed so the batch is not reordered before labels are
# paired with images. The builtin next() is used instead of the iterator's
# .next() method so this works whether or not that method is available.
x_aug, y_aug = next(
    gen.flow(x_aug, y_aug, batch_size=augment_size, shuffle=False)
)

# Append the augmented samples to the original training data.
xtr = np.concatenate((xtr, x_aug))
ytr = np.concatenate((ytr, y_aug))

# Shuffle images and labels with one shared random permutation so that
# each image stays aligned with its label.
s = np.random.permutation(xtr.shape[0])

xtr = xtr[s]
ytr = ytr[s]

In [3]:
# CNN model: 7 convolution layers and 5 max-pooling layers, followed by a
# dense classifier head.
cnn = Sequential([
    # Block 1: conv -> conv -> pool -> dropout.
    # The input is a 32x32x3 tensor (height, width, channels).
    Conv2D(32, (3, 3), activation='relu', padding='same',
           input_shape=(32, 32, 3)),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Block 2: conv -> conv -> pool -> dropout.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Blocks 3-5: three repetitions of conv -> pool -> dropout.
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(256, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Flatten the 3-D feature tensor into a 1-D vector.
    Flatten(),

    # Hidden dense layer with dropout.
    Dense(128, activation='relu'),
    Dropout(0.25),

    # Output layer: 10 units with softmax.
    Dense(10, activation='softmax'),
])

In [None]:
# Compile the model with the Adam optimizer (default settings), sparse
# categorical cross-entropy loss, and accuracy as the reported metric.
cnn.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the augmented training set; the test split is passed as
# validation data. The returned history object is kept in `hist`.
hist = cnn.fit(
    xtr,
    ytr,
    epochs=250,
    batch_size=256,
    validation_data=(xt, yt),
)

## CNN CIFAR 10 modeling (2 conv layer, 1 pooling layer) - 70.56% (accuracy)

## CNN Image augmented CIFAR 10 modeling (7 conv layer, 5 pooling layer) - 87.21% (accuracy)

이미지 증강 작업 및 conv. layer, pooling layer 추가로 예측 모델의 더 높은 정확도, 더 낮은 오버피팅을 확인할 수 있었습니다. 