# Keras를 활용한 강아지, 고양이 구분 CNN 딥러닝 모델 만들기

[Jeff Delaney의 노트북](https://www.kaggle.com/jeffd23/catdognet-keras-convnet-starter)을 기반으로 최신 Keras에 맞게 수정 및 성능 향상을 위한 작업 추가

[kaggle Dogs vs. Cats](https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition)에 출전할 만한 데이터를 만들어 봅시다! 이미지를 처리하기 위한 CNN (Convolution Neural Network)의 특성을 이해하고 70% 이상의 정확도를 보이는 모형을 만들기 위해 필요한 조건을 알아봅시다.


In [None]:
# Use TensorFlow as the backend framework for Keras.
import tensorflow as tf

# Lab-machine setup: choose which of the instructor's graphics cards this
# session runs on and, optionally, what percentage of that card's GPU memory
# it may use. This is only needed for the shared lab environment; it is not
# required in an ordinary setup.
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
# Uncomment to set the per-process GPU memory fraction to 0.3.
#config.gpu_options.per_process_gpu_memory_fraction = 0.3
# Restrict the devices visible to this process to device "0".
config.gpu_options.visible_device_list = "0"
# Hand Keras a TensorFlow session created from the configuration above.
set_session(tf.Session(config=config))

# Run this cell and confirm that the "*" marker has disappeared before moving on.

In [None]:
import os, cv2, random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline 

from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Conv2D, MaxPooling2D, Dense, Activation
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator

## 데이터 준비

train 데이터와 test 데이터를 각각의 폴더에서 읽고, 가로 128 세로 128, RGB 3채널의 이미지로 변환하여 입력 데이터로 만드는 작업을 합니다

In [None]:
# Directories holding the Kaggle "Dogs vs. Cats" training and test images.
TRAIN_DIR = 'data/train/'
TEST_DIR = 'data/test/'

# Target size of every image once loaded, and its number of colour channels.
ROWS = 128
COLS = 128
CHANNELS = 3

# Scan the training directory once and derive both class lists from that one
# listing, so the dog and cat lists are guaranteed to come from the same
# snapshot of the directory.
train_files = os.listdir(TRAIN_DIR)
train_dogs = [TRAIN_DIR + name for name in train_files if 'dog' in name]
train_cats = [TRAIN_DIR + name for name in train_files if 'cat' in name]

test_images = [TEST_DIR + name for name in os.listdir(TEST_DIR)]

# The full dataset is used here. To work on a subset (e.g. when memory is
# limited), narrow the slices below, for instance train_dogs[:1000].
train_images = train_dogs[:] + train_cats[:]
random.shuffle(train_images)

test_images = test_images[:]

def read_image(file_path):
    """Load an image file as a normalised RGB array.

    The file is decoded as a 3-channel colour image, resized with bicubic
    interpolation (passing ``(ROWS, COLS)`` as OpenCV's ``dsize``), converted
    from OpenCV's BGR channel order to RGB, and scaled from 0-255 to 0.0-1.0.

    Args:
        file_path: Path of the image file to read.

    Returns:
        A floating-point array with the R, G, B channels on its last axis.

    Raises:
        IOError: If OpenCV cannot read or decode ``file_path``.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)  # cv2.IMREAD_COLOR / cv2.IMREAD_GRAYSCALE
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside resize.
        raise IOError('Could not read image file: {}'.format(file_path))

    img = cv2.resize(img, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)
    # Reversing the last axis swaps B and R (BGR -> RGB); dividing by 255.0
    # produces a new float array with values in [0, 1].
    return img[:, :, ::-1] / 255.0


def prep_data(images):
    """Load every image path in ``images`` into one float32 array.

    Each file is read with ``read_image`` and stored with its axes reversed
    (``.T``), so the channel axis comes first in every sample. A progress
    line is printed for every 500th image, starting with the first.

    Args:
        images: Sequence of image file paths.

    Returns:
        Array of shape ``(len(images), CHANNELS, ROWS, COLS)``, dtype float32.
    """
    total = len(images)
    batch = np.ndarray((total, CHANNELS, ROWS, COLS), dtype=np.float32)

    for position, path in enumerate(images):
        batch[position] = read_image(path).T
        if position % 500 == 0:
            print('Processed {} of {}'.format(position, total))

    return batch

# Load both image sets into memory, then report the resulting array shapes.
train = prep_data(train_images)
test = prep_data(test_images)

for split_name, split_array in (("Train", train), ("Test", test)):
    print("{} shape: {}".format(split_name, split_array.shape))

### 라벨 생성

MNIST와 달리 여기서는 강아지와 고양이라는 2개의 클래스가 있습니다. 고양이를 0, 강아지를 1로 보고, 각 이미지의 파일 이름에 dog란 글자가 들어간 것은 강아지로, 아닌 것은 고양이로 라벨을 붙입시다. 이후 해당 라벨을 딥러닝에 적합한 one-hot vector로 만듭니다.

고양이 = 0 -> [1, 0]
강아지 = 1 -> [0, 1]

In [None]:
# Class ids: cat = 0, dog = 1. Only the file name is inspected (not the whole
# path), so a directory whose name happens to contain "dog" cannot turn every
# image into a dog.
labels = [1 if 'dog' in os.path.basename(path) else 0 for path in train_images]

# Bar chart of how many images fall into each class.
sns.countplot(x=labels)

# Convert the integer ids to one-hot vectors: 0 -> [1, 0], 1 -> [0, 1].
labels = np_utils.to_categorical(labels, 2)

### Augment Data 데이터 증강

25000개의 데이터는 많긴 하지만 복잡하고 깊은 CNN 모델을 훈련시키기에는 조금 부족하기도 합니다. 2500개를 validation 데이터로 할당하고, 나머지 22500개의 훈련용 데이터에 대해 좌우 반전된 이미지를 추가로 생성하여 훈련에 사용하도록 합시다. 이렇게 좌우반전 혹은 이동, 자르기, 확대축소, 회전 등을 가해 원본 이미지로부터 새로운 훈련용 이미지를 만드는 것을 data augmentation 이라고 하며 특히 이미지 처리용 딥러닝에서 많이 쓰이는 기법입니다.

In [None]:
# Hold out the last 10% of the (already shuffled) samples for validation.
# With the full 25000-image dataset this is the same 22500 / 2500 split as a
# fixed index of 22500, but it still leaves a non-empty validation set when
# only a subset of the data was loaded.
val_count = len(train) // 10
split_at = len(train) - val_count

val_data = train[split_at:].copy()
val_labels = labels[split_at:].copy()
train = train[:split_at].copy()
labels = labels[:split_at].copy()

print(len(train))
print(len(val_data))

In [None]:
def horizontal_flip(img):
    """Mirror an image left-to-right and return it with its axes reversed.

    The column axis (axis 1) of ``img`` is reversed, which is what
    ``cv2.flip(img, 1)`` does, and the mirrored array is then transposed with
    ``.T``.

    Args:
        img: Image array whose axis 1 is the horizontal (column) axis,
            e.g. ``(rows, columns, channels)``.

    Returns:
        A new array holding the mirrored image with all axes reversed; for a
        ``(rows, columns, channels)`` input the result has shape
        ``(channels, columns, rows)``. It does not share memory with ``img``.
    """
    # One copy of the reversed view is enough: it keeps the result
    # independent of the caller's array without first copying the unflipped
    # image and then throwing that copy away.
    mirrored = img[:, ::-1].copy()
    return mirrored.T

# Build a mirrored counterpart of every training image. A flip does not change
# the class, so the augmented labels are a plain copy of the originals.
aug_labels = labels.copy()
aug_train = np.ndarray((train.shape[0], CHANNELS, ROWS, COLS), dtype=np.float32)

for idx, sample in enumerate(train):
    # Undo the stored transpose, flip, and let horizontal_flip transpose back.
    aug_train[idx] = horizontal_flip(sample.T)

In [None]:
# Show the first flipped image, then the first original image, for comparison.
for image_stack in (aug_train, train):
    plt.imshow(image_stack[0].T)
    plt.show()

### 데이터 확인

우리가 다룰 강아지와 고양이 사진을 살펴봅시다.

In [None]:
def show_cats_and_dogs(idx):
    """Display the ``idx``-th cat and ``idx``-th dog training images together.

    Both images are loaded with ``read_image``, joined along axis 1 (cat
    first, dog second) and drawn in a single 10x5 inch figure.

    Args:
        idx: Index into both ``train_cats`` and ``train_dogs``.
    """
    side_by_side = np.concatenate(
        (read_image(train_cats[idx]), read_image(train_dogs[idx])),
        axis=1,
    )
    plt.figure(figsize=(10, 5))
    plt.imshow(side_by_side, cmap='gray')
    plt.show()

# Preview five consecutive cat/dog pairs, beginning at index 1000.
start = 1000
preview_indices = range(start, start + 5)
for idx in preview_indices:
    show_cats_and_dogs(idx)

## 모델 만들기

다양한 모델에 대해 모형을 return 하는 함수를 만들어보며 테스트해봅시다.

커다란 딥러닝 모형은 설정할 부분이 매우 많고, 경우에 따라 작은 설정 값 하나의 변화가 극적인 성능변화를 일으키기도 합니다. 또한 각각의 값들이 서로 상호작용하기도 하므로, 매우 많은 시행착오가 필요합니다.

In [None]:
# Training configuration shared by every model builder below.

# Loss function name handed to model.compile().
objective = 'binary_crossentropy'

# Adam optimizer with a learning rate of 0.0001.
optimizer = Adam(lr=1e-4)

# Name of the model currently in use; empty until one has been chosen.
model_name = ''

In [None]:
def catdog():
    """Build and compile the 'catdog' CNN.

    Architecture: four blocks of two 3x3 'same'-padded ReLU convolutions
    (32, 64, 128 and 256 filters), each block followed by 2x2 max pooling;
    then four Dense(256, relu) + Dropout(0.5) stages and a 2-way softmax.
    The output shape is printed after every pooling layer and after Flatten.

    The model is compiled with the module-level ``objective`` and
    ``optimizer``. As a side effect the module-level ``model_name`` is set to
    'catdog'.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Without this declaration the assignment at the end would only create a
    # local variable and the module-level model_name would stay unchanged.
    global model_name

    # The input is declared as (CHANNELS, ROWS, COLS) and the pooling layers
    # are channels-first, so the convolutions must be told the same layout.
    # Otherwise Conv2D uses Keras's configured image_data_format (which is
    # channels_last unless the Keras config was changed) and would treat the
    # wrong axis as channels.
    fmt = 'channels_first'

    model = Sequential()

    model.add(Conv2D(32, (3, 3), padding='same', input_shape=(CHANNELS, ROWS, COLS),
                     activation='relu', data_format=fmt))
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(128, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(Conv2D(128, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(256, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(Conv2D(256, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Flatten())
    print(model.output_shape)

    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])

    model_name = 'catdog'
    return model

In [None]:
def catdog2():
    """Build and compile the 'catdog2' CNN.

    Architecture: four blocks of two 3x3 'same'-padded ReLU convolutions
    (64, 128, 128 and 256 filters), each block followed by 2x2 max pooling;
    then two Dense(512, relu) + Dropout(0.5) stages and a 2-way softmax.
    The output shape is printed after every pooling layer and after Flatten.

    The model is compiled with the module-level ``objective`` and
    ``optimizer``. As a side effect the module-level ``model_name`` is set to
    'catdog2'.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Without this declaration the assignment at the end would only create a
    # local variable and the module-level model_name would stay unchanged.
    global model_name

    # The input is declared as (CHANNELS, ROWS, COLS) and the pooling layers
    # are channels-first, so the convolutions must be told the same layout.
    # Otherwise Conv2D uses Keras's configured image_data_format (which is
    # channels_last unless the Keras config was changed) and would treat the
    # wrong axis as channels.
    fmt = 'channels_first'

    model = Sequential()

    model.add(Conv2D(64, (3, 3), padding='same', input_shape=(CHANNELS, ROWS, COLS),
                     activation='relu', data_format=fmt))
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(128, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(Conv2D(128, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(128, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(Conv2D(128, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(256, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(Conv2D(256, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Flatten())
    print(model.output_shape)

    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])

    model_name = 'catdog2'
    return model

In [None]:
def tri():
    """Build and compile the 'tri' CNN with shrinking kernel sizes.

    Architecture: four single-convolution blocks, each 'same'-padded with
    ReLU and followed by 2x2 max pooling: 8 filters of 9x9, 16 of 7x7,
    32 of 5x5 and 64 of 3x3; then two Dense(512, relu) + Dropout(0.5) stages
    and a 2-way softmax. The output shape is printed after every pooling
    layer and after Flatten.

    The model is compiled with the module-level ``objective`` and
    ``optimizer``. As a side effect the module-level ``model_name`` is set to
    'tri'.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Without this declaration the assignment at the end would only create a
    # local variable and the module-level model_name would stay unchanged.
    global model_name

    # The input is declared as (CHANNELS, ROWS, COLS) and the pooling layers
    # are channels-first, so the convolutions must be told the same layout.
    # Otherwise Conv2D uses Keras's configured image_data_format (which is
    # channels_last unless the Keras config was changed) and would treat the
    # wrong axis as channels.
    fmt = 'channels_first'

    model = Sequential()

    model.add(Conv2D(8, (9, 9), padding='same', input_shape=(CHANNELS, ROWS, COLS),
                     activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(16, (7, 7), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(32, (5, 5), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Flatten())
    print(model.output_shape)

    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])

    model_name = 'tri'
    return model

In [None]:
def big():
    """Build and compile the 'big' CNN with one wide dense layer.

    Architecture: four single-convolution blocks, each 'same'-padded with
    ReLU and followed by 2x2 max pooling: 32, 64 and 128 filters of 5x5,
    then 64 filters of 3x3; followed by Dense(1024, relu) + Dropout(0.8) and
    a 2-way softmax. The output shape is printed after every pooling layer
    and after Flatten.

    The model is compiled with the module-level ``objective`` and
    ``optimizer``. As a side effect the module-level ``model_name`` is set to
    'big'.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Without this declaration the assignment at the end would only create a
    # local variable and the module-level model_name would stay unchanged.
    global model_name

    # The input is declared as (CHANNELS, ROWS, COLS) and the pooling layers
    # are channels-first, so the convolutions must be told the same layout.
    # Otherwise Conv2D uses Keras's configured image_data_format (which is
    # channels_last unless the Keras config was changed) and would treat the
    # wrong axis as channels.
    fmt = 'channels_first'

    model = Sequential()

    model.add(Conv2D(32, (5, 5), padding='same', input_shape=(CHANNELS, ROWS, COLS),
                     activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(64, (5, 5), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(128, (5, 5), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu', data_format=fmt))
    model.add(MaxPooling2D(pool_size=(2, 2), data_format=fmt))
    print(model.output_shape)

    model.add(Flatten())
    print(model.output_shape)

    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.8))

    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])

    model_name = 'big'
    return model

### Train and Predict

I'm using Keras's early stopping callback to end training when the validation loss stops improving, otherwise the model will overfit. I will also be tracking the loss history on each epoch to visualize the overfitting trend. 

Note: A slice of 1000 images was used to fit the model for CPU efficiency. The model's performance improves significantly when used on the entire dataset. 

In [None]:
# Pick the architecture to train; swap in catdog2, tri or big to try another.
build_model = catdog
model = build_model()

# Record the name here from the builder itself, so the printed name always
# matches the model that was actually built instead of staying empty.
model_name = build_model.__name__

print(model_name)

In [None]:
# Number of samples per gradient update.
batch_size = 128
# Number of passes over the training data.
nb_epoch = 3

## Callback for loss logging per epoch
class LossHistory(Callback):
    """Keras callback that records training and validation loss per epoch.

    After training, ``losses`` and ``val_losses`` each hold one entry per
    completed epoch, in order. An entry is ``None`` when the corresponding
    key is missing from the logs of that epoch.
    """

    def on_train_begin(self, logs=None):
        """Reset both histories at the start of every training run."""
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's 'loss' and 'val_loss' to the histories.

        Args:
            batch: First positional argument supplied by Keras for this hook
                (unused here). The name is kept for interface compatibility.
            logs: Metrics dictionary for the finished epoch; may be None.
        """
        # A None default avoids sharing one mutable dict between calls.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

def run_catdog():
    """Train the module-level ``model`` and predict on the test images.

    The original and horizontally flipped training sets (and their labels)
    are concatenated and used to fit the model for up to ``nb_epoch`` epochs
    with ``batch_size`` samples per batch, validating against
    ``val_data`` / ``val_labels`` after every epoch. Training ends early when
    the validation loss has not improved for 3 consecutive epochs.

    Returns:
        A tuple ``(predictions, history)``: the model's outputs for ``test``
        and the ``LossHistory`` callback holding the per-epoch losses.
    """
    history = LossHistory()
    # End training once validation loss stops improving, so that raising
    # nb_epoch does not simply train the model further into overfitting.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')

    # Note: concatenation materialises a second full copy of the training data.
    train_whole = np.concatenate((train, aug_train), axis=0)
    labels_whole = np.concatenate((labels, aug_labels), axis=0)

    model.fit(train_whole, labels_whole, batch_size=batch_size, epochs=nb_epoch,
              validation_data=(val_data, val_labels), verbose=1, shuffle=True,
              callbacks=[history, early_stopping])

    predictions = model.predict(test, verbose=0)
    return predictions, history

# Train the model, then plot how training and validation loss evolved.
predictions, history = run_catdog()

loss = history.losses
val_loss = history.val_losses

plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('VGG-16 Loss Trend')
for series, colour, caption in (
    (loss, 'blue', 'Training Loss'),
    (val_loss, 'green', 'Validation Loss'),
):
    plt.plot(series, colour, label=caption)
# Put a tick on every second epoch.
plt.xticks(range(0, nb_epoch, 2))
plt.legend()
plt.show()

## How'd We Do?

I'm pretty sure I can distinguish a cat from a dog 100% of the time, but how confident is the model?...

Tip: Run on the full dataset with a GPU for a LB logloss of ~0.4 and accuracy at approx 90%. 

In [None]:
plt.figure()
plt.subplots_adjust(left=0, right=2, bottom=0, top=2.5)

# Show the first 16 test images in a 4x4 grid, or fewer when the test set is
# smaller, so that a short test set does not raise an IndexError.
for idx in range(min(16, len(predictions))):
    plt.subplot(4, 4, idx + 1)
    # Column 0 is the cat probability, column 1 the dog probability.
    cat_prob = predictions[idx][0]
    dog_prob = predictions[idx][1]
    if cat_prob >= 0.5:
        plt.title('{} Cat {:.2%}'.format(idx + 1, cat_prob))
    else:
        plt.title('{} Dog {:.2%}'.format(idx + 1, dog_prob))
    # Samples are stored with reversed axes; transpose back for display.
    plt.imshow(test[idx].T)