# Dogs vs. Cats Redux: Kernels Edition

## 2. 모델구성 및 학습

In [1]:
import cv2
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from IPython.display import display, Image, HTML
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import time

  from ._conv import register_converters as _register_converters


### 2.1 Model class
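VGG19-style network (only the first three convolutional blocks are enabled; blocks 4 and 5 are commented out)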

In [2]:
class Model_Vgg19:
    """Truncated VGG19-style CNN for two-class image classification (TF 1.x graph API).

    Only the first three VGG19 convolutional blocks are active; blocks 4 and 5
    are kept as commented-out code inside ``build_net``. The convolutional
    stack is followed by two fully connected layers (1000 and 500 units, each
    followed by dropout) and a 2-unit logits layer.

    Call ``build_net`` once before using any other method: it creates the
    placeholders and tensors that the remaining methods feed and fetch.
    """

    def __init__(self, sess, name):
        """Remember the session used to run the graph and the variable-scope name."""
        self.sess = sess
        self.name = name

    def build_net(self, image_size):
        """Create placeholders, layers, loss, optimizer, metrics and summaries.

        Args:
            image_size: side length of the square, 3-channel input images.
        """
        with tf.variable_scope(self.name):
            # Boolean switch for the dropout layers: True while training,
            # False for evaluation / prediction.
            self.training = tf.placeholder(tf.bool)

            # input place holders
            self.X = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
            self.Y = tf.placeholder(tf.float32, [None, 2])
            self.learning_rate = tf.placeholder(tf.float32)

            # Convolutional block #1
            conv1_1 = tf.layers.conv2d(inputs=self.X, filters=64, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv1_2 = tf.layers.conv2d(inputs=conv1_1, filters=64, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1_2, pool_size=[2, 2], padding="SAME", strides=2)

            # Convolutional block #2
            conv2_1 = tf.layers.conv2d(inputs=pool1, filters=128, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv2_2 = tf.layers.conv2d(inputs=conv2_1, filters=128, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2_2, pool_size=[2, 2], padding="SAME", strides=2)

            # Convolutional block #3
            conv3_1 = tf.layers.conv2d(inputs=pool2, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv3_2 = tf.layers.conv2d(inputs=conv3_1, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv3_3 = tf.layers.conv2d(inputs=conv3_2, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            conv3_4 = tf.layers.conv2d(inputs=conv3_3, filters=256, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool3 = tf.layers.max_pooling2d(inputs=conv3_4, pool_size=[2, 2], padding="SAME", strides=2)

            # Blocks #4 and #5 of VGG19 are disabled. If they are re-enabled,
            # each pooling layer must take the last conv of its block
            # (conv4_4 / conv5_4) as input.
            #conv4_1 = tf.layers.conv2d(inputs=pool3, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #conv4_2 = tf.layers.conv2d(inputs=conv4_1, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #conv4_3 = tf.layers.conv2d(inputs=conv4_2, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #conv4_4 = tf.layers.conv2d(inputs=conv4_3, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #pool4 = tf.layers.max_pooling2d(inputs=conv4_4, pool_size=[2, 2], padding="SAME", strides=2)

            #conv5_1 = tf.layers.conv2d(inputs=pool4, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #conv5_2 = tf.layers.conv2d(inputs=conv5_1, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #conv5_3 = tf.layers.conv2d(inputs=conv5_2, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #conv5_4 = tf.layers.conv2d(inputs=conv5_3, filters=512, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            #pool5 = tf.layers.max_pooling2d(inputs=conv5_4, pool_size=[2, 2], padding="SAME", strides=2)

            initializer = tf.contrib.layers.xavier_initializer()

            # Spatial size per pooling stage for a 150px input:
            # 150 -> 75 -> 38 -> 19 -> 10 -> 5
            # The flattened width is derived from pool3's static shape so the
            # network also builds for image sizes other than 150.
            flat_dim = 1
            for dim in pool3.get_shape().as_list()[1:]:
                flat_dim *= dim

            # Dense Layer with Relu
            flat6 = tf.reshape(pool3, [-1, flat_dim])
            #fc6 = tf.layers.dense(inputs=flat6, units=6400, activation=tf.nn.relu, kernel_initializer=initializer)
            fc6 = tf.layers.dense(inputs=flat6, units=1000, activation=tf.nn.relu, kernel_initializer=initializer)
            dropout6 = tf.layers.dropout(inputs=fc6, rate=0.5, training=self.training)

            # dropout6 is already [batch, 1000], so it feeds fc7 directly.
            #fc7 = tf.layers.dense(inputs=dropout6, units=3200, activation=tf.nn.relu, kernel_initializer=initializer)
            fc7 = tf.layers.dense(inputs=dropout6, units=500, activation=tf.nn.relu, kernel_initializer=initializer)
            dropout7 = tf.layers.dropout(inputs=fc7, rate=0.5, training=self.training)

            # Logits (no activation) layer: 500 inputs -> 2 outputs
            self.logits = tf.layers.dense(inputs=dropout7, units=2)

        # define cost/loss & optimizer
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

        self.prediction = tf.argmax(tf.nn.softmax(self.logits), axis=1)
        correct_prediction = tf.equal(self.prediction, tf.argmax(self.Y, axis=1))
        self.correct_count = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # tensorboard data
        # Merge only this model's two scalars (not every summary in the graph)
        # and store the result under a name that does not shadow summary().
        cost_summary = tf.summary.scalar("cost", self.cost)
        accuracy_summary = tf.summary.scalar("accuracy", self.accuracy)
        self.summary_op = tf.summary.merge([cost_summary, accuracy_summary])

    def predict(self, x_test):
        """Return the raw logits for ``x_test`` with dropout disabled."""
        return self.sess.run(self.logits, feed_dict={self.X: x_test, self.training: False})

    def countCorrect(self, x_test, y_test):
        """Return the number of correctly classified samples (as a float), dropout disabled."""
        return self.sess.run(self.correct_count, feed_dict={self.X: x_test, self.Y: y_test, self.training: False})

    def get_accuracy(self, x_test, y_test):
        """Return the fraction of correctly classified samples, dropout disabled."""
        return self.sess.run(self.accuracy, feed_dict={self.X: x_test, self.Y: y_test, self.training: False})

    def get_cost(self, x_test, y_test):
        """Return the mean softmax cross-entropy over the batch, dropout disabled."""
        return self.sess.run(self.cost, feed_dict={self.X: x_test, self.Y: y_test, self.training: False})

    def summary(self, x_test, y_test):
        """Return the merged cost/accuracy summary for the batch, dropout disabled."""
        return self.sess.run(self.summary_op, feed_dict={self.X: x_test, self.Y: y_test, self.training: False})

    def train(self, x_data, y_data, learning_rate, training=True):
        """Run one optimizer step on the batch.

        Returns:
            A list ``[summary, cost, optimizer_result]`` as produced by ``sess.run``.
        """
        return self.sess.run([self.summary_op, self.cost, self.optimizer], feed_dict={self.X: x_data, self.Y: y_data, self.learning_rate: learning_rate, self.training: training})

### 2.2 이미지 파일 리스트 가져 오기

In [3]:
# Locations of the pre-resized training and test images.
DATA_DIR = "../data/"
TRAIN_DIR = DATA_DIR + "train_resize/"
TEST_DIR = DATA_DIR + "test_resize/"

# os.listdir returns entries in arbitrary order, so each directory is listed
# once and sorted; this keeps the file lists (and any seeded split derived
# from them) identical across runs and machines.
train_file_names = sorted(os.listdir(TRAIN_DIR))
test_file_names = sorted(os.listdir(TEST_DIR))

train_images = [TRAIN_DIR + file_name for file_name in train_file_names]
# Class membership is decided by a substring match on the file name.
train_dogs = [TRAIN_DIR + file_name for file_name in train_file_names if 'dog' in file_name]
train_cats = [TRAIN_DIR + file_name for file_name in train_file_names if 'cat' in file_name]
test_images = [TEST_DIR + file_name for file_name in test_file_names]

print("train_dogs: ", len(train_dogs))
print("train_cats: ", len(train_cats))
print("test_images: ", len(test_images))

train_dogs:  12500
train_cats:  12500
test_images:  12500


### 2.3 학습데이터와 valid 데이터 분리

In [4]:
# Uncomment the two lines below to run on a small subset.
#train_dogs = train_dogs[:100]
#train_cats = train_cats[:100]

# All dog paths first, then all cat paths; the labels below follow that order.
train_dog_cat = train_dogs + train_cats

# Class index 0 = dog, 1 = cat.
label_dog = [0] * len(train_dogs)
label_cat = [1] * len(train_cats)
label = label_dog + label_cat
# Row k of the 2x2 identity matrix is the one-hot vector for class k.
label_one_hot = np.eye(2)[label]

# Hold out 30% of the samples for validation, with a fixed seed.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_dog_cat,
    label_one_hot,
    test_size=0.3,
    random_state=42,
)

print("train: ", len(x_train))
print("valid: ", len(x_valid))

train:  17500
valid:  7500


In [5]:
# Open a TensorFlow session and build the network graph for 150x150 inputs.
sess = tf.Session()
model = Model_Vgg19(sess=sess, name="model")
model.build_net(image_size=150)

Instructions for updating:
Use the retry module or similar alternatives.


In [6]:
# Training hyperparameters.
learning_rate = 1e-4  # value fed to the model's learning-rate placeholder
epochs = 20           # number of full passes over the training split
batch_size = 20       # number of images per batch

In [7]:
def getBatchRange (data_size, batch_size, iterater):
    """Return the half-open index range ``[begin_idx, end_idx)`` of one batch.

    Args:
        data_size: total number of samples.
        batch_size: number of samples per batch.
        iterater: zero-based batch index.

    Returns:
        ``(begin_idx, end_idx)``, intended for slicing as
        ``data[begin_idx:end_idx]``. ``end_idx`` is clamped to ``data_size``
        so the last batch may be shorter but still includes the final sample.
    """
    begin_idx = iterater * batch_size
    # The end index is exclusive, so the clamp is data_size (not data_size - 1).
    end_idx = min(begin_idx + batch_size, data_size)
    return begin_idx, end_idx

In [8]:
def getImageData (image_file_list, image_width = 150, image_height = 150, channel = 3):
    """Load image files into a single uint8 array.

    Args:
        image_file_list: sequence of image file paths.
        image_width: expected image width in pixels.
        image_height: expected image height in pixels.
        channel: expected number of channels.

    Returns:
        Array of shape ``(len(image_file_list), image_height, image_width, channel)``
        holding the pixels exactly as returned by ``cv2.imread``.

    Raises:
        IOError: if a file cannot be read as an image.
        ValueError: if a decoded image does not have the expected shape.
    """
    # cv2.imread yields arrays laid out as (rows, cols, channels), i.e.
    # (height, width, channel), so the buffer uses that axis order.
    expected_shape = (image_height, image_width, channel)
    data = np.empty((len(image_file_list),) + expected_shape, dtype=np.uint8)
    for i, image_file in enumerate(image_file_list):
        image = cv2.imread(image_file)
        # cv2.imread signals an unreadable or missing file by returning None.
        if image is None:
            raise IOError("Could not read image file: %s" % image_file)
        if image.shape != expected_shape:
            raise ValueError("Image %s has shape %s, expected %s"
                             % (image_file, image.shape, expected_shape))
        data[i] = image
    return data

In [9]:
def getBatchData (x_data, y_data, batch_size, iterater):
    """Return the images and labels of one batch.

    Args:
        x_data: sequence of image file paths.
        y_data: labels aligned with ``x_data``, two values per sample.
        batch_size: number of samples per batch.
        iterater: zero-based batch index.

    Returns:
        ``(batch_x_image, batch_y)``: the loaded image array for the batch and
        the matching labels reshaped to ``[-1, 2]``.
    """
    # Use the batch index passed in, not a loop variable from the enclosing cell.
    begin_idx, end_idx = getBatchRange(len(x_data), batch_size, iterater)

    batch_x = x_data[begin_idx : end_idx]
    batch_y = y_data[begin_idx : end_idx]
    batch_y = np.reshape(batch_y, [-1,2])

    # Load the (already resized) images for this batch.
    batch_x_image = getImageData(batch_x)

    return batch_x_image, batch_y

In [10]:
# Initialise every graph variable and create a saver for periodic checkpoints.
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

start_time = time.time()
train_count = len(x_train)
# Truncating division: samples beyond the last full batch are not visited.
total_batch_train = int(train_count / batch_size)
valid_count = len(x_valid)
total_batch_valid = int(valid_count/batch_size)

# usage
# tensorboard --logdir=./log
train_writer = tf.summary.FileWriter("./log/train")
valid_writer = tf.summary.FileWriter("./log/valid")
train_writer.add_graph(sess.graph)
train_global_step = 0
valid_global_step = 0

print('Learning started. It takes sometime.') 
for epoch in range(epochs):
    avg_cost_train = 0
    avg_cost_valid = 0

    correct_count_train = 0
    correct_count_valid = 0
    # Number of samples actually fed this epoch. Accuracy is divided by these
    # counts so that samples never visited do not count as misclassified.
    seen_train = 0
    seen_valid = 0

    start_time_epoch = time.time()
    for i in range(total_batch_train):
        batch_x_image, batch_y = getBatchData(x_train, y_train, batch_size, i)

        s, c, _ = model.train(batch_x_image, batch_y, learning_rate)
        avg_cost_train += c / total_batch_train
        # Second forward pass with dropout disabled, taken after the weight update.
        correct_count_train += model.countCorrect(batch_x_image, batch_y)
        seen_train += len(batch_y)

        train_writer.add_summary(s, global_step=train_global_step)
        train_global_step += 1

    accuracy_train = correct_count_train / max(seen_train, 1)

    for i in range(total_batch_valid):
        batch_x_image, batch_y = getBatchData(x_valid, y_valid, batch_size, i)
        c = model.get_cost(batch_x_image, batch_y)
        avg_cost_valid += c / total_batch_valid

        correct_count_valid += model.countCorrect(batch_x_image, batch_y)
        seen_valid += len(batch_y)
        #s = model.summary(batch_x_image, batch_y)
        #valid_writer.add_summary(s, global_step=valid_global_step)
        #valid_global_step += 1

    accuracy_valid = correct_count_valid / max(seen_valid, 1)

    print('Epoch:', '%04d' % (epoch + 1)
          , 'train [cost: ', '{:.9f}'.format(avg_cost_train), ', acc: %.4f]' % accuracy_train
          , 'valid [cost: ', '{:.9f}'.format(avg_cost_valid), ', acc: %.4f]' % accuracy_valid
          , " %.2f seconds" % (time.time() - start_time_epoch))

    # Checkpoint every 10th epoch (starting with the first) and after the last one.
    if epoch % 10 == 0 or (epoch + 1) == epochs:
        checkpoint_path = os.path.join('./', 'model.ckpt')
        # saver.save returns the path actually written, including the
        # global-step suffix; report that instead of the bare prefix.
        saved_path = saver.save(sess, checkpoint_path, global_step=epoch)
        print('Save checkpoint  : %s' %(saved_path))

# Make sure buffered TensorBoard events reach disk.
train_writer.flush()
valid_writer.flush()

print('Learning Finished!') 
print("--- %.2f seconds ---" %(time.time() - start_time))


Learning started. It takes sometime.
Epoch: 0001 train [cost:  0.707457069 , acc: 0.6352] valid [cost:  0.568128114 , acc: 0.7020]  526.11 seconds
Save checkpoint  : ./model.ckpt
Epoch: 0002 train [cost:  0.544926585 , acc: 0.7542] valid [cost:  0.465250077 , acc: 0.7787]  522.48 seconds
Epoch: 0003 train [cost:  0.444990527 , acc: 0.8234] valid [cost:  0.382401820 , acc: 0.8269]  527.55 seconds
Epoch: 0004 train [cost:  0.327506952 , acc: 0.8898] valid [cost:  0.387716878 , acc: 0.8256]  530.43 seconds
Epoch: 0005 train [cost:  0.235721885 , acc: 0.9354] valid [cost:  0.401837160 , acc: 0.8369]  530.35 seconds
Epoch: 0006 train [cost:  0.168372061 , acc: 0.9675] valid [cost:  0.400888330 , acc: 0.8592]  529.98 seconds
Epoch: 0007 train [cost:  0.123762147 , acc: 0.9809] valid [cost:  0.334093438 , acc: 0.8884]  529.94 seconds
Epoch: 0008 train [cost:  0.098763611 , acc: 0.9884] valid [cost:  0.373047715 , acc: 0.8868]  530.32 seconds
Epoch: 0009 train [cost:  0.070632317 , acc: 0.9950

In [11]:
# Accuracy of the trained model on the training split.
start_time = time.time()

train_count = len(x_train)
total_batch = int(train_count / batch_size)
correct_count = 0
# Samples actually evaluated; used as the accuracy denominator so that
# samples never visited are not counted as misclassified.
evaluated_count = 0
for i in range(total_batch):
    batch_x_image, batch_y = getBatchData(x_train, y_train, batch_size, i)
    correct_count += model.countCorrect(batch_x_image, batch_y)
    evaluated_count += len(batch_y)

accuracy = correct_count / max(evaluated_count, 1)
# This is measured on x_train, so it is labelled as train accuracy.
print("train-accuracy: %.4f" % accuracy)
print("--- %.2f seconds ---" %(time.time() - start_time))

test-accuracy: 0.9922
--- 120.07 seconds ---


In [12]:
# Accuracy of the trained model on the validation split.
start_time = time.time()

valid_count = len(x_valid)
total_batch = int(valid_count / batch_size)
correct_count = 0
# Samples actually evaluated; used as the accuracy denominator so that
# samples never visited are not counted as misclassified.
evaluated_count = 0
for i in range(total_batch):
    batch_x_image, batch_y = getBatchData(x_valid, y_valid, batch_size, i)
    correct_count += model.countCorrect(batch_x_image, batch_y)
    evaluated_count += len(batch_y)

accuracy = correct_count / max(evaluated_count, 1)
print("valid-accuracy: %.4f" % accuracy)
print("--- %.2f seconds ---" %(time.time() - start_time))

valid-accuracy: 0.8912
--- 51.27 seconds ---
