In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import cv2
from tensorflow.contrib.slim.nets import vgg
from tensorflow.contrib import slim
import os
import glob

In [2]:
cat_dir = 'C:/Users/p0ng5/Desktop/data/dog_vs_cat/train/cats/'
dog_dir = 'C:/Users/p0ng5/Desktop/data/dog_vs_cat/train/dogs/'

In [3]:
vgg_path = './checkpoint/vgg_16.ckpt'

In [4]:
batch_size = 32
width = 224
height = 224
learning_rate = .01

MEAN_VALUE = np.array([103.939, 116.779, 123.68])

In [5]:
all_images = glob.glob(os.path.join(cat_dir, '*.jpg')) + glob.glob(os.path.join(dog_dir, '*.jpg'))

In [6]:
print('total images:', len(all_images))

total images: 25000


In [7]:
data_train, data_test = train_test_split(all_images, test_size=.2, random_state=42)
print(len(data_train), len(data_test))

20000 5000


In [8]:
def image_preprocess(img_path, width, height):
    img = cv2.imread(img_path)
    img = cv2.resize(img, (width, height))
    return img - MEAN_VALUE


def get_batch(images, batch_size, widht, height, random_state=None):
    
    # get batch 
    n_samples = len(images)
    n_batches = int(np.ceil(n_samples / batch_size))
    
    # copy & shuffle image list
    images = list(images)
    np.random.seed(random_state)    
    while True:
        np.random.shuffle(images)

        
        for i in range(n_batches):
            a = i*batch_size
            b = min(n_samples, (i+1)*batch_size)
            image_batchs = images[a:b]

            # process images
            labels = []
            imgs = []
            for file in image_batchs:
                img = image_preprocess(file, width, height)
                filename = os.path.basename(file)
                label = 1 if filename.startswith('dog') else 0    # cat:0, dog:1
                labels.append(label)
                imgs.append(img)

            imgs = np.array(imgs, dtype=np.float32)
            labels = np.array(labels, dtype=np.float32).reshape((-1, 1))
            yield imgs, labels

In [9]:
train_generator = get_batch(data_train, batch_size, width, height, random_state=42)
val_generator = get_batch(data_test, batch_size, width, height, random_state=0)

In [10]:
tf.reset_default_graph()

with tf.Graph().as_default():
    
    X = tf.placeholder(tf.float32, [None, height, width, 3])
    Y = tf.placeholder(tf.float32, [None, 1])
    
    # laod VGG16
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points = vgg.vgg_16(X, num_classes=1000, is_training=False)
        fc_7 = end_points['vgg_16/fc7']
    
    # define the last weights
    Wn = tf.Variable(tf.random_normal([4096, 1], mean=0.0, stddev=0.02), name='Wn')
    b = tf.Variable(tf.zeros([1]), name='b')
    
    fc_7 = tf.reshape(fc_7, (-1, 4096))
    logits = tf.add(tf.matmul(fc_7, Wn), b)
    probx = tf.nn.sigmoid(logits)
    
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost, var_list=[Wn, b])
    
    init_fn = slim.assign_from_checkpoint_fn(vgg_path, 
                                             slim.get_model_variables('vgg_16'))
    
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        init_fn(sess)
        for i in range(1):
            for j in range(50):
                batch_x, batch_y= next(train_generator)
                val_x, val_y = next(val_generator)
                sess.run(optimizer, feed_dict={X:batch_x, Y:batch_y})
                cost_train = sess.run(cost, feed_dict={X:batch_x, Y:batch_y})
                cost_val = sess.run(cost, feed_dict={X:val_x, Y:val_y})
                
                print("Training cost {:.4f} Validation cost {:.4f}".format(cost_train, cost_val))
                
        val_pred = sess.run(probx, feed_dict={X:val_x})
        val_pred = (val_pred > 0.5) * 1
        acc = np.mean(val_y == val_pred)
        print('Validation accuracy: {:.4f}'.format(acc))

INFO:tensorflow:Restoring parameters from ./checkpoint/vgg_16.ckpt
Training cost 0.9680 Validation cost 1.9168
Training cost 0.0189 Validation cost 0.4371
Training cost 0.0373 Validation cost 0.1900
Training cost 0.3096 Validation cost 0.1494
Training cost 0.3532 Validation cost 0.1241
Training cost 0.2524 Validation cost 0.8112
Training cost 0.0676 Validation cost 0.0066
Training cost 0.0024 Validation cost 0.0043
Training cost 0.4182 Validation cost 0.1011
Training cost 0.0664 Validation cost 0.7965
Training cost 0.2457 Validation cost 0.0296
Training cost 0.1461 Validation cost 0.6530
Training cost 0.0006 Validation cost 0.4416
Training cost 0.2950 Validation cost 0.0023
Training cost 0.3723 Validation cost 0.0251
Training cost 0.0010 Validation cost 0.7248
Training cost 0.2752 Validation cost 0.1928
Training cost 0.0172 Validation cost 0.2008
Training cost 0.7770 Validation cost 0.5688
Training cost 0.0011 Validation cost 0.0002
Training cost 0.0581 Validation cost 0.7359
Training 

In [16]:
np.mean(val_y == val_pred)

0.9375