In [4]:
%matplotlib inline
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import glob
import os
from PIL import Image
import random
from sklearn.cross_validation import train_test_split
from tqdm import tnrange

In [5]:
def resizeImg(image):
    """Rescale an image so that it fits inside a 250x250 box.

    Images whose longer side is below 100 px are doubled in both dimensions.
    Every other image is scaled, keeping its aspect ratio, so that the longer
    side becomes exactly 250 px.

    Args:
        image: object exposing ``size`` as ``(width, height)`` and a
            ``resize(size, resample)`` method (e.g. a ``PIL.Image.Image``).

    Returns:
        The new image object returned by ``image.resize``.
    """
    img_w, img_h = image.size

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name. Fall back to the old alias on very old Pillow.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    # Small images (longer side < 100 px) are simply doubled.
    if max(img_w, img_h) < 100:
        return image.resize((img_w * 2, img_h * 2), resample)

    # max(1, ...) stops very elongated images from rounding the short side
    # down to zero pixels, which would produce an empty/invalid image.
    if img_w >= img_h:
        new_size = (250, max(1, int(250 * img_h / img_w)))
    else:
        new_size = (max(1, int(250 * img_w / img_h)), 250)

    return image.resize(new_size, resample)

def padImg(image):
    """Centre ``image`` on a new square RGB canvas.

    The canvas is 250x250 unless either side of ``image`` exceeds 250 px, in
    which case the canvas side equals the longer side of ``image``.

    Args:
        image: object exposing ``size`` as ``(width, height)`` that can be
            pasted onto a PIL image.

    Returns:
        The new canvas with ``image`` pasted in the middle.
    """
    width, height = image.size[0], image.size[1]

    longest = max(width, height)
    side = longest if longest > 250 else 250

    canvas = Image.new("RGB", (side, side))
    # Top-left corner that centres the picture on the canvas.
    offset = (int((side - width) / 2), int((side - height) / 2))
    canvas.paste(image, offset)

    return canvas

# extract labels
# positives = [1, 0], negatives = [0, 1]
def get_label(path):
    """Return the one-hot label encoded in an image's file name.

    Args:
        path: path to an image file, as ``str`` or ``os.PathLike``. Only the
            final path component is inspected.

    Returns:
        ``np.array([1, 0])`` when the file name starts with ``'cat'``,
        otherwise ``np.array([0, 1])``.
    """
    # os.path.basename honours the platform's path separators, so paths built
    # by glob/os.path.join on Windows work too; it also accepts pathlib paths.
    filename = os.path.basename(path)
    if filename.startswith('cat'):
        return np.array([1, 0])
    return np.array([0, 1])

In [3]:
train_path = "data/train"
NUM_IMAGES = 250  # size of the random subset used for this experiment
SEED = 42

# glob returns files in a filesystem-dependent order; sorting and seeding
# before the shuffle makes the sampled subset identical on every full re-run.
images = sorted(glob.glob(os.path.join(train_path, "*.jpg")))
if not images:
    raise FileNotFoundError("No .jpg files found in %r" % train_path)
random.seed(SEED)
random.shuffle(images)
images = images[:NUM_IMAGES]

# extract pixels
pixel_arrays = []
for i, image_path in enumerate(images):
    # The context manager closes the file handle once resize/pad have
    # produced their own copies of the pixel data.
    with Image.open(image_path) as raw_img:
        squared_img = padImg(resizeImg(raw_img))
    # Scale to [0, 1]. float32 matches the tf.float32 input placeholder and
    # needs half the memory of numpy's default float64.
    pixel_arrays.append(np.array(squared_img, dtype=np.float32) / 255.0)
    if i > 0 and i % 50 == 0:
        print("Loading %i of %i images" %(i, len(images)))

# Build the batch array once; calling np.vstack inside the loop re-copied
# every previously loaded image on each iteration.
data_images = np.array(pixel_arrays, dtype=np.float32).reshape([-1,250,250,3])
data_labels = np.array([get_label(p) for p in images])
# Class index per sample: 0 for [1, 0] (positive), 1 for [0, 1] (negative).
data_labels_out = np.argmax(data_labels, 1)

num_negative = np.count_nonzero(data_labels_out)
print("Positive samples: %i\nNegative samples: %i \n" % (len(data_labels_out)-num_negative
                                                      , num_negative))

Loading 50 of 250 images
Loading 100 of 250 images
Loading 150 of 250 images
Loading 200 of 250 images
Positive samples: 118
Negative samples: 132 



In [6]:
#Split Data Sets
# A fixed random_state makes the 80/20 partition reproducible across kernel
# restarts; without it every run trains and evaluates on different samples.
X_train, X_test, y_train, y_test = train_test_split(
    data_images, data_labels, test_size=0.2, random_state=42)
# Class index per sample: 0 for [1, 0] (positive), 1 for [0, 1] (negative).
y_train_out = np.argmax(y_train, 1)
y_test_out = np.argmax(y_test, 1)


def _print_class_balance(title, class_ids):
    """Print positive/negative counts and the positive ratio of one split.

    Args:
        title: heading printed before the counts.
        class_ids: 1-D array of class indices where 0 marks a positive sample.
    """
    negatives = np.count_nonzero(class_ids)
    positives = len(class_ids) - negatives
    print(title)
    print("Positive samples: %i\nNegative samples: %i" % (positives, negatives))
    print("Positive ratio: %.4f" % (positives / len(class_ids)))


_print_class_balance("Train:", y_train_out)
print("-------------------------")
_print_class_balance("Test:", y_test_out)


Train:
Positive samples: 96
Negative samples: 104
Positive ratio: 0.4800
-------------------------
Test:
Positive samples: 22
Negative samples: 28
Positive ratio: 0.4400


In [7]:
def dropout(x, prob, train_phase):
    """Apply dropout to ``x`` only when ``train_phase`` is true.

    Args:
        x: tensor to (conditionally) apply dropout to.
        prob: forwarded as the second positional argument of
            ``tf.nn.dropout`` (presumably the keep probability; confirm
            against the installed TensorFlow version).
        train_phase: predicate handed to ``tf.cond``.

    Returns:
        The result of ``tf.cond``: the dropout output on the true branch,
        ``x`` unchanged on the false branch.
    """
    def _training_branch():
        return tf.nn.dropout(x, prob)

    def _inference_branch():
        return x

    return tf.cond(train_phase, _training_branch, _inference_branch)

def weight_variable(shape, stddev=0.1):
    """Create a ``tf.Variable`` initialised from a truncated normal distribution.

    Args:
        shape: shape of the weight tensor.
        stddev: standard deviation passed to ``tf.truncated_normal``. Defaults
            to 0.1, the value that was previously hard-coded, so existing
            callers are unaffected; layers with a large fan-in can now ask
            for a smaller initial scale.

    Returns:
        The newly created ``tf.Variable``.
    """
    initial = tf.truncated_normal(shape, stddev=stddev)
    return tf.Variable(initial)

def bias_variable(shape):
    """Create a ``tf.Variable`` of the given shape filled with the constant 0.1.

    Args:
        shape: shape of the bias tensor.

    Returns:
        The newly created ``tf.Variable``.
    """
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using stride 1 and 'SAME' padding.

    Args:
        x: input tensor handed to ``tf.nn.conv2d``.
        W: filter tensor handed to ``tf.nn.conv2d``.

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding.

    Args:
        x: input tensor handed to ``tf.nn.max_pool``.

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [10]:
sess = tf.InteractiveSession()

# tf Graph Input: batches of 250x250 RGB images and their one-hot labels
x = tf.placeholder(tf.float32, [None, 250, 250, 3])
y = tf.placeholder(tf.float32, [None, 2])

# dropout placeholder (second argument of the tf.nn.dropout calls below)
keep_prob = tf.placeholder(tf.float32)

# train flag placeholder
# NOTE(review): no op in this cell consumes train_phase; dropout below is
# controlled by keep_prob alone.
train_phase = tf.placeholder(tf.bool)  # For Batch Normalization

# Set model weights
# Five 3x3 convolutions: 3 -> 64 -> 128 -> 256 -> 512 -> 1024 channels
W1 = weight_variable([3, 3, 3, 64])
b1 = bias_variable([64])

W2 = weight_variable([3, 3, 64, 128])
b2 = bias_variable([128])

W3 = weight_variable([3, 3, 128, 256])
b3 = bias_variable([256])

W4 = weight_variable([3, 3, 256, 512])
b4 = bias_variable([512])

W5 = weight_variable([3, 3, 512, 1024])
b5 = bias_variable([1024])

# Three fully connected layers: 8*8*1024 -> 2048 -> 2048 -> 2 logits
W6 = weight_variable([8 * 8 * 1024, 2048])
b6 = bias_variable([2048])

W7 = weight_variable([2048, 2048])
b7 = bias_variable([2048])

W8 = weight_variable([2048, 2])
b8 = bias_variable([2])

# hidden layers
# Each stage is conv + ReLU followed by a stride-2 'SAME' max-pool, so the
# spatial size goes 250 -> 125 -> 63 -> 32 -> 16 -> 8.
conv1 = tf.nn.relu(conv2d(x, W1) + b1)
maxp1 = max_pool_2x2(conv1)

conv2 = tf.nn.relu(conv2d(maxp1, W2) + b2)
maxp2 = max_pool_2x2(conv2)

conv3 = tf.nn.relu(conv2d(maxp2, W3) + b3)
maxp3 = max_pool_2x2(conv3)

conv4 = tf.nn.relu(conv2d(maxp3, W4) + b4)
maxp4 = max_pool_2x2(conv4)

conv5 = tf.nn.relu(conv2d(maxp4, W5) + b5)
maxp5 = max_pool_2x2(conv5)

# fully connected
# Flatten the 8x8x1024 feature map into one vector per sample.
maxp5_flat = tf.reshape(maxp5, [-1, 8 * 8 * 1024])

full1 = tf.nn.relu(tf.matmul(maxp5_flat, W6) + b6)
drop1 = tf.nn.dropout(full1, keep_prob)

full2 = tf.nn.relu(tf.matmul(drop1, W7) + b7)
drop2 = tf.nn.dropout(full2, keep_prob)

# output
output = tf.matmul(drop2, W8) + b8
softmax = tf.nn.softmax(output)

# Keyword arguments are required here: TensorFlow >= 1.0 rejects positional
# calls to this op, and the names are also accepted by the older releases
# where the positional order was (logits, labels).
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y))

all_variables = tf.trainable_variables()


In [11]:
starter_learning_rate = 0.002
decay_steps = 100
decay_rate  = 0.9 # the learning rate will decrement by this rate
regularization_strength = 0.001


# Sum of tf.nn.l2_loss over every trainable variable whose name does not
# contain 'BatchNorm'.
L2_regularization = tf.add_n([tf.nn.l2_loss(v) for v in all_variables if 'BatchNorm' not in v.name])  # fix for BN
# NOTE: this rebinds `loss`, so running this cell twice without rebuilding the
# graph adds the penalty term a second time.
loss = loss + L2_regularization * regularization_strength

# global_step is incremented by the optimizer on every training step and
# drives the staircase decay: the rate is multiplied by decay_rate once every
# decay_steps steps.
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           decay_steps, decay_rate, staircase=True)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

# Getting predictions and accuracy
predictions = tf.argmax(softmax, 1)
correct_prediction = tf.equal(predictions, tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

print("Model constructed, initializing all variables...")

# initialize variables
# tf.initialize_all_variables is deprecated; TensorFlow's own warning names
# tf.global_variables_initializer as the replacement.
sess.run(tf.global_variables_initializer())

Model constructed, initializing all variables...
Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [None]:
# Hyper-parameters
training_steps = 200
batch_size = 16
display_step = 10

# Mini-batch Gradient Descent
# Per-step metrics, plotted once training has finished.
training_accuracy = []
training_loss     = []

for i in tnrange(training_steps):
    # Draw a random mini-batch of distinct training samples.
    choice = np.random.choice(range(len(X_train)), batch_size, replace=False)
    feed = {x:X_train[choice], y: y_train[choice], train_phase: True, keep_prob: 0.5}

    # One optimisation step; accuracy and loss are fetched in the same run.
    batch_accuracy, batch_loss, _ = sess.run([accuracy, loss, train_step],
                                             feed_dict=feed)
    training_accuracy.append(batch_accuracy)
    training_loss.append(batch_loss)

    # Displaying info
    step = i + 1
    if i == 0 or step % display_step == 0:
        print("Step %05d: accuracy=%.4f\tloss=%.6f\tlearning rate=%.6f" %
              (step, batch_accuracy, batch_loss, learning_rate.eval()))

#save_path = saver.save(sess, "./saved/model.ckpt")
#print("Model saved in file: %s" % save_path)

# Side-by-side curves of the recorded batch accuracy and batch loss.
plt.figure(figsize=(10,4))
plot_titles = ["Training accuracy", "Training Loss"]
plot_series = [training_accuracy, training_loss]
for panel, (title, series) in enumerate(zip(plot_titles, plot_series), start=1):
    plt.subplot(1, 2, panel)
    plt.plot(series)
    plt.title(title)

Step 00001: accuracy=0.5000	loss=4332.645508	learning rate=0.002000
Step 00010: accuracy=0.5625	loss=1381.772095	learning rate=0.002000
Step 00020: accuracy=0.5625	loss=846.156494	learning rate=0.002000
Step 00030: accuracy=0.3750	loss=597.495972	learning rate=0.002000
Step 00040: accuracy=0.5625	loss=506.716644	learning rate=0.002000
Step 00050: accuracy=0.5000	loss=453.496094	learning rate=0.002000
Step 00060: accuracy=0.3750	loss=416.809631	learning rate=0.002000
Step 00070: accuracy=0.6875	loss=394.281982	learning rate=0.002000
