# Dogs vs Cats Image Recognition using TensorFlow

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
from IPython.display import display, Image, HTML
import cv2

## Load the Data
Loading the data from the training and testing folders provided by Kaggle with images of dogs and cats.

The image_size can be altered for larger or smaller images. Smaller images will be processed faster, but will lack detail for learning. 224, 150, 96, 64, 32 are all common sizing options. 

The training, testing, and validating sizes can be altered. The training and validating sizes must be equal to the total size.

In [None]:
train_dir='/Users/alexvansteel/Projects/CIS365/TensorFlowImageRecognition/train/'
test_dir='/Users/alexvansteel/Projects/CIS365/TensorFlowImageRecognition/test/'

# used for scaling/normalization
image_size=150; # 150x150.
chan=3
pixel_depth=255.0  # Number of levels per pixel.

# for small-sample testing
tv_size_dogs=1000
tv_size_cats=1000
tv_size_all=2000
train_size=1600
val_size=400
test_size=500

if (train_size + val_size != tv_size_all):
   print ("Error, check that train_size + val_size is equal to tv_size_all")
   exit ()

## Create the Directories
Creating the lists of training images for all images, dogs, and cats. As well as creating the list of testing images.

Use numpy to create the special arrays with labels for validation. 

In [None]:
train_images=[train_dir+i for i in os.listdir(train_dir)]
train_dogs=[train_dir+i for i in os.listdir(train_dir) if 'dog' in i]
train_cats=[train_dir+i for i in os.listdir(train_dir) if 'cat' in i]
test_images=[test_dir+i for i in os.listdir(test_dir)]

train_images=train_dogs[:tv_size_dogs]+train_cats[:tv_size_cats]
train_labels=np.array((['dogs']*tv_size_dogs)+(['cats']*tv_size_cats))
test_images=test_images[:test_size]
test_labels=np.array(['unknownclass']*test_size)

## Reading the Images
Reads the image and ensures that the image is a uniform square. If the image is not a uniform square, adds black space to fill out the image and prevent distortion. 

In [None]:
def read_image(file_path):
    img=cv2.imread(file_path,cv2.IMREAD_COLOR) #cv2.IMREAD_GRAYSCALE
    if (img.shape[0]>=img.shape[1]): # height is greater than width
       resizeto=(image_size,int(round(image_size*(float(img.shape[1])/img.shape[0]))));
    else:
       resizeto=(int(round(image_size*(float(img.shape[0])/img.shape[1]))),image_size);

    img2=cv2.resize(img,(resizeto[1],resizeto[0]),interpolation=cv2.INTER_CUBIC)
    img3=cv2.copyMakeBorder(img2,0,image_size-img2.shape[0],0,image_size-img2.shape[1],cv2.BORDER_CONSTANT,0)

    return img3[:,:,::-1]  # turn into rgb format

## Prepare the Data
Normalizes the colours (R,G,B) independently of one another to prepare the data for learning. 

In [None]:
def prep_data(images):
    count=len(images)
    data=np.ndarray((count,image_size,image_size,chan),dtype=np.float32)

    for i, image_file in enumerate(images):
        image=read_image(image_file);
        image_data=np.array(image,dtype=np.float32);

        image_data[:,:,0]=(image_data[:,:,0].astype(float)-pixel_depth/2)/pixel_depth
        image_data[:,:,1]=(image_data[:,:,1].astype(float)-pixel_depth/2)/pixel_depth
        image_data[:,:,2]=(image_data[:,:,2].astype(float)-pixel_depth/2)/pixel_depth

        data[i]=image_data; # image_data.T
        if i%250 == 0: print('Processed {} of {}'.format(i, count))
    return data

## Create the Normalized Lists

In [None]:
train_norm=prep_data(train_images)
test_norm=prep_data(test_images)

print('Train shape: {}'.format(train_norm.shape))
print('Test shape: {}'.format(test_norm.shape))

## Visualisation
Displays the first three images from the Dogs and Cats after the normalization has been done. 

In [None]:
plt.imshow(train_norm[0,:,:,:],interpolation='nearest')
plt.figure()
plt.imshow(train_norm[1,:,:,:],interpolation='nearest')
plt.figure()
plt.imshow(train_norm[2,:,:,:],interpolation='nearest')
plt.figure()
plt.imshow(train_norm[1000,:,:,:],interpolation='nearest')
plt.figure()
plt.imshow(train_norm[1001,:,:,:],interpolation='nearest')
plt.figure()
plt.imshow(train_norm[1002,:,:,:],interpolation='nearest')

## Randomize the Data

In [8]:
np.random.seed(121)


def randomize(dataset,labels):

    permutation=np.random.permutation(labels.shape[0])
    shuff_dataset=dataset[permutation,:,:,:]
    shuff_labels=labels[permutation]

    return shuff_dataset,shuff_labels

train_dataset_rand,train_labels_rand=randomize(train_norm,train_labels)
test_dataset,test_labels=randomize(test_norm,test_labels)

valid_dataset=train_dataset_rand[:val_size,:,:,:]
valid_labels=train_labels_rand[:val_size]
train_dataset=train_dataset_rand[val_size:val_size+train_size,:,:,:]
train_labels=train_labels_rand[val_size:val_size+val_size]
print('Training',train_dataset.shape,train_labels.shape)
print('Validation',valid_dataset.shape,valid_labels.shape)
print('Test',test_dataset.shape,test_labels.shape)

('Training', (1600, 150, 150, 3), (400,))
('Validation', (400, 150, 150, 3), (400,))
('Test', (500, 150, 150, 3), (500,))


# TensorFlow


In [9]:
import tensorflow as tf
num_labels=2
num_channels=3 # rg


def reformat(dataset, labels):
  dataset=dataset.reshape((-1,image_size,image_size,num_channels)).astype(np.float32)
  labels=(labels=='cats').astype(np.float32); # set dogs to 0 and cats to 1
  labels=(np.arange(num_labels)==labels[:,None]).astype(np.float32)
  return dataset, labels


train_dataset, train_labels=reformat(train_dataset,train_labels)
valid_dataset, valid_labels=reformat(valid_dataset, valid_labels)
test_dataset, test_labels=reformat(test_dataset, test_labels)
print('Training set',train_dataset.shape,train_labels.shape)
print('Validation set',valid_dataset.shape,valid_labels.shape)
print('Test set',test_dataset.shape,test_labels.shape)

('Training set', (1600, 150, 150, 3), (400, 2))
('Validation set', (400, 150, 150, 3), (400, 2))
('Test set', (500, 150, 150, 3), (500, 2))


## Computer 2D Convolution Graph Model
The convolution ops sweep a 2-D filter over a batch of images, applying the filter to each window of each image of the appropriate size. It should be noted that although these ops are called "convolution", they are strictly speaking "cross-correlation" since the filter is combined with an input window without reversing the filter. The convolution then returns a tensor.

The pooling ops sweep a rectangular window over the input tensor, computing a reduction operation for each window. The max-pooling used here is a special case of greyscale morphological dilation when the filter assumes all-zero values (a.k.a. flat structuring function).

In [None]:
batch_size=16
patch_size=5
depth=16
num_hidden=64

graph=tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset=tf.placeholder(tf.float32,shape=(batch_size,image_size,image_size,num_channels))
  tf_train_labels=tf.placeholder(tf.float32,shape=(batch_size, num_labels))
  tf_valid_dataset=tf.constant(valid_dataset)
  tf_test_dataset=tf.constant(test_dataset)

  # variables
  kernel_conv1=tf.Variable(tf.truncated_normal([3,3,3,32],dtype=tf.float32,stddev=1e-1),name='weights_conv1')
  biases_conv1=tf.Variable(tf.constant(0.0,shape=[32],dtype=tf.float32),trainable=True, name='biases_conv1')
  kernel_conv2=tf.Variable(tf.truncated_normal([3,3,32,32],dtype=tf.float32,stddev=1e-1),name='weights_conv2')
  biases_conv2=tf.Variable(tf.constant(0.0,shape=[32],dtype=tf.float32),trainable=True,name='biases_conv2')
  kernel_conv3=tf.Variable(tf.truncated_normal([3,3,32,64],dtype=tf.float32,stddev=1e-1),name='weights_conv3')
  biases_conv3=tf.Variable(tf.constant(0.0,shape=[64],dtype=tf.float32),trainable=True,name='biases_conv3')
  fc1w=tf.Variable(tf.truncated_normal([23104,64],dtype=tf.float32,stddev=1e-1),name='weights') # 23104 from pool3.gete_shape () of 19*19*64
  fc1b=tf.Variable(tf.constant(1.0,shape=[64],dtype=tf.float32),trainable=True,name='biases')
  fc2w=tf.Variable(tf.truncated_normal([64,2],dtype=tf.float32,stddev=1e-1),name='weights')
  fc2b=tf.Variable(tf.constant(1.0,shape=[2],dtype=tf.float32),trainable=True,name='biases')


  def model(data):
     parameters=[]
     with tf.name_scope('conv1_1') as scope:
         conv=tf.nn.conv2d(data,kernel_conv1,[1,1,1,1],padding='SAME')
         out=tf.nn.bias_add(conv,biases_conv1)
         conv1_1=tf.nn.relu(out,name=scope)
         parameters+=[kernel_conv1,biases_conv1]

     # pool1
     pool1=tf.nn.max_pool(conv1_1,
                            ksize=[1,2,2,1],
                            strides=[1,2,2,1],
                            padding='SAME',
                            name='pool1')

     with tf.name_scope('conv2_1') as scope:
         conv=tf.nn.conv2d(pool1,kernel_conv2,[1,1,1,1],padding='SAME')
         out=tf.nn.bias_add(conv,biases_conv2)
         conv2_1=tf.nn.relu(out,name=scope)
         parameters+=[kernel_conv2,biases_conv2]

     # pool2
     pool2=tf.nn.max_pool(conv2_1,
                            ksize=[1,2,2,1],
                            strides=[1,2,2,1],
                            padding='SAME',
                            name='pool2')

     with tf.name_scope('conv3_1') as scope:
         conv=tf.nn.conv2d(pool2,kernel_conv3,[1,1,1,1],padding='SAME')
         out=tf.nn.bias_add(conv,biases_conv3)
         conv3_1=tf.nn.relu(out,name=scope)
         parameters+=[kernel_conv3,biases_conv3]

     # pool3
     pool3=tf.nn.max_pool(conv3_1,
                            ksize=[1,2,2,1],
                            strides=[1,2,2,1],
                            padding='SAME',
                            name='pool3')

     # fc1
     with tf.name_scope('fc1') as scope:
         shape=int(np.prod(pool3.get_shape()[1:])) # except for batch size (the first one), multiple the dimensions
         pool3_flat=tf.reshape(pool3,[-1,shape])
         fc1l=tf.nn.bias_add(tf.matmul(pool3_flat, fc1w), fc1b)
         fc1=tf.nn.relu(fc1l)
         parameters+=[fc1w, fc1b]

     # fc3
     with tf.name_scope('fc3') as scope:
         fc2l=tf.nn.bias_add(tf.matmul(fc1,fc2w),fc2b)
         parameters+=[fc2w,fc2b]
     return fc2l;

  # Training computation.
  logits=model(tf_train_dataset)
  loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=tf_train_labels))

  # Optimizer.
  optimizer=tf.train.RMSPropOptimizer(0.0001).minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction=tf.nn.softmax(logits)
  valid_prediction=tf.nn.softmax(model(tf_valid_dataset))
  test_prediction=tf.nn.softmax(model(tf_test_dataset))

# Train the Data

In [None]:
def accuracy(predictions, labels):
    return (100.0*np.sum(np.argmax(predictions,1) == np.argmax(labels,1))/predictions.shape[0])


num_steps=1001
with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print ("Initialized")
  for step in range(num_steps):
    offset=(step*batch_size)%(train_labels.shape[0]-batch_size)
    batch_data=train_dataset[offset:(offset+batch_size),:,:,:]
    batch_labels=train_labels[offset:(offset+batch_size),:]
    feed_dict={tf_train_dataset:batch_data,tf_train_labels:batch_labels}
    _, l, predictions=session.run([optimizer,loss,train_prediction],feed_dict=feed_dict)
    if (step%50 == 0):
      print("Minibatch loss at step", step, ":", l)
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions,batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(),valid_labels))
  print ("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
('Minibatch loss at step', 0, ':', 0.78864336)
Minibatch accuracy: 62.5%
('Minibatch loss at step', 50, ':', 0.7771731)
Minibatch accuracy: 43.8%
Validation accuracy: 46.8%
('Minibatch loss at step', 100, ':', 0.70286256)
Minibatch accuracy: 43.8%
Validation accuracy: 48.2%
('Minibatch loss at step', 150, ':', 0.67795044)
Minibatch accuracy: 50.0%
Validation accuracy: 51.2%
('Minibatch loss at step', 200, ':', 0.52479428)
Minibatch accuracy: 68.8%
Validation accuracy: 56.2%
('Minibatch loss at step', 250, ':', 0.61060107)
Minibatch accuracy: 62.5%
Validation accuracy: 56.8%
('Minibatch loss at step', 300, ':', 0.35262322)
Minibatch accuracy: 87.5%
Validation accuracy: 58.8%
('Minibatch loss at step', 350, ':', 0.29634362)
Minibatch accuracy: 93.8%
Validation accuracy: 54.0%
('Minibatch loss at step', 400, ':', 0.27464801)
Minibatch accuracy: 93.8%
Validation accuracy: 60.2%
('Minibatch loss at step', 450, ':', 0.27087396)
Minibatch accuracy: 100.0%
Validation accuracy: 61.2