**Building a strong image classification model from less data**

The implementation is a slight variation of the one in https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d

Mainly, this kernel uses the method flow(x, y), whereas the above gist uses the method flow_from_directory(directory).
For more info, refer to https://keras.io/preprocessing/image/

The change was made so that the kernel suits the way the data is structured on Kaggle. Appropriate changes have also been made in other parts of the source code.

**Perform the necessary imports.**

In [1]:
import os, cv2, re, random
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from keras import layers, models, optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from keras.applications.resnet50 import ResNet50
from keras.models       import Model
import tensorflow as tf
from keras.models       import Sequential

from keras.layers import Dense, Input

**Data dimensions and paths**

In [3]:
# Directories holding the training and test images.
TRAIN_DIR = 'train2/'
TEST_DIR = 'test/'

# Epoch count and path of a ResNet50 "notop" weights file.
NO_EPOCHS = 5
RESNET_WEIGHTS_PATH = '../input/keras-pretrained-models/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'

# Path of every entry found in each directory, in os.listdir order.
train_images_dogs_cats = [TRAIN_DIR + fname for fname in os.listdir(TRAIN_DIR)]  # use this for full dataset
test_images_dogs_cats = [TEST_DIR + fname for fname in os.listdir(TEST_DIR)]


**Helper function to sort the image files based on the numeric value in each file name.**

In [4]:
# Display how many training image paths were collected.
len(train_images_dogs_cats)

1000

**Sort the training set. Use 1300 images each of cats and dogs instead of all 25000 to speed up the learning process.**

**Sort the test set**

In [5]:
# Keep at most the first 1000 paths (in listing order) for the rest of the notebook.
train_images_dogs_cats = train_images_dogs_cats[:1000]

In [6]:
from sklearn.preprocessing import LabelEncoder

**Now the images have to be represented as numbers. For this, read and resize each image using the OpenCV library.**

**Generate labels for the supervised learning set.**

**Below is the helper function to do so.**

In [7]:
def prepare_data(list_of_images):
    """
    Load, resize and label a list of cat/dog image files.

    A path containing 'dog' is labelled 1; otherwise a path containing
    'cat' is labelled 0.  Paths containing neither word, and files that
    OpenCV cannot decode, are skipped.  Image and label are appended
    together, so the two returned arrays always have the same length and
    stay aligned index-for-index.

    Args:
        list_of_images: iterable of image file paths.

    Returns two arrays:
        x is an array of images resized to 224x224 (as decoded by cv2.imread)
        y is an array of integer labels (1 = dog, 0 = cat)
    """
    import warnings

    x = []  # images as arrays
    y = []  # labels

    for image_path in list_of_images:
        # NOTE: the whole path is searched, so a directory name containing
        # 'dog' or 'cat' also influences the label.
        if 'dog' in image_path:
            label = 1
        elif 'cat' in image_path:
            label = 0
        else:
            # Unlabelled file: skip it before loading, otherwise x would
            # gain an entry that has no counterpart in y.
            continue

        pixels = cv2.imread(image_path)
        if pixels is None:
            # cv2.imread signals a missing/undecodable file by returning
            # None rather than raising; cv2.resize would then fail.
            warnings.warn('skipping unreadable image: {}'.format(image_path))
            continue

        x.append(cv2.resize(pixels, (224, 224), interpolation=cv2.INTER_CUBIC))
        y.append(label)

    return np.array(x), np.array(y)

In [8]:
# Load the selected training images into X and their labels into Y.
X ,Y= prepare_data(train_images_dogs_cats)
# Print the image data format reported by the Keras backend.
print(K.image_data_format())

channels_last


In [9]:
# prepare_data() already returns NumPy arrays; np.asarray passes an existing
# ndarray through unchanged instead of duplicating the (large) image array
# the way np.array does.
X = np.asarray(X)
Y = np.asarray(Y)

In [10]:
# Display the distinct label values and how many samples carry each one.
np.unique(Y,return_counts=True)

(array([0, 1]), array([500, 500], dtype=int64))

In [11]:
from keras.utils import to_categorical
# Convert the integer labels into a class matrix (one column per class).
Y1 = to_categorical(Y)

In [12]:
# Split the data into two sets: 80% for training and 20% for validation.
# random_state is fixed so the split is the same on every run.
X_train, X_val, Y_train, Y_val = train_test_split(X,Y1, test_size=0.2, random_state=7)

In [13]:
# Display the shapes of the training images, validation images and training labels.
X_train.shape, X_val.shape, Y_train.shape

((800, 224, 224, 3), (200, 224, 224, 3), (800, 2))

In [14]:
# Mini-batch size, followed by the sample count of each split.
batch_size = 64
nb_train_samples = X_train.shape[0]
nb_validation_samples = X_val.shape[0]

In [15]:
# ---- Hyper-parameters and run-time switches ----

# Image geometry: inputs are img_size x img_size pixels.
img_size = 224
img_flat_size = img_size ** 2
flatten_size = img_size

# Run mode: Is_train=True trains the model, Load_model=True loads a saved one.
Is_train, Load_model = True, False

# Name of the save file
save_name = 'soft1'

# Number of samples used to test the code
num_test_sample = 10

# Number of label classes (0 = cat, 1 = dog)
num_label = 2

# Optimisation settings
num_epoch = 5
batch_size = 16
learning_rate = 0.0001
epsilon = 1e-8

# Parameters for LSTM
lstm_size, step_size = 256, 4

# Fraction of GPU memory assigned to the session config
gpu_fraction = 0.3

In [16]:
# Build the ImageNet-pretrained ResNet50 backbone inside the "ResnetModel"
# variable scope; the same scope name is used later to select its variables.
with tf.variable_scope("ResnetModel"):
    # include_top=False leaves out the ImageNet classification head;
    # pooling='max' requests global max pooling on the final feature map.
    base_model = tf.keras.applications.ResNet50(weights='imagenet', pooling='max', include_top=False)


In [17]:
# Graph inputs: a batch of 224x224 3-channel images and the matching batch of
# 2-element target rows (the batch dimension is left unspecified).
x_image  = tf.placeholder(tf.float32, shape = [None, 224, 224, 3])
y_target = tf.placeholder(tf.float32, shape=[None, 2])

In [18]:
# Apply the backbone to the image placeholder; the result feeds the
# classification head.
last_layer = base_model(x_image)

In [19]:
# Convolution and pooling helpers
def conv2d(x, w, stride):
    """Convolve `x` with filter `w` using 'SAME' padding.

    `stride` is applied to the two middle dimensions; the first and last
    dimensions use a stride of 1.
    """
    strides = [1, stride, stride, 1]
    return tf.nn.conv2d(x, w, strides=strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window and a stride of 2 ('SAME' padding)."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Get Variables
def weight_variable(name, shape):
    """Return the variable `name` of the given `shape` via tf.get_variable,
    using a Xavier initializer."""
    xavier = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, shape=shape, initializer=xavier)

def bias_variable(name, shape):
    """Return the bias variable `name` of the given `shape` via
    tf.get_variable.

    Note that biases use the same Xavier initializer as the weights.
    """
    xavier = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, shape=shape, initializer=xavier)

In [20]:
# Classification head: one fully-connected layer mapping the 2048 backbone
# outputs to 2 class scores.
w_fc1 = weight_variable('w_fc8',[2048, 2])
b_fc1 = bias_variable('b_fc8', [2])

output = tf.matmul(last_layer, w_fc1)+b_fc1
# Turn the scores into class probabilities.
output = tf.nn.softmax(output)
# Training 
# Cross-entropy between the targets and the predicted probabilities,
# averaged into a scalar cost that Adam minimises.
Loss = tf.keras.backend.categorical_crossentropy(target = y_target, output = output)
Cost = tf.reduce_mean(Loss)
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate, epsilon = epsilon).minimize(Cost)

# Accuracy: fraction of rows whose predicted class (argmax of the output)
# equals the target class (argmax of the target row).
correct_prediction = tf.equal(tf.argmax(y_target,1), tf.argmax(output,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


In [21]:
# Create Session
# NOTE(review): `config` is built here but is not passed to any session in
# this cell, so the GPU memory fraction does not appear to take effect --
# confirm.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = gpu_fraction

# Use the session managed by tf.keras.
sess = tf.keras.backend.get_session()
# Global variables that are not under the 'ResnetModel' scope (symmetric
# difference between all global variables and the 'ResnetModel' ones).
initialize_variables_list = list(set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))^set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='ResnetModel')))
# Write the session graph to the "output" directory.
writer = tf.summary.FileWriter("output", sess.graph)
# NOTE(review): running Variable objects fetches their current values; it
# does not run their initializers.  Presumably the variables are already
# initialised at this point (possibly as a side effect of get_session()) --
# confirm, or run tf.variables_initializer(initialize_variables_list).
sess.run(initialize_variables_list )


[0.9, 0.999, array([[-0.00094979, -0.03933541],
        [ 0.01569667,  0.04805824],
        [-0.04523479,  0.04087614],
        ...,
        [ 0.04599   ,  0.00376365],
        [-0.0099079 ,  0.03910299],
        [-0.03322377,  0.04828329]], dtype=float32), array([0., 0.], dtype=float32), array([0., 0.], dtype=float32), array([[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=float32), array([[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=float32), array([-0.3406089 , -0.39275515], dtype=float32)]

In [23]:
# Training
# Mini-batch training loop: each epoch visits the training set once in a
# freshly shuffled order and prints metrics after every optimisation step.
batch_size = 16
Is_train = True
img_size = 224
num_epoch = 1
if Is_train:
    train_data_num = X_train.shape[0]
    # The validation feed never changes, so build it once.
    validation_feed = {x_image: X_val, y_target: Y_val}

    for i in range(num_epoch):
        # New random visiting order for this epoch.
        random_idx = np.arange(train_data_num)
        np.random.shuffle(random_idx)

        for j in range(0, train_data_num, batch_size):
            # Slicing clamps at the end of the array, so the final (possibly
            # shorter) batch needs no special case.  The previous special
            # case sliced [j : j + train_data_num - 1], which dropped the
            # last sample whenever batch_size >= train_data_num.
            batch_idx = random_idx[j:j + batch_size]
            batch_x_train = X_train[batch_idx]
            batch_y_train = Y_train[batch_idx]
            train_feed = {x_image: batch_x_train, y_target: batch_y_train}

            # One optimisation step on this batch ...
            sess.run(optimizer, feed_dict=train_feed)
            # ... then cost and accuracy on the same batch, fetched in one
            # forward pass instead of two separate evaluations.
            cost, acc = sess.run([Cost, accuracy], feed_dict=train_feed)
            # NOTE: this scores the whole validation set after every batch,
            # which dominates the run time; kept to preserve the log output.
            val_acc = sess.run(accuracy, feed_dict=validation_feed)

            # Print Progress
            print("Epoch: {} / Batch: {}/{} / Cost: {} / Training Accuracy: {} / "
                  "Validation Accuracy: {}".format(
                      i + 1, j, train_data_num, cost, acc, val_acc))

        # Nothing in this loop writes a checkpoint, so the message no longer
        # claims that the model was saved.
        print('Epoch {} finished'.format(i + 1))

Epoch: 1 / Batch: 0/800 / Cost: 0.02107138 / Training Accuracy: 1.0 / Validation Accuracy: 0.96
Epoch: 1 / Batch: 16/800 / Cost: 0.007492285 / Training Accuracy: 1.0 / Validation Accuracy: 0.95
Epoch: 1 / Batch: 32/800 / Cost: 0.021728104 / Training Accuracy: 1.0 / Validation Accuracy: 0.95
Epoch: 1 / Batch: 48/800 / Cost: 0.037527107 / Training Accuracy: 1.0 / Validation Accuracy: 0.96
Epoch: 1 / Batch: 64/800 / Cost: 0.0031573616 / Training Accuracy: 1.0 / Validation Accuracy: 0.965
Epoch: 1 / Batch: 80/800 / Cost: 0.003676218 / Training Accuracy: 1.0 / Validation Accuracy: 0.985
Epoch: 1 / Batch: 96/800 / Cost: 0.040438753 / Training Accuracy: 1.0 / Validation Accuracy: 0.985
Epoch: 1 / Batch: 112/800 / Cost: 0.015245602 / Training Accuracy: 1.0 / Validation Accuracy: 0.94
Epoch: 1 / Batch: 128/800 / Cost: 0.03253888 / Training Accuracy: 1.0 / Validation Accuracy: 0.93
Epoch: 1 / Batch: 144/800 / Cost: 0.023627542 / Training Accuracy: 1.0 / Validation Accuracy: 0.945
Epoch: 1 / Batc