[View in Colaboratory](https://colab.research.google.com/github/gauravbansal98/Cat-vs-Dog-Classifier-Using-CNN/blob/master/Cat_vs_Dog_Classifier_using_VGG16.ipynb)

In [29]:
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

gpg: keybox '/tmp/tmp92m1l23v/pubring.gpg' created
gpg: /tmp/tmp92m1l23v/trustdb.gpg: trustdb created
gpg: key AD5F235DF639B041: public key "Launchpad PPA for Alessandro Strada" imported
gpg: Total number processed: 1
gpg:               imported: 1
··········


In [0]:
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
import os
os.chdir("drive/Cat vs Dog Classifier")
import tensorflow as tf
import numpy as np
import cv2
from random import shuffle

In [0]:
TRAIN_DIR = 'training_data'
IMG_SIZE = 224
data_to_save_dir = 'data'

In [0]:
def label_img(img):
    word_label = img.split('.')[-3]
    if word_label == 'cat': return [1,0]
    elif word_label == 'dog': return [0,1]

In [0]:
import inspect
import os

import numpy as np
import tensorflow as tf
import time

VGG_MEAN = [103.939, 116.779, 123.68]


class Vgg16:
    def __init__(self, vgg16_npy_path=None):
        if vgg16_npy_path is None:
            path = inspect.getfile(Vgg16)
            path = os.path.abspath(os.path.join(path, os.pardir))
            path = os.path.join(path, "vgg16.npy")
            vgg16_npy_path = path
            print(path)

        self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item()
        print("npy file loaded")

    def build(self, rgb):
        """
        load variable from npy to build the VGG
        :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1]
        """

        start_time = time.time()
        print("build model started")
        #rgb_scaled = rgb * 255.0

        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb)
        assert red.get_shape().as_list()[1:] == [224, 224, 1]
        assert green.get_shape().as_list()[1:] == [224, 224, 1]
        assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        bgr = tf.concat(axis=3, values=[
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2],
        ])
        assert bgr.get_shape().as_list()[1:] == [224, 224, 3]

        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool(self.conv5_3, 'pool5')

        self.fc6 = self.fc_layer(self.pool5, "fc6")
        assert self.fc6.get_shape().as_list()[1:] == [4096]
        self.relu6 = tf.nn.relu(self.fc6)

        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.relu(self.fc7)

        self.data_dict = None
        print(("build model finished: %ds" % (time.time() - start_time)))
        
        return self.relu7

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.variable_scope(name):
            filt = self.get_conv_filter(name)

            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')

            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)

            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])

            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)

            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)

            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")

In [0]:
X = tf.placeholder('float', [None, 224, 224, 3])

In [8]:
vgg = Vgg16('vgg16.npy')
class_output = vgg.build(X)

npy file loaded
build model started
build model finished: 2s


In [0]:
training_data = []
i = 0
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for img in os.listdir(TRAIN_DIR):
    print(i)
    i = i + 1
    label = label_img(img)
    path = os.path.join(TRAIN_DIR,img)
    img = cv2.imread(path)
    img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
    img = np.reshape(np.array(img), (1, 224, 224, 3))
    img = sess.run(class_output, feed_dict = {X:img})
    training_data.append([np.array(img),np.array(label)])
  shuffle(training_data)
  np.save('bottleneck_layer.npy', training_data)

In [0]:
training_data = np.load('bottleneck_layer.npy')
training_x = []
training_y = []
testing_x = []
testing_y = []
for i in range(23552):
  training_x.append(training_data[i][0])
  training_y.append(training_data[i][1])
  
for i in range(1148):
  testing_x.append(training_data[23552+i][0])
  testing_y.append(training_data[23552+i][1])

In [0]:
tf.reset_default_graph()
hm_epochs = 10


X = tf.placeholder(dtype = tf.float32, shape = [None, 4096,], name = 'input_labels')
y = tf.placeholder(dtype = tf.int32, shape = [None, 2], name = 'output_labels')
learning_rate = tf.placeholder('float')
weights = {'weight' : tf.Variable(tf.random_normal(shape = [4096, 2])), 'bias' : tf.Variable(tf.random_normal((1, 2)))}

In [37]:
prediction = tf.add(tf.matmul(X, weights['weight']), weights['bias'])

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = prediction, labels = y))

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())



Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [58]:
for i in range(100):
  epoch_x = training_x
  epoch_x = np.reshape(epoch_x, (-1, 4096))
  epoch_y = training_y
  o, c = sess.run([optimizer, cost], feed_dict = {X : epoch_x, y : epoch_y, learning_rate : .00001})
  epoch_loss = c

  print('epoch' , i, 'completed out of ', hm_epochs, 'loss ', epoch_loss)

epoch 0 completed out of  10 loss  1.5383178
epoch 1 completed out of  10 loss  1.5383064
epoch 2 completed out of  10 loss  1.5382949
epoch 3 completed out of  10 loss  1.5382833
epoch 4 completed out of  10 loss  1.538272
epoch 5 completed out of  10 loss  1.5382607
epoch 6 completed out of  10 loss  1.5382491
epoch 7 completed out of  10 loss  1.5382376
epoch 8 completed out of  10 loss  1.5382261
epoch 9 completed out of  10 loss  1.5382147
epoch 10 completed out of  10 loss  1.5382032
epoch 11 completed out of  10 loss  1.5381914
epoch 12 completed out of  10 loss  1.5381798
epoch 13 completed out of  10 loss  1.5381682
epoch 14 completed out of  10 loss  1.5381566
epoch 15 completed out of  10 loss  1.538145
epoch 16 completed out of  10 loss  1.5381334
epoch 17 completed out of  10 loss  1.5381218
epoch 18 completed out of  10 loss  1.5381101
epoch 19 completed out of  10 loss  1.5380982
epoch 20 completed out of  10 loss  1.5380867
epoch 21 completed out of  10 loss  1.538075
e

epoch 65 completed out of  10 loss  1.5375404
epoch 66 completed out of  10 loss  1.5375278
epoch 67 completed out of  10 loss  1.5375153
epoch 68 completed out of  10 loss  1.5375028
epoch 69 completed out of  10 loss  1.5374901
epoch 70 completed out of  10 loss  1.5374776
epoch 71 completed out of  10 loss  1.537465
epoch 72 completed out of  10 loss  1.5374523
epoch 73 completed out of  10 loss  1.5374397
epoch 74 completed out of  10 loss  1.5374272
epoch 75 completed out of  10 loss  1.5374144
epoch 76 completed out of  10 loss  1.5374016
epoch 77 completed out of  10 loss  1.537389
epoch 78 completed out of  10 loss  1.5373763
epoch 79 completed out of  10 loss  1.5373634
epoch 80 completed out of  10 loss  1.5373509
epoch 81 completed out of  10 loss  1.5373378
epoch 82 completed out of  10 loss  1.537325
epoch 83 completed out of  10 loss  1.5373123
epoch 84 completed out of  10 loss  1.5372994
epoch 85 completed out of  10 loss  1.5372865
epoch 86 completed out of  10 loss  1

In [59]:
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))

accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

print('Accuracy:',sess.run(accuracy, feed_dict = {X:np.reshape(testing_x, (-1, 4096)), y:testing_y}))


Accuracy: 0.9703833
