## **VGG16 Classification for Cats & Dogs using Transfer Learning.**

In [None]:
# Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from numpy.random import seed
import os

In [None]:
# Deep learning utilities
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.applications import VGG16
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense
from keras.layers.convolutional import Conv2D, MaxPooling2D 
from keras import backend as K
from keras import optimizers
from skimage.transform import resize


In [None]:
# The dataset is read from Google Drive: mount the Drive at /content/drive so
# the "/content/drive/My Drive/..." paths used by the following cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Prepare the data for training: list the image files and derive each one's class label from its filename.

In [None]:
# Collect the training image filenames and derive a numeric label for each.
# The class is read from the part of the filename before the first dot:
# 'dog' -> 1, 'cat' -> 0.
train_dir = "/content/drive/My Drive/train_catsdogs"
label_by_prefix = {'cat': 0, 'dog': 1}

# Keep only files whose prefix is a known class. The augmentation preview
# figure ("example.jpg") is saved into this same folder, so without the
# filter it would be listed on the next run and silently labelled as a cat.
# Sorting makes the row order independent of the filesystem's listing order.
filenames = sorted(
    f_name for f_name in os.listdir(train_dir)
    if f_name.split('.')[0] in label_by_prefix
)

categories = [label_by_prefix[f_name.split('.')[0]] for f_name in filenames]

In [None]:
# Tabulate every image filename next to its class label and show the first rows
df = pd.DataFrame(dict(filename=filenames, category=categories))
df.head()

In [None]:
# Bar chart of how many images each class has
class_counts = df['category'].value_counts()
class_counts.plot.bar()

In [None]:
# Display one randomly chosen image from the training data
sample = random.choice(filenames)
sample_path = os.path.join("/content/drive/My Drive/train_catsdogs/", sample)
image = load_img(sample_path)
plt.imshow(image)

In [None]:
# Parameters shared by the model definition, the generators and training
seed(1)                                  # seed NumPy's global random generator
image_size = 224                         # images are resized to image_size x image_size
input_shape = (image_size,) * 2 + (3,)   # (image_size, image_size, 3)
batch_size, epochs = 16, 70

In [None]:
# Load the Keras VGG16 network with ImageNet weights, without its top
# (include_top=False), for inputs of shape `input_shape`
model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)

In [None]:
# Transfer learning: mark every layer currently in the model as non-trainable
for base_layer in model.layers:
    base_layer.trainable = False

In [None]:
# Stack a small classification head on top of the loaded model's output
head_layers = (
    Conv2D(1, (1,1), activation='relu'),   # 1x1 convolution with a single filter
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),        # single sigmoid output unit
)

x = model.output
for head_layer in head_layers:
    x = head_layer(x)

# Rebind `model` to the full network: original input -> output of the new head
model = Model(model.input, x)

In [None]:
# Configure training: binary cross-entropy loss, the 'sgd' optimizer, and
# accuracy as the reported metric
model.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print a summary of the assembled model
model.summary()

In [None]:
# Replace the numeric labels with class-name strings; the generators created
# later read this column as their label column (y_col='category')
label_names = {0:'cat',1:'dog'}
df["category"] = df["category"].replace(label_names)

In [None]:
# Split into train and validation sets: 10% of the rows are held out for
# validation. No random_state is passed to the split.
train_df, validate_df = train_test_split(df, test_size=0.10) 

In [None]:
# Renumber the training rows from zero (the previous index is kept as an
# 'index' column) and record how many training rows there are
train_df = train_df.reset_index(drop=False)
total_train = len(train_df)
train_df.head()

In [None]:
# Training images are rescaled by 1/255 and randomly augmented (rotation,
# shear, zoom, shifts, horizontal flip)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Batches are read from the training folder using the filename/category columns
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    "/content/drive/My Drive/train_catsdogs",
    x_col='filename',
    y_col='category',
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size,
)

In [None]:
# Preview the augmentation: take one training row and plot nine batches drawn
# for it from the augmenting generator, in a 3x3 grid
example_df = train_df.sample(n=1).reset_index(drop=True)
example_generator = train_datagen.flow_from_dataframe(
    example_df,
    "/content/drive/My Drive/train_catsdogs",
    x_col='filename',
    y_col='category',
    target_size=(image_size, image_size),
)

plt.figure(figsize=(12, 12))
for cell in range(1, 10):
    plt.subplot(3, 3, cell)
    # Only the first batch of each pass over the generator is used
    X_batch, Y_batch = next(iter(example_generator))
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
# NOTE: the figure is written into the training image folder itself
plt.savefig("/content/drive/My Drive/train_catsdogs/example.jpg")

In [None]:
# Validation set preparation: rows are renumbered and counted as for the
# training set, but the images are only rescaled by 1/255
validate_df = validate_df.reset_index(drop=False)
total_validate = len(validate_df)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "/content/drive/My Drive/train_catsdogs",
    x_col='filename',
    y_col='category',
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size,
)

In [None]:
# Train the model. Step counts use floor division, so only whole batches of
# each set are counted per epoch.
steps_per_epoch = total_train // batch_size
validation_steps = total_validate // batch_size

history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_steps,
)

In [None]:
# Loss and accuracy of the model, as it stands after training, measured on
# the validation generator
eval_steps = total_validate // batch_size
loss, accuracy = model.evaluate_generator(validation_generator, eval_steps, workers=12)
print("Accuracy = %f  ;  loss = %f " % (accuracy, loss))

In [None]:
# Save the trained model to Google Drive as an .h5 file
model.save("/content/drive/My Drive/vgg16_catdog_trained.h5")

In [None]:
# Testing the model: build a table with one row per file in the test folder.
# NOTE: the prediction figure saved by a later cell (prediction2.jpg) is
# written into this same folder, so it is listed here on subsequent runs.
test_filenames = os.listdir("/content/drive/My Drive/test_catsdogs")
test_df = pd.DataFrame(dict(filename=test_filenames))
nb_samples = len(test_df)

In [None]:
# Show the first rows of the test table
test_df.head()

In [None]:
# Test images have no known category, so the generator yields images only
# (y_col=None, class_mode=None). shuffle=False keeps the batches in the same
# order as the rows of test_df.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df,
    "/content/drive/My Drive/test_catsdogs",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Predict on the test images and threshold the model output into 0/1 labels.
# The step count is rounded up so the final, partially filled batch is
# included, and converted to int because np.ceil returns a float, which some
# Keras/TensorFlow versions do not accept for `steps`.
predict = model.predict_generator(test_generator, steps=int(np.ceil(nb_samples / batch_size)))
threshold = 0.5
# The model ends in a single output unit, so predictions are expected with
# shape (n, 1); flatten them so a one-dimensional label vector is stored.
test_df['category'] = np.where(predict.ravel() > threshold, 1, 0)

In [None]:
# Plot up to nine randomly chosen test images, each captioned with its
# filename and predicted label, and save the grid as an image.
# The sample size is capped at the number of available rows because
# DataFrame.sample raises when asked for more rows than exist.
sample_test = test_df.sample(n=min(9, len(test_df))).reset_index()
plt.figure(figsize=(12, 12))
for index, row in sample_test.iterrows():
    filename = row['filename']
    category = row['category']
    # Images are loaded at 256x256 for display only
    img = load_img("/content/drive/My Drive/test_catsdogs/" + filename, target_size=(256, 256))
    plt.subplot(3, 3, index + 1)
    plt.imshow(img)
    plt.xlabel("{}({})".format(filename, category))
plt.tight_layout()
plt.savefig("/content/drive/My Drive/test_catsdogs/prediction2.jpg")

In [None]:
# Second testing set: build a table with one row per file in its folder.
# NOTE: the figure saved by a later cell (pred2.jpg) is written into this
# same folder, so it is listed here on subsequent runs.
test_filenames_sec = os.listdir("/content/drive/My Drive/test2_catsdogs")
test_df_sec = pd.DataFrame(dict(filename=test_filenames_sec))
nb_samples_sec = len(test_df_sec)

In [None]:
# Image-only generator for the second test set (no labels); shuffle=False
# keeps the batches in the same order as the rows of test_df_sec
test_gen_sec = ImageDataGenerator(rescale=1./255)
test_generator_sec = test_gen_sec.flow_from_dataframe(
    test_df_sec,
    "/content/drive/My Drive/test2_catsdogs",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Predict on the second test set and threshold the output into 0/1 labels,
# reusing the `threshold` value set when predicting on the first test set.
# The step count is rounded up so the final, partially filled batch is
# included, and converted to int because np.ceil returns a float, which some
# Keras/TensorFlow versions do not accept for `steps`.
predict_sec = model.predict_generator(test_generator_sec, steps=int(np.ceil(nb_samples_sec / batch_size)))
# The model ends in a single output unit, so predictions are expected with
# shape (n, 1); flatten them so a one-dimensional label vector is stored.
test_df_sec['category'] = np.where(predict_sec.ravel() > threshold, 1, 0)

In [None]:
# Plot up to nine randomly chosen images from the second test set, each
# captioned with its filename and predicted label, and save the grid.
# The sample size is capped at the number of available rows because
# DataFrame.sample raises when asked for more rows than exist.
sample_test_sec = test_df_sec.sample(n=min(9, len(test_df_sec))).reset_index()
plt.figure(figsize=(12, 12))
for index, row in sample_test_sec.iterrows():
    filename = row['filename']
    category = row['category']
    # Images are loaded at 256x256 for display only
    img = load_img("/content/drive/My Drive/test2_catsdogs/" + filename, target_size=(256, 256))
    plt.subplot(3, 3, index + 1)
    plt.imshow(img)
    plt.xlabel("{}({})".format(filename, category))
plt.tight_layout()
plt.savefig("/content/drive/My Drive/test2_catsdogs/pred2.jpg")