In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np
import random
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

# Prepare Training Data

In [None]:
# Label every training image from its filename prefix:
# 1 when the prefix is 'dog', 0 for anything else.
filenames = os.listdir("../input/train/train")
categories = [1 if name.split('.')[0] == 'dog' else 0 for name in filenames]

df = pd.DataFrame({'filename': filenames, 'category': categories})

df.head()

In [None]:
# Visual sanity check: pick one training file at random and display it.
# No random seed is set, so a different image may appear on every run.
filenames = os.listdir("../input/train/train")
sample = random.choice(filenames)
image = load_img("../input/train/train/"+sample)
plt.imshow(image)

In [None]:
# Bar chart of how many images carry each label (1 = dog, 0 = other).
df['category'].value_counts().plot.bar()

In [None]:
# Hold out 20% of the labelled images for validation; the fixed random_state
# makes the split repeatable. Both parts get a fresh 0..n-1 index.
train_df, validation_df = train_test_split(df, test_size=0.20, random_state=42)
train_df, validation_df = (
    part.reset_index(drop=True) for part in (train_df, validation_df)
)

# Row counts are used later to derive the number of batches per epoch.
total_train = len(train_df)
total_validate = len(validation_df)
batch_size = 15

## Build the classifier model
* Build a CNN. A CNN is mostly built from four kinds of layers:
    * Convolution: Add the first layer, which is a convolutional layer. Set the number of filters to 32, the shape of each filter to 3x3, the input shape to (50, 50, 3), i.e. a 50x50 RGB image, and the activation function to relu.
    * Pooling: Add a pooling layer to reduce the total number of nodes for the upcoming layers. It takes a 2x2 matrix thus giving minimum pixel loss and a precise region where the features are located.
    * Flatten: Flattens the pooled images.
    * Dense: Add a fully connected layer to feed the images to the output layer. Set the number of nodes to 256, as it's common practice to use a power of 2, and use a rectifier function, relu, as the activation function.

* Define the output layer. Set number of units to 1 as this is a binary classifier and sigmoid as the activation function
* Compile the model. Set adam as the optimizer and binary_crossentropy as the loss function, as this is a binary classifier.

In [None]:
from keras.models import Sequential,model_from_json
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

In [None]:
# Input geometry shared by the model definition and the image generators.
image_channel = 3  # RGB color
image_width, image_height = 50, 50
image_size = (image_width, image_height)

In [None]:
# Two Conv -> BatchNorm -> MaxPool stages followed by a dense head that ends
# in a single sigmoid unit (binary output).
classifier1 = Sequential([
    Conv2D(32, (3, 3), activation='relu',
           input_shape=(image_width, image_height, image_channel)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(256, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])
classifier1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the compiled model.
classifier1.summary()

## Fitting the CNN to the images
* Improve the dataset using the ImageDataGenerator method which generate batches of tensor image data with real-time data augmentation. 
    * rescale: rescaling factor. If None or 0, no rescaling is applied, otherwise the data is multiplied by the value provided.
    * shear_range: Shear Intensity
    * zoom_range: Range for random zoom.
    * horizontal_flip: Randomly flip inputs horizontally if true.
* Define the training and validation datasets using flow_from_dataframe, which takes a dataframe of filenames and labels plus the path to the image directory, and generates batches of augmented/normalized data.
    * directory: path to the directory that contains the image files listed in the dataframe.
    * target_size: The dimensions to which all images found will be resized.
    * class_mode: one of "categorical", "binary", "sparse", "input" or None. Determines the type of label arrays that are returned
    * batch_size: size of the batches of data

In [None]:
# Augment the training images on the fly (rotation, shear, zoom, flips,
# shifts) and rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
)

# flow_from_dataframe documents that class_mode='binary' needs the y_col
# values to be strings, while train_df stores the labels as ints 0/1.
# Pass a copy with the labels cast to '0'/'1'; train_df itself is unchanged.
train_generator = train_datagen.flow_from_dataframe(
    train_df.assign(category=train_df['category'].astype(str)),
    "../input/train/train/",
    x_col='filename',
    y_col='category',
    target_size=image_size,
    class_mode='binary',
    batch_size=batch_size
)

## Validation Image Generator

In [None]:
# Validation images are only rescaled by 1/255, with no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)

# flow_from_dataframe documents that class_mode='binary' needs the y_col
# values to be strings, while validation_df stores the labels as ints 0/1.
# Pass a copy with the labels cast to '0'/'1'; validation_df is unchanged.
validation_generator = validation_datagen.flow_from_dataframe(
    validation_df.assign(category=validation_df['category'].astype(str)),
    "../input/train/train/",
    x_col='filename',
    y_col='category',
    target_size=image_size,
    class_mode='binary',
    batch_size=batch_size
)

## Explore the dataset
* Print a preprocessed image from the dataset

In [None]:
# Draw one augmented batch from the training generator and display the
# first image in it.
x, y = next(train_generator)
for random_image in x[:1]:
    plt.imshow(random_image)
    plt.show()

## Define an earlystopping callback
* Import EarlyStopping - method to stop training when a monitored quantity has stopped improving.
* Define a callback. The code below keeps the default monitored quantity (val_loss) and sets patience to 10, so training is terminated if val_loss does not improve for 10 consecutive epochs.

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop training after 10 consecutive epochs without improvement. No `monitor`
# argument is passed, so EarlyStopping uses its default monitored quantity
# (val_loss according to the Keras documentation).
earlystop = EarlyStopping(patience=10)

In [None]:
# Halve the learning rate, never going below 1e-5, whenever 'val_acc' has not
# improved for 2 epochs; verbose=1 reports each reduction.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=2,
    min_lr=0.00001,
    verbose=1,
)

# Callbacks handed to the training call.
callbacks = [earlystop, learning_rate_reduction]

## Fit the model
* Invoke fit_generator to fit the model on data generated batch-by-batch by a Python generator.
    * steps_per_epoch: number of batches drawn from the training generator in each epoch, i.e. the number of training images divided by the batch size.
    * epochs: number of passes over the training data; set it to 25.
    * callbacks: List of callbacks to apply during training.
    * validation_data: generator that yields the validation data.
    * validation_steps: Total number of steps (batches of samples) to yield from  validation_data generator before stopping at the end of every epoch. It should typically be equal to the number of samples of your validation dataset divided by the batch size.

In [None]:
# Train for at most 25 epochs. Each epoch runs as many whole batches as fit
# into the training split, then validates on as many whole batches as fit
# into the validation split.
history = classifier1.fit_generator(
    train_generator,
    steps_per_epoch=total_train // batch_size,
    epochs=25,
    validation_data=validation_generator,
    validation_steps=total_validate // batch_size,
    callbacks=callbacks,
)

# Save Model

In [None]:
# Persist the architecture as JSON and the weights as HDF5 under Saved_models/.
# os.makedirs(..., exist_ok=True) replaces the `!mkdir` shell escape: it does
# not depend on a shell and does not complain when the cell is re-run and the
# directory already exists.
os.makedirs("Saved_models", exist_ok=True)

model_json = classifier1.to_json()
with open("Saved_models/cnn_base_model.json", "w") as json_file:
    json_file.write(model_json)

classifier1.save_weights("Saved_models/cnn_base_model.h5")
print("Saved model to disk")

## Evaluate the model
* Load model from disk.
* Preprocess and feed a random input image to the model for prediction.
* Test the accuracy and loss using the evaluate_generator method.

In [None]:
# Rebuild the network from the saved JSON architecture. The `with` block
# closes the file even if reading it raises.
with open('Saved_models/cnn_base_model.json', 'r') as json_file:
    loaded_classifier_json = json_file.read()

loaded_classifier = model_from_json(loaded_classifier_json)

# Restore the trained weights into the rebuilt architecture.
loaded_classifier.load_weights("Saved_models/cnn_base_model.h5")
print("Loaded model from disk")

# Compile with the same optimizer, loss and metric that classifier1 used.
loaded_classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Visualize Training and Loss

In [None]:
# Plot loss (top panel) and accuracy (bottom panel) per epoch.
history_log = history.history

# Older Keras releases log accuracy as 'acc'/'val_acc', newer ones as
# 'accuracy'/'val_accuracy'; accept whichever key is present.
acc_key = 'acc' if 'acc' in history_log else 'accuracy'

# Early stopping can end training before epoch 25, so derive the x axis from
# the number of epochs actually recorded instead of hard-coding 1..24.
epochs_ran = np.arange(1, len(history_log['loss']) + 1)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
ax1.plot(epochs_ran, history_log['loss'], color='b', label="Training loss")
ax1.plot(epochs_ran, history_log['val_loss'], color='r', label="validation loss")
ax1.set_xticks(epochs_ran)
ax1.set_yticks(np.arange(0, 1, 0.1))
# plt.legend() only annotates the current (last) axes, so give the loss
# panel its own legend explicitly.
ax1.legend(loc='best', shadow=True)

ax2.plot(epochs_ran, history_log[acc_key], color='b', label="Training accuracy")
ax2.plot(epochs_ran, history_log['val_' + acc_key], color='r',label="Validation accuracy")
ax2.set_xticks(epochs_ran)

legend = ax2.legend(loc='best', shadow=True)
plt.tight_layout()
plt.show()



In [None]:
# Load a single test image at the same size the network was built for.
# Using image_size instead of a literal (50, 50) keeps this in step with the
# model input and with the other load_img / generator calls.
test_image = load_img('../input/test1/test1/1.jpg', target_size = image_size)
plt.imshow(test_image)
plt.show()

In [None]:
# The network was trained on generator output rescaled by 1/255, so apply the
# same scaling here; feeding raw 0-255 pixel values would not match training.
# New names are used instead of overwriting test_image, so the cell can be
# re-run without img_to_array being handed an already-batched array.
test_array = img_to_array(test_image) / 255.0
test_batch = np.expand_dims(test_array, axis = 0)  # add the batch dimension
result = loaded_classifier.predict(test_batch)

In [None]:
# The sigmoid output is a probability, not a hard 0/1 label, so compare it
# against 0.5 instead of testing for exact equality with 1.
if result[0][0] > 0.5:
    prediction = 'This is a dog'
else:
    prediction = 'This is a cat'

print (prediction)

# Prepare Testing Data

In [None]:
# Collect the unlabelled test image filenames into a one-column frame.
test_filenames = os.listdir("../input/test1/test1")
test_df = pd.DataFrame({'filename': test_filenames})
nb_samples = len(test_df)


# Create Testing Data Generator

In [None]:
# Test images are only rescaled by 1/255. There are no labels
# (y_col=None, class_mode=None), and shuffle=False keeps the batches in
# test_df row order.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df,
    "../input/test1/test1/",
    x_col='filename',
    y_col=None,
    target_size=image_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)

# Prediction

In [None]:
# Run the model over every test image. np.ceil returns a float, but `steps`
# is a count of batches, so cast it to int; rounding up keeps the final
# partial batch.
predict = loaded_classifier.predict_generator(test_generator, steps=int(np.ceil(nb_samples/batch_size)))


The binary classifier returns the probability that an image is a dog. We use a threshold of 0.5: a predicted value above 50% means the image is a dog, and a value at or below 50% means it is a cat.


In [None]:
# Store the predicted probabilities and turn them into hard labels:
# 1 when the probability is above the threshold, otherwise 0.
threshold = 0.5
test_df['probability'] = predict
is_dog = test_df['probability'] > threshold
test_df['category'] = np.where(is_dog, 1, 0)

In [None]:
# Bar chart of how many test images were assigned to each predicted label.
test_df['category'].value_counts().plot.bar()

In [None]:
# Show the first 18 test images in a 6x3 grid, each captioned with
# filename(predicted label)(probability rounded to 2 places).
sample_test = test_df.head(18)
plt.figure(figsize=(12, 24))
# reset_index guarantees the loop index runs 0..17 regardless of test_df's
# own index labels, so index+1 is always a valid subplot slot.
for index, row in sample_test.reset_index(drop=True).iterrows():
    filename = row['filename']
    category = row['category']
    probability = row['probability']
    img = load_img("../input/test1/test1/"+filename, target_size=image_size)
    plt.subplot(6, 3, index+1)
    plt.imshow(img)
    plt.xlabel("{}({})({})".format(filename, category, round(probability, 2)))
plt.tight_layout()
plt.show()
