# Link: https://www.kaggle.com/c/dogs-vs-cats/data?select=sampleSubmission.csv

# Import libraries

In [None]:
import numpy as np
import pandas as pd
from keras.preprocessing.image import (
    ImageDataGenerator,  # To expand the size of the dataset
    load_img,  # To load individual image files from disk
)
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import os

# Defining the Image Properties

In [None]:
# Fixed resolution and channel count shared by the model input and the
# image loaders in this notebook.
Image_Width, Image_Height = 128, 128
Image_Channels = 3  # RGB
Image_Size = (Image_Width, Image_Height)

# Preparing the dataset

In [None]:
# Collect the training file names and derive a numeric label from each one.
# Files are named "<class>.<id>.jpg", e.g. 'cat.0.jpg', 'cat.1.jpg', ..., 'dog.98.jpg'.
filenames = os.listdir("./dogs-vs-cats/train")

# 'cat.0.jpg'.split('.') -> ['cat', '0', 'jpg']; the first piece is the class.
# 'dog' is encoded as 1, every other prefix as 0.
categories = [
    1 if f_name.split('.')[0] == 'dog' else 0
    for f_name in filenames
]

# Dog - 1
# Cat - 0

In [None]:
# One row per training image: its file name and its numeric class label.
df = pd.DataFrame(dict(filename=filenames, category=categories))

# CNN

In [None]:
from keras.models import Sequential
from keras.layers import (
    Conv2D,  # Convolutional Layer
    MaxPooling2D,  # Pooling Layer
    Dropout,  # To Prevent Neural Networks from Overfitting
    Flatten,  # Flattening the matrix Eg. From 2D -> 1D
    Dense,  # Connected Neural Network
    Activation,  # Activation Function
    BatchNormalization,  # Feature Scaling
)

# Convolutional feature extractor: three stages with 32, 64 and 128 filters.
# Every stage is Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2)
# -> Dropout(0.25).
model = Sequential()

# Stage 1 also declares the input shape. The axis order follows Image_Size,
# which is the same tuple passed as target_size when images are loaded.
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=(Image_Width, Image_Height, Image_Channels)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 2
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 3
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))


* Dropout

When created, the dropout rate can be specified to the layer as the probability of setting each input to the layer to zero. This is different from the definition of dropout rate from the papers, in which the rate refers to the probability of retaining an input.

Therefore, when a dropout rate of 0.8 is suggested in a paper (retain 80%), this will, in fact, be a dropout rate of 0.2 (set 20% of inputs to zero).

Below is an example of creating a dropout layer with a 50% chance of setting inputs to zero.

Info: https://towardsdatascience.com/understanding-and-implementing-dropout-in-tensorflow-and-keras-a8a3a02c1bfa



Rate: the parameter which determines the odds (chances) of dropping out neurons. If you have not validated which value works best for you with a validation set, recall that it’s best to set 𝑟𝑎𝑡𝑒≈0.5 for hidden layers and 𝑟𝑎𝑡𝑒≈0.1 for the input layer (note that 𝑟𝑎𝑡𝑒≈0.1 equals a keep probability of 𝑝≈0.9 – Keras turns the logic upside down, making rate the odds of dropping out rather than keeping neurons!)


In [None]:
# Classifier head appended to the convolutional stack: flatten the feature
# maps, one fully connected hidden layer, then a 2-unit softmax output.
classifier_head = [
    Flatten(),
    # Hidden layer
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    # Output layer (2 units, softmax)
    Dense(2, activation='softmax'),
]
for layer in classifier_head:
    model.add(layer)

# Configure training: categorical cross-entropy loss, RMSprop optimizer,
# accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

* Activation Function

Link: https://www.analyticsvidhya.com/blog/2020/01/fundamentals-deep-learning-activation-functions-when-to-use-them/

# Model Analysis

In [None]:
# Show the summary of the model assembled in the cells above.
model.summary()

# Callbacks & Learning Rate

Link: https://keras.io/api/callbacks/

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop training when the quantity monitored by EarlyStopping (its default is
# used here) has not improved for 10 consecutive epochs.
earlystop = EarlyStopping(patience=10)

# Halve the learning rate (factor=0.5) whenever validation accuracy has not
# improved for 2 epochs, never going below 1e-5.
# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy'. Recent Keras releases do not log 'val_acc', and
# monitoring that name would leave this callback inactive.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=2,
    verbose=1,
    factor=0.5,
    min_lr=0.00001,
)

callbacks = [earlystop, learning_rate_reduction]

# Managing the Data

In [None]:
# Turn the numeric labels back into class names; the 'category' column is
# what the data generators read as y_col.
label_names = {0: 'cat', 1: 'dog'}
df["category"] = df["category"].replace(label_names)

# Hold out 20% of the rows for validation (fixed seed for a repeatable split)
# and renumber both parts from 0.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=42)
)

# Row counts, used later to derive the number of steps per epoch.
total_train = len(train_df)
total_validate = len(validate_df)

# Data Generator

In [None]:
batch_size = 15

# Training images: rescaled to [0, 1] and randomly augmented (rotation, shear,
# zoom, horizontal flip, shifts) to expand the effective size of the dataset.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    "./dogs-vs-cats/train/",
    x_col='filename',
    y_col='category',
    target_size=Image_Size,
    class_mode='categorical',
    batch_size=batch_size,
)

# Validation images: only rescaled, so validation scores are measured on
# unmodified pictures.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "./dogs-vs-cats/train/",
    x_col='filename',
    y_col='category',
    target_size=Image_Size,
    class_mode='categorical',
    batch_size=batch_size,
)

# Test images: these live in their own directory and carry no labels, so the
# generator is built from a listing of that directory rather than from
# train_df. The directory is the one the prediction cell loads images from.
# NOTE(review): the original pointed at "./dogs-vs-cats/test/" - confirm which
# directory actually holds the test images.
test_dir = "./dogs-vs-cats/test1/"
test_df = pd.DataFrame({'filename': sorted(os.listdir(test_dir))})

# Only rescaling is applied (no augmentation), and shuffle=False keeps the
# batches in the same order as the rows of test_df.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    test_dir,
    x_col='filename',
    y_col=None,
    target_size=Image_Size,
    class_mode=None,
    batch_size=batch_size,
    shuffle=False,
)

# Model Training

In [None]:
epochs = 10

# Whole batches available per epoch for training and for validation.
train_steps = total_train // batch_size
validation_steps = total_validate // batch_size

# Train on the augmented generator and evaluate on the validation generator
# after every epoch; the returned History object is kept in `history`.
# NOTE(review): fit_generator is reported as deprecated in newer Keras /
# TensorFlow releases in favour of fit - confirm against the installed version.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=callbacks,
)

# Saving the model

In [None]:
# Save the trained model to an HDF5 (.h5) file in the working directory.
model.save("model1_catsVSdogs_10epoch.h5")

# Prediction

In [None]:
# Predict the class of the first 18 test images and display them in a 6x3
# grid, each captioned with its file name and predicted class.
test_dir = "./dogs-vs-cats/test1/"
test_df = pd.DataFrame({'filename': sorted(os.listdir(test_dir))})
sample_test = test_df.head(18).copy()

# Load every sample at the same target size used for training and apply the
# same 1/255 rescaling the data generators use.
sample_images = [
    load_img(os.path.join(test_dir, filename), target_size=Image_Size)
    for filename in sample_test['filename']
]
sample_batch = np.stack(
    [np.asarray(img, dtype='float32') / 255. for img in sample_images]
)

# The model outputs one score per class; map the index of the highest score
# back to its class name via the mapping recorded by the training generator.
index_to_label = {
    index: label for label, index in train_generator.class_indices.items()
}
predicted_indices = np.argmax(model.predict(sample_batch), axis=-1)
sample_test['category'] = [index_to_label[int(i)] for i in predicted_indices]

plt.figure(figsize=(12, 24))
# Subplot positions are 1-based; enumerate keeps them independent of the
# DataFrame index.
for position, (img, filename, category) in enumerate(
        zip(sample_images, sample_test['filename'], sample_test['category']),
        start=1):
    plt.subplot(6, 3, position)
    plt.imshow(img)
    plt.xlabel(filename + '(' + "{}".format(category) + ')')
plt.tight_layout()
plt.show()