# Cats Vs Dogs

In [1]:
import numpy as np 
import pandas as pd 
import os
print(os.listdir("../input"))

['train', 'test']


In [2]:
import cv2                 
import numpy as np         
import os                  
from random import shuffle
from tqdm import tqdm      

train_dir = '../input/train'
test_dir = '../input/test'

In [3]:
def get_label(img):
    """Translate an image file name into its one-hot class label.

    The class name is whatever precedes the first '.' in ``img``.

    Returns:
        [1, 0] for 'cat', [0, 1] for 'dog', and None for any other prefix.
    """
    prefix = img.split('.')[0]
    # The table is rebuilt on every call so each caller gets its own list.
    one_hot_by_class = {'cat': [1, 0], 'dog': [0, 1]}
    return one_hot_by_class.get(prefix)

**The labels (`Y` and `test_y`) are one-hot encoded: `[1, 0]` for a cat and `[0, 1]` for a dog.**

**Preprocessing and collecting train and test data.**

In [4]:
def making_train_data():
    """Load, preprocess and cache the labelled training images.

    Every file in ``train_dir`` is read as a grayscale image, resized to
    50x50 and paired with the one-hot label produced by ``get_label``.
    Files whose name yields no label, or that OpenCV cannot decode, are
    skipped (with a warning) instead of aborting the whole run.
    The shuffled result is also written to ``train_data.npy``.

    Returns:
        list: shuffled ``[image_array, label_array]`` pairs.
    """
    import warnings

    training_data = []
    skipped = []

    for file_name in tqdm(os.listdir(train_dir)):
        label = get_label(file_name)
        if label is None:
            # Name does not start with 'cat.' or 'dog.' (e.g. a stray file).
            skipped.append(file_name)
            continue
        path = os.path.join(train_dir, file_name)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None;
            # passing that on to cv2.resize would raise.
            skipped.append(file_name)
            continue
        image = cv2.resize(image, (50, 50))
        training_data.append([np.array(image), np.array(label)])

    if skipped:
        warnings.warn('skipped {} unusable file(s) in {}'.format(len(skipped), train_dir))

    shuffle(training_data)
    # Each row pairs a (50, 50) image with a (2,) label, so the container has
    # to be an object array; recent NumPy no longer infers that implicitly.
    np.save('train_data.npy', np.array(training_data, dtype=object))
    return training_data

In [5]:
def making_test_data():
    """Load, preprocess and cache the unlabelled test images.

    Every file in ``test_dir`` is read as a grayscale image, resized to 50x50
    and paired with its id (the file name up to the first '.').
    Files that OpenCV cannot decode are skipped (with a warning) instead of
    aborting the whole run. The shuffled result is also written to
    ``test_data.npy``.

    Returns:
        list: shuffled ``[image_array, image_id]`` pairs.
    """
    import warnings

    testing_data = []
    skipped = []

    for file_name in tqdm(os.listdir(test_dir)):
        path = os.path.join(test_dir, file_name)
        img_num = file_name.split('.')[0]
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None;
            # passing that on to cv2.resize would raise.
            skipped.append(file_name)
            continue
        image = cv2.resize(image, (50, 50))
        testing_data.append([np.array(image), img_num])

    if skipped:
        warnings.warn('skipped {} unusable file(s) in {}'.format(len(skipped), test_dir))

    shuffle(testing_data)
    # Each row pairs a (50, 50) image with a string id, so the container has
    # to be an object array; recent NumPy no longer infers that implicitly.
    np.save('test_data.npy', np.array(testing_data, dtype=object))
    return testing_data

In [6]:
train_data = making_train_data()

100%|██████████| 25000/25000 [01:17<00:00, 321.78it/s]


In [7]:
import numpy
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use channels-first tensors, i.e. (channels, height, width).
# set_image_data_format('channels_first') is the Keras 2 spelling of the
# legacy set_image_dim_ordering('th') call, which later releases removed.
K.set_image_data_format('channels_first')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [8]:
# Use the first 20,000 shuffled samples for training and hold out the
# following 5,000 for validation.
train, test = train_data[:20000], train_data[20000:25000]

In [9]:
print(len(train) , len(test))

20000 5000


In [18]:
# Separate every [image, label] pair into features and labels.
# Images are stacked and reshaped to (N, 1, 50, 50); labels stay plain lists.
X = np.array([image for image, _ in train]).reshape(-1, 1, 50, 50)
Y = [label for _, label in train]

test_x = np.array([image for image, _ in test]).reshape(-1, 1, 50, 50)
test_y = [label for _, label in test]

**Use `ImageDataGenerator` to augment the training images (small random rotations and shifts), which helps reduce overfitting to the training data.**

In [19]:
from keras.preprocessing.image import ImageDataGenerator

# Augmentation settings: only a rotation range and width/height shift ranges
# are non-zero; every centering, normalisation, whitening, zoom and flip
# option is explicitly switched off.
augmentation_options = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=10,
    zoom_range=0.0,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
    vertical_flip=False,
)
datagen = ImageDataGenerator(**augmentation_options)

datagen.fit(X)

**Use the `ReduceLROnPlateau` callback to lower the learning rate whenever the monitored validation accuracy stops improving.**

In [20]:
from keras.callbacks import ReduceLROnPlateau
# Callback that monitors 'val_acc' and scales the learning rate by `factor`
# (0.1) once the metric has gone `patience` (1) epoch without improving.
# NOTE(review): newer Keras releases name the `epsilon` argument `min_delta`
# — confirm against the installed version.
lr_reduce = ReduceLROnPlateau(monitor='val_acc', factor=0.1, epsilon=0.0001, patience=1, verbose=1)



In [21]:
# ndarray.reshape returns a new array instead of reshaping in place, so the
# result must be assigned for the (N, 2) layout to actually apply to Y.
Y = np.asarray(Y).reshape(len(Y), 2)
Y

array([[0, 1],
       [0, 1],
       [0, 1],
       ...,
       [1, 0],
       [0, 1],
       [1, 0]])

In [22]:
# ndarray.reshape returns a new array instead of reshaping in place, so the
# result must be assigned for the (N, 2) layout to actually apply to test_y.
test_y = np.asarray(test_y).reshape(len(test_y), 2)
test_y

array([[1, 0],
       [0, 1],
       [1, 0],
       ...,
       [0, 1],
       [0, 1],
       [0, 1]])

In [23]:
# test_x was already reshaped to (-1, 1, 50, 50) when it was first built,
# so this repeat reshape leaves it unchanged.
test_x = test_x.reshape(-1, 1, 50, 50)

In [24]:
# Scale pixel values by 1/255 (presumably 8-bit grayscale, giving a 0-1 range).
# NOTE: re-running this cell divides the already scaled arrays again.
X, test_x = X / 255, test_x / 255

In [25]:
from keras.layers import Dense , Activation
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers import Conv2D , BatchNormalization
from keras.layers import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use channels-first tensors, i.e. (channels, height, width).
# set_image_data_format('channels_first') is the Keras 2 spelling of the
# legacy set_image_dim_ordering('th') call, which later releases removed.
K.set_image_data_format('channels_first')

In [78]:
def swish_activation(x):
    """Swish activation: the input multiplied by its own sigmoid."""
    gate = K.sigmoid(x)
    return gate * x

# Four convolutional stages (32 -> 64 -> 96 -> 128 filters), each followed by
# 2x2 max pooling, then a small dense head with two outputs.
model = Sequential()

# Stage 1: input is one 50x50 channel in channels-first layout.
model.add(Conv2D(32, (3, 3), activation='relu', padding="same", input_shape=(1,50,50)))
model.add(Conv2D(32, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Stage 2
model.add(Conv2D(64, (3, 3), activation='relu', padding="same"))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Stage 3: a dilated convolution followed by an unpadded ("valid") one.
model.add(Conv2D(96, (3, 3), dilation_rate=(2, 2), activation='relu', padding="same"))
model.add(Conv2D(96, (3, 3), padding="valid", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Stage 4: same pattern with 128 filters.
model.add(Conv2D(128, (3, 3), dilation_rate=(2, 2), activation='relu', padding="same"))
model.add(Conv2D(128, (3, 3), padding="valid", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Classifier head
model.add(Flatten())
model.add(Dense(64, activation=swish_activation))
model.add(Dropout(0.4))
# NOTE(review): two sigmoid units with binary_crossentropy score each one-hot
# position independently; softmax + categorical_crossentropy is the usual
# pairing for mutually exclusive classes — confirm this choice is intended.
model.add(Dense(2 , activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_59 (Conv2D)           (None, 32, 50, 50)        320       
_________________________________________________________________
conv2d_60 (Conv2D)           (None, 32, 50, 50)        9248      
_________________________________________________________________
max_pooling2d_30 (MaxPooling (None, 32, 25, 25)        0         
_________________________________________________________________
conv2d_61 (Conv2D)           (None, 64, 25, 25)        18496     
_________________________________________________________________
conv2d_62 (Conv2D)           (None, 64, 25, 25)        36928     
_________________________________________________________________
max_pooling2d_31 (MaxPooling (None, 64, 12, 12)        0         
_________________________________________________________________
conv2d_63 (Conv2D)           (None, 96, 12, 12)        55392     
__________

In [79]:
# Training hyper-parameters.
batch_size = 128
epochs = 20

model.compile(loss='binary_crossentropy', optimizer='adam' , metrics=['accuracy'])
# Count whole batches in the arrays that are actually fed to the model:
# X is the training split only, test_x the held-out validation split.
# (len() of the (test_x, test_y) tuple would always be 2, giving 0 steps.)
steps_per_epoch = len(X) // batch_size
validation_steps = len(test_x) // batch_size

In [80]:
# Train on augmented batches drawn from (X, Y) and validate against the
# held-out (test_x, test_y) arrays.
augmented_batches = datagen.flow(X, Y, batch_size=batch_size)
history = model.fit_generator(
    augmented_batches,
    steps_per_epoch=X.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(test_x, test_y),
    callbacks=[lr_reduce],
    verbose=2,
)

Epoch 1/20
 - 15s - loss: 0.6923 - acc: 0.5042 - val_loss: 0.6888 - val_acc: 0.4936
Epoch 2/20
 - 13s - loss: 0.6821 - acc: 0.5622 - val_loss: 0.6638 - val_acc: 0.6023
Epoch 3/20
 - 13s - loss: 0.6474 - acc: 0.6266 - val_loss: 0.6047 - val_acc: 0.6751
Epoch 4/20
 - 13s - loss: 0.6114 - acc: 0.6651 - val_loss: 0.5596 - val_acc: 0.7155
Epoch 5/20
 - 13s - loss: 0.5688 - acc: 0.7070 - val_loss: 0.5109 - val_acc: 0.7480
Epoch 6/20
 - 13s - loss: 0.5372 - acc: 0.7328 - val_loss: 0.4811 - val_acc: 0.7640
Epoch 7/20
 - 13s - loss: 0.4782 - acc: 0.7704 - val_loss: 0.4526 - val_acc: 0.7904
Epoch 8/20
 - 13s - loss: 0.4410 - acc: 0.7945 - val_loss: 0.4365 - val_acc: 0.7935
Epoch 9/20
 - 13s - loss: 0.4108 - acc: 0.8122 - val_loss: 0.3799 - val_acc: 0.8251
Epoch 10/20
 - 13s - loss: 0.3820 - acc: 0.8278 - val_loss: 0.3600 - val_acc: 0.8349
Epoch 11/20
 - 13s - loss: 0.3651 - acc: 0.8346 - val_loss: 0.3503 - val_acc: 0.8390
Epoch 12/20
 - 13s - loss: 0.3373 - acc: 0.8506 - val_loss: 0.3689 - val_a

# The custom network architecture defined above reaches a validation accuracy of 87.15%.

# For comparison, VGG-16 reached a validation accuracy of around 88.3%.

# Evaluate on the held-out split and report the first two returned values
# (printed as loss and accuracy).
score = model.evaluate(test_x, test_y, verbose=0)
for caption, value in (('valid loss:', score[0]), ('valid accuracy:', score[1])):
    print(caption, value)

In [None]:
import matplotlib.pyplot as plt

# Draw one figure per metric, each comparing the training curve with the
# validation curve (labelled 'test' in the legend).
curve_specs = (
    ('acc', 'val_acc', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'model loss', 'loss'),
)
for train_key, val_key, figure_title, y_label in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

**The accuracy and loss curves are plotted as two separate figures.**

In [None]:
test_data = making_test_data()

In [None]:
# Write one "id,label" row per test image, where label is the model's second
# output (index 1, the 'dog' position of the one-hot encoding).
with open('submission_file.csv', 'w') as f:
    f.write('id,label\n')
    for sample in tqdm(test_data):
        img_data, img_num = sample[0], sample[1]
        # Apply the same 1/255 scaling the training inputs received; raw
        # 0-255 pixels would be on a different scale than the model was
        # trained on.
        batch = img_data.reshape(1, 1, 50, 50) / 255
        model_out = model.predict([batch])[0]
        f.write('{},{}\n'.format(img_num, model_out[1]))

**`submission_file.csv` is the final submission file; it holds the model's predictions for the images in `test_data`.**