[View in Colaboratory](https://colab.research.google.com/github/gowtham91m/cats-and-dogs-classification/blob/master/cats_and_dogs_classificatoin.ipynb)

In [0]:
%%capture
# Setup cell: %%capture silences the output of everything below.
# Install the Kaggle command-line client used by the download cell
# (`!kaggle competitions download ...`).
!pip install kaggle

# File handling, pattern matching and sampling utilities.
from zipfile import ZipFile
import io, cv2, fnmatch, shutil, os, getpass, subprocess, random

# Keras building blocks: image pipeline, model containers, layers,
# optimizer and training callbacks.
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.optimizers import SGD
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Numerics, wall-clock timing and file globbing.
import numpy as np
from time import time
from glob import glob
from sklearn.utils import class_weight

In [0]:
# Work from /content and prompt for an upload only when the Kaggle key
# file (kaggle.txt) is not already there.
os.chdir('/content')
key_file_present = 'kaggle.txt' in os.listdir('/content')
if not key_file_present:
  from google.colab import files
  downloaded = files.upload()

In [36]:
with open('kaggle.txt') as f: key = f.read()
if 'cats_dogs' not in os.listdir('/content'):
  os.mkdir('/content/cats_dogs')
os.chdir('/content/cats_dogs')
os.environ['KAGGLE_USERNAME']="gowham91m"
os.environ['KAGGLE_KEY']=key

if 'cats_dogs' in os.listdir('/content'):shutil.rmtree('/content/cats_dogs')
!kaggle competitions download -c dogs-vs-cats

Downloading sampleSubmission.csv to /content/cats_dogs
  0% 0.00/86.8k [00:00<?, ?B/s]
100% 86.8k/86.8k [00:00<00:00, 22.1MB/s]
Downloading test1.zip to /content/cats_dogs
 94% 255M/271M [00:02<00:00, 96.4MB/s]
100% 271M/271M [00:02<00:00, 111MB/s] 
Downloading train.zip to /content/cats_dogs
 99% 537M/543M [00:04<00:00, 110MB/s]
100% 543M/543M [00:04<00:00, 131MB/s]


In [37]:
!unzip -q -o train.zip
!unzip -q -o test1.zip

cat_pattern = '*cat.*.jpg'
dog_pattern = '*dog.*.jpg'

images = glob('/content/cats_dogs/train/*.jpg', recursive=True)
cats = fnmatch.filter(images,cat_pattern)
dogs = fnmatch.filter(images,dog_pattern)

os.listdir('/content/cats_dogs')
if 'data' not in os.listdir('/content/cats_dogs'):os.mkdir('/content/cats_dogs/data')
if 'train' not in os.listdir('/content/cats_dogs/data'):os.mkdir('/content/cats_dogs/data/train')
if 'dogs' not in os.listdir('/content/cats_dogs/data/train'):os.mkdir('/content/cats_dogs/data/train/dogs')
if 'cats' not in os.listdir('/content/cats_dogs/data/train'):os.mkdir('/content/cats_dogs/data/train/cats')
  
if 'val' not in os.listdir('/content/cats_dogs/data'):os.mkdir('/content/cats_dogs/data/val')
if 'dogs' not in os.listdir('/content/cats_dogs/data/val'):os.mkdir('/content/cats_dogs/data/val/dogs')
if 'cats' not in os.listdir('/content/cats_dogs/data/val'):os.mkdir('/content/cats_dogs/data/val/cats')

train_dogs_path = '/content/cats_dogs/data/train/dogs'
train_cats_path = '/content/cats_dogs/data/train/cats'

val_dogs_path = '/content/cats_dogs/data/val/dogs'
val_cats_path = '/content/cats_dogs/data/val/cats'

for file in cats: shutil.copy2(file, train_cats_path)
for file in dogs: shutil.copy2(file, train_dogs_path)
  
  
# split train date into train and validation
train_len = len(os.listdir('/content/cats_dogs/data/train/dogs'))
val_len = train_len * 0.3
val_dogs = random.sample(os.listdir(train_dogs_path),int(val_len))
val_cats = random.sample(os.listdir(train_cats_path),int(val_len))


for file in val_dogs:
  try: shutil.move(os.path.join(train_dogs_path,file), val_dogs_path)
  except: pass
for file in val_cats:
  try: shutil.move(os.path.join(train_cats_path,file), val_cats_path)
  except: pass
  
print(len(os.listdir(train_cats_path)))
print(len(os.listdir(val_cats_path)))

print(len(os.listdir(train_dogs_path)))
print(len(os.listdir(val_dogs_path)))

print('total train samples ', len(os.listdir(train_cats_path)) + len(os.listdir(train_dogs_path)))
print('total train samples ', len(os.listdir(val_cats_path)) + len(os.listdir(val_dogs_path)))

8750
3750
8750
3750
total train samples  17500
total train samples  7500


In [39]:
batch_size=64

# Training images are rescaled to [0, 1] and randomly sheared, zoomed and
# horizontally flipped; validation images are only rescaled.
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2,
                                   zoom_range=0.2, horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators: every image is resized to
# 150x150 and labels are binary.
flow_kwargs = dict(target_size=(150, 150),
                   batch_size=batch_size,
                   class_mode='binary')

# One sub-folder per class under each root directory.
train_generator = train_datagen.flow_from_directory(
    '/content/cats_dogs/data/train', **flow_kwargs)
validation_generator = val_datagen.flow_from_directory(
    '/content/cats_dogs/data/val', **flow_kwargs)

Found 17500 images belonging to 2 classes.
Found 7500 images belonging to 2 classes.


In [22]:
# Baseline CNN: three conv/ReLU/max-pool stages followed by a small dense
# classifier with dropout and a single sigmoid output.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=(150, 150, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),  # 3D feature maps -> 1D feature vector
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train for 8 epochs on the directory generators, save the weights and
# report the elapsed wall-clock time in seconds.
start_time = time()
model.fit_generator(train_generator,
                    epochs=8,
                    validation_data=validation_generator)
model.save_weights('first_try.h5')

print('time taken ',time()-start_time)

Found 17500 images belonging to 2 classes.
Found 7500 images belonging to 2 classes.
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
time taken  1376.3395261764526


In [40]:
# NOTE(review): preprocess_input is imported but not used in the visible cells.
from keras.applications.vgg16 import VGG16, preprocess_input

# Convolutional base pre-trained on ImageNet, without its original classifier.
# NOTE(review): per the Keras docs `classes` is only meaningful together with
# include_top=True, so it is presumably ignored here -- confirm.
img_rows, img_cols, img_channel = 150, 150, 3
input_dims = (img_rows, img_cols, img_channel)
base_model = VGG16(include_top=False, weights='imagenet',
                   input_shape=input_dims, classes = 2)

# New binary classification head stacked on the base model's output.
add_model = Sequential([
    Flatten(input_shape=base_model.output_shape[1:]),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

vgg_model = Model(inputs=base_model.input, outputs=add_model(base_model.output))

# Freeze every layer of the pre-trained base; only the new head is trained.
for base_layer in base_model.layers:
    base_layer.trainable = False

vgg_model.compile(optimizer=SGD(lr=1e-4, momentum=0.9),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

# Keep the best model seen so far according to validation accuracy.
# NOTE(review): the metric key is 'val_acc' here; some Keras releases name it
# 'val_accuracy' -- confirm against the installed version.
best_checkpoint = ModelCheckpoint('VGG16-transferlearning.model',
                                  monitor='val_acc', save_best_only=True)

# Train for 16 epochs, save the final weights and report the elapsed
# wall-clock time in seconds.
start_time = time()
vgg_model.fit_generator(train_generator,
                        epochs=16,
                        validation_data=validation_generator,
                        callbacks=[best_checkpoint])
vgg_model.save_weights('vgg16.h5')

print('time taken ',time()-start_time)

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
time taken  3267.4616470336914
