### Implementation of Keras Xception using Google Colab
This script was used to generate 4 different submission files:
1. Xception with 5 epochs - adam optimizer
2. Xception with 10 epochs - adam optimizer
3. Xception with 10 epochs - sgd optimizer
4. Xception with 20 epochs - adam optimizer

Mounting Google Drive in Google Colab and setting up the Kaggle API token

In [0]:
# Mount Google Drive at /content/drive; the call prompts for an interactive authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
from google.colab import files
# Upload kaggle.json through the browser file picker.
# The result is bound to a name on purpose: files.upload() returns a
# {filename: file_bytes} dict, and leaving the call as the cell's last
# expression makes the notebook echo that dict, which writes the Kaggle API
# key into the saved cell output.
uploaded = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<redacted>","key":"<redacted>"}'}

In [0]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

kaggle.json


In [0]:
# Download the competition files (test.zip, train.zip, sample_submission.csv) with the Kaggle CLI.
!kaggle competitions download -c dogs-vs-cats-redux-kernels-edition

Downloading test.zip to /content
 91% 246M/271M [00:01<00:00, 164MB/s]
100% 271M/271M [00:01<00:00, 169MB/s]
Downloading train.zip to /content
100% 543M/544M [00:05<00:00, 64.8MB/s]
100% 544M/544M [00:05<00:00, 103MB/s] 
Downloading sample_submission.csv to /content
  0% 0.00/111k [00:00<?, ?B/s]
100% 111k/111k [00:00<00:00, 105MB/s]


In [None]:
!mkdir data && unzip test.zip -d data/
!unzip train.zip -d data/

Importing required libraries

In [0]:
# xception model used for transfer learning on the dogs and cats dataset
import sys
from keras.models import load_model
from matplotlib import pyplot
from keras.utils import to_categorical
from keras.applications.vgg16 import VGG16
from keras.applications.xception import Xception
from keras.models import Model
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, merge, Input
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D
from keras.utils.data_utils import get_file
from keras.utils import plot_model,np_utils
from keras.preprocessing import sequence,image
from keras import optimizers
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import cross_val_score, train_test_split
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d import Axes3D
from sklearn import metrics
import numpy as np
import pandas as pd
import math
import warnings
import random
import os
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
# Suppress every warning issued through the `warnings` module from here on.
warnings.filterwarnings('ignore')
#%matplotlib inline
# Apply matplotlib's "ggplot" style sheet to subsequent figures.
plt.style.use("ggplot")

In [0]:
# organize dataset into a useful structure:
#   dataset_dogs_vs_cats/{train,test}/{dogs,cats}/
# 'test/' is the hold-out split used for validation during training.
from os import makedirs
from os import listdir
from os.path import join
from shutil import copyfile
from random import seed
# NOTE: this rebinds the name `random` to the function random.random()
from random import random
# create directories
dataset_home = 'dataset_dogs_vs_cats/'
subdirs = ['train/', 'test/']
labeldirs = ['dogs/', 'cats/']
for subdir in subdirs:
	# create label subdirectories
	for labldir in labeldirs:
		makedirs(join(dataset_home, subdir, labldir), exist_ok=True)
# seed random number generator
seed(1)
# define ratio of pictures to use for validation
val_ratio = 0.25
# copy training dataset images into subdirectories
src_directory = 'data/train/'
# listdir() returns entries in arbitrary order, so the seeded draw sequence
# would be paired with different files on different machines; sorting the
# listing makes the train/validation split reproducible.
for file in sorted(listdir(src_directory)):
	# the label is taken from the file name prefix; other files are ignored
	if file.startswith('cat'):
		label_dir = 'cats/'
	elif file.startswith('dog'):
		label_dir = 'dogs/'
	else:
		continue
	# one random draw per image decides which split it lands in
	dst_dir = 'test/' if random() < val_ratio else 'train/'
	src = join(src_directory, file)
	dst = join(dataset_home, dst_dir, label_dir, file)
	copyfile(src, dst)

In [0]:
# define cnn model
def define_model(input_shape=(299, 299, 3), optimizer='adam'):
	"""Build and compile a binary classifier on top of a frozen Xception base.

	The Xception network is created without its classification top and all of
	its layers are marked non-trainable, so only the new head is fitted:
	GlobalAveragePooling2D -> Dense(1024) -> Dropout(0.5) -> Dense(256)
	-> Dropout(0.5) -> Dense(32) -> Dense(1, sigmoid).

	Args:
		input_shape: (height, width, channels) of the input images.
		optimizer: optimizer name or instance forwarded to ``model.compile``,
			e.g. ``'adam'`` or ``SGD(lr=0.001, momentum=0.9)``.

	Returns:
		The compiled Keras ``Model`` (loss: binary cross-entropy,
		metric: accuracy).
	"""
	# load the convolutional base without its classifier
	base = Xception(include_top=False, input_shape=input_shape)
	# mark loaded layers as not trainable
	for layer in base.layers:
		layer.trainable = False

	# new classification head
	x = GlobalAveragePooling2D()(base.output)
	x = Dense(1024, activation='relu')(x)
	x = Dropout(0.5)(x)
	x = Dense(256, activation='relu')(x)
	x = Dropout(0.5)(x)
	x = Dense(32, activation='relu')(x)
	# single sigmoid unit: one probability per image
	output = Dense(1, activation='sigmoid')(x)

	# this is the model we will train
	model = Model(inputs=base.inputs, outputs=output)
	model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
	return model

# plot diagnostic learning curves
def summarize_diagnostics(history):
	"""Plot train/validation loss and accuracy curves and save them to a PNG.

	Args:
		history: object whose ``history`` attribute is a dict of per-epoch
			metric lists containing 'loss', 'val_loss' and the accuracy
			metric with its 'val_' counterpart (as returned by
			``model.fit_generator`` with validation data).

	The figure is written to '<script name>_plot.png', where the script name
	is the last path component of ``sys.argv[0]``, and then closed.
	"""
	logs = history.history
	# Keras releases differ in how metrics=['accuracy'] is logged:
	# 'acc' in older versions, 'accuracy' in newer ones.
	acc_key = 'acc' if 'acc' in logs else 'accuracy'
	# plot loss
	pyplot.subplot(211)
	pyplot.title('Cross Entropy Loss')
	pyplot.plot(logs['loss'], color='blue', label='train')
	pyplot.plot(logs['val_loss'], color='orange', label='test')
	# the labels above are only shown once a legend is drawn
	pyplot.legend()
	# plot accuracy
	pyplot.subplot(212)
	pyplot.title('Classification Accuracy')
	pyplot.plot(logs[acc_key], color='blue', label='train')
	pyplot.plot(logs['val_' + acc_key], color='orange', label='test')
	pyplot.legend()
	# keep the lower title from overlapping the upper axes
	pyplot.tight_layout()
	# save plot to file
	filename = sys.argv[0].split('/')[-1]
	pyplot.savefig(filename + '_plot.png')
	pyplot.close()

# run the test harness for evaluating a model
def run_test_harness(epochs=20, batch_size=64):
	"""Train the model on the train split and report hold-out accuracy.

	Args:
		epochs: number of training epochs.
		batch_size: images per batch for both directory iterators.

	Prints the accuracy on 'dataset_dogs_vs_cats/test/' as a percentage and
	saves the learning curves through ``summarize_diagnostics``.
	"""
	# define model
	model = define_model()
	# create data generator: pixel values are rescaled to [0, 1].
	# featurewise_center=True was removed: the generator was never fitted
	# (no datagen.fit() and no datagen.mean), so Keras only emitted a warning
	# and left the images uncentered. The preprocessing is therefore unchanged.
	datagen = ImageDataGenerator(rescale=1./255)
	# prepare iterators
	train_it = datagen.flow_from_directory('dataset_dogs_vs_cats/train/',
		class_mode='binary', batch_size=batch_size, target_size=(299, 299))
	test_it = datagen.flow_from_directory('dataset_dogs_vs_cats/test/',
		class_mode='binary', batch_size=batch_size, target_size=(299, 299))
	# fit model
	history = model.fit_generator(train_it, steps_per_epoch=len(train_it),
		validation_data=test_it, validation_steps=len(test_it), epochs=epochs, verbose=1)
	# evaluate model
	_, acc = model.evaluate_generator(test_it, steps=len(test_it), verbose=0)
	print('> %.3f' % (acc * 100.0))
	# learning curves
	summarize_diagnostics(history)

# entry point, run the test harness
run_test_harness()

Found 18697 images belonging to 2 classes.
Found 6303 images belonging to 2 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
> 98.620


In [0]:
# organize the full labelled dataset (no hold-out split) into
#   finalize_dogs_vs_cats/{dogs,cats}/
from os import makedirs
from os import listdir
from os.path import join
from shutil import copyfile
# create directories
dataset_home = 'finalize_dogs_vs_cats/'
# create label subdirectories
labeldirs = ['dogs/', 'cats/']
for labldir in labeldirs:
	makedirs(join(dataset_home, labldir), exist_ok=True)
# copy training dataset images into subdirectories
src_directory = 'data/train/'
for file in listdir(src_directory):
	# the label is taken from the file name prefix; other files are ignored
	if file.startswith('cat'):
		label_dir = 'cats/'
	elif file.startswith('dog'):
		label_dir = 'dogs/'
	else:
		continue
	# join() avoids the doubled separator of src_directory + '/' + file
	src = join(src_directory, file)
	dst = join(dataset_home, label_dir, file)
	copyfile(src, dst)

In [0]:
# save the final model to file
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator

# define cnn model
def define_model(input_shape=(299, 299, 3), optimizer='adam'):
	"""Build and compile a binary classifier on top of a frozen Xception base.

	The Xception network is created without its classification top and all of
	its layers are marked non-trainable, so only the new head is fitted:
	GlobalAveragePooling2D -> Dense(1024) -> Dropout(0.5) -> Dense(256)
	-> Dropout(0.5) -> Dense(32) -> Dense(1, sigmoid).

	Args:
		input_shape: (height, width, channels) of the input images.
		optimizer: optimizer name or instance forwarded to ``model.compile``,
			e.g. ``'adam'`` or ``SGD(lr=0.001, momentum=0.9)``.

	Returns:
		The compiled Keras ``Model`` (loss: binary cross-entropy,
		metric: accuracy).
	"""
	# load the convolutional base without its classifier
	base = Xception(include_top=False, input_shape=input_shape)
	# mark loaded layers as not trainable
	for layer in base.layers:
		layer.trainable = False

	# new classification head
	x = GlobalAveragePooling2D()(base.output)
	x = Dense(1024, activation='relu')(x)
	x = Dropout(0.5)(x)
	x = Dense(256, activation='relu')(x)
	x = Dropout(0.5)(x)
	x = Dense(32, activation='relu')(x)
	# single sigmoid unit: one probability per image
	output = Dense(1, activation='sigmoid')(x)

	# this is the model we will train
	model = Model(inputs=base.inputs, outputs=output)
	model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
	return model

 
# fit the final model on every labelled image and save it to disk
def run_test_harness(epochs=20, batch_size=64, model_path='xception.h5'):
	"""Train on 'finalize_dogs_vs_cats/' and save the fitted model.

	Args:
		epochs: number of training epochs.
		batch_size: images per batch for the directory iterator.
		model_path: file the trained model is saved to.
	"""
	model = define_model()
	# Rescale pixel values to [0, 1]. featurewise_center=True was removed: the
	# generator was never fitted (no datagen.fit() and no datagen.mean), so it
	# only produced a warning and left the images uncentered.
	datagen = ImageDataGenerator(rescale=1./255)
	train_it = datagen.flow_from_directory('finalize_dogs_vs_cats/',
		class_mode='binary', batch_size=batch_size, target_size=(299, 299))
	model.fit_generator(train_it, steps_per_epoch=len(train_it), epochs=epochs, verbose=1)
	model.save(model_path)


run_test_harness()

Found 25000 images belonging to 2 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
# Prepare testing data: one row per file found in data/test/
batch_size = 64
test_filenames = os.listdir("data/test/")
test_df = pd.DataFrame({'filename': test_filenames})
# number of test images, used to derive the number of prediction batches
nb_samples = len(test_df)

In [0]:
# Rescale pixel values to [0, 1], which is the preprocessing the training
# generators effectively applied. featurewise_center=True was removed: the
# generator is never fitted (no datagen.fit() and no datagen.mean), so the
# flag only produced a warning and did not change the images.
datagen = ImageDataGenerator(rescale=1./255)

# Unlabelled iterator over data/test/. shuffle=False keeps the batches in
# test_df row order so predictions can be assigned back to the filenames.
test_generator = datagen.flow_from_dataframe(
    test_df,
    "data/test/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(299,299),
    batch_size=batch_size,
    shuffle=False)

Found 12500 validated image filenames.


In [0]:
# Load the model saved by the final training run and score every test image.
model = load_model('xception.h5')
# `steps` is a batch count, so pass an int rather than the float np.ceil returns.
n_steps = int(np.ceil(nb_samples / batch_size))
predict = model.predict_generator(test_generator, steps=n_steps, verbose=1)
# The model ends in Dense(1), so predictions have shape (n_samples, 1);
# flatten to 1-D before storing them as a single column.
test_df['category'] = predict.ravel()



In [0]:
# Build the submission table: `id` is the file name up to the first '.',
# `label` is the predicted value stored in `category`.
image_ids = test_df['filename'].str.split('.').str[0]
submission_df = (
    test_df
    .assign(id=image_ids, label=test_df['category'])
    .drop(['filename', 'category'], axis=1)
)
submission_df.to_csv('submission.csv', index=False)

The submission file is saved and later stacked with other submissions to calculate the final prediction.