In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os, cv2, random, time, shutil, csv
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm import tqdm
np.random.seed(42)
%matplotlib inline 

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Lambda, Dropout, InputLayer, Input
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img
from keras import Sequential
from keras.layers import Dense, Dropout, InputLayer, Lambda, Input

Using TensorFlow backend.


In [2]:
# Kaggle input directories holding the training and test JPEGs.
train_dir, test_dir = (
    '/kaggle/input/dog-breed-identification/train',
    '/kaggle/input/dog-breed-identification/test',
)

In [3]:
# Load the training labels (columns: id, breed).
labels_dataframe = pd.read_csv('/kaggle/input/dog-breed-identification/labels.csv')
# Load the sample submission; its breed columns are filled with predictions
# in the final cell.
sample_df = pd.read_csv('/kaggle/input/dog-breed-identification/sample_submission.csv')
# Inspect the first five rows of the labels table.
labels_dataframe.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [4]:
# Distinct breed names, sorted alphabetically, and how many there are.
dog_breeds = list(set(labels_dataframe['breed']))
dog_breeds.sort()
n_classes = len(dog_breeds)

In [5]:
# Lookup table: breed name -> integer class index (its position in dog_breeds).
class_to_num = {breed: idx for idx, breed in zip(range(n_classes), dog_breeds)}

In [6]:
def images_to_array(data_dir, labels_dataframe, img_size = (224,224,3)):
    """Load labelled images into a uint8 array and one-hot encode their breeds.

    Parameters
    ----------
    data_dir : str
        Directory containing the ``<id>.jpg`` image files.
    labels_dataframe : pandas.DataFrame
        Must provide an ``id`` column (file name without extension) and a
        ``breed`` column whose values are keys of the module-level
        ``class_to_num`` mapping.
    img_size : tuple of int
        ``(height, width, channels)`` every image is resized to.

    Returns
    -------
    X : numpy.ndarray
        uint8 array of shape ``(n_images, height, width, channels)``.
    y : numpy.ndarray
        One-hot labels of shape ``(n_images, len(class_to_num))``.

    Both arrays are shuffled with the same ``np.random.permutation``, so the
    row order depends on NumPy's global random state.
    """
    # Work on positional arrays: ``series[i]`` is a label-based lookup and
    # fails (or picks the wrong row) when the frame's index is not 0..n-1,
    # e.g. after filtering or sampling.
    images_names = labels_dataframe['id'].values
    images_labels = labels_dataframe['breed'].values
    data_size = len(images_names)

    # Pre-allocate the outputs. Labels use int32 rather than uint8 so class
    # indices above 255 cannot overflow.
    X = np.zeros([data_size, img_size[0], img_size[1], img_size[2]], dtype=np.uint8)
    y = np.zeros([data_size,1], dtype=np.int32)

    # Read every image and look up its integer label.
    for i in tqdm(range(data_size)):
        img_dir = os.path.join(data_dir, images_names[i]+'.jpg')
        # load_img takes target_size as (height, width); drop the channel entry.
        X[i] = load_img(img_dir, target_size=img_size[:2])
        y[i] = class_to_num[images_labels[i]]

    # One-hot encode with an explicit width; otherwise the width is inferred
    # from the largest label present and shrinks if the last classes are
    # missing from this frame.
    y = to_categorical(y, num_classes=len(class_to_num))

    # Shuffle images and labels with the same permutation.
    ind = np.random.permutation(data_size)
    X = X[ind]
    y = y[ind]
    print('Ouptut Data Size: ', X.shape)
    print('Ouptut Label Size: ', y.shape)
    return X, y

In [7]:
# Working resolution (height, width, channels) shared by all feature
# extractors below; the training images are loaded at this size.
img_size = (331, 331, 3)
X, y = images_to_array(
    data_dir=train_dir,
    labels_dataframe=labels_dataframe,
    img_size=img_size,
)

100%|██████████| 10222/10222 [01:01<00:00, 167.16it/s]


Ouptut Data Size:  (10222, 331, 331, 3)
Ouptut Label Size:  (10222, 120)


In [8]:
def get_features(model_name, data_preprocessor, input_size, data):
    """Run images through a pretrained ImageNet backbone and return pooled features.

    Parameters
    ----------
    model_name : callable
        Keras application constructor (e.g. ``InceptionV3``). It is called
        with ``weights='imagenet'``, ``include_top=False`` and
        ``input_shape=input_size``.
    data_preprocessor : callable
        Preprocessing function that belongs to ``model_name``; it is applied
        to the raw input inside the model through a ``Lambda`` layer.
    input_size : tuple
        Input shape handed to both the input layer and the backbone.
    data : array-like
        Images to extract features from.

    Returns
    -------
    numpy.ndarray
        Output of ``model.predict(data)`` after global average pooling:
        one feature vector per input image.
    """
    base_model = model_name(weights='imagenet', include_top=False,
                            input_shape=input_size)

    model = Sequential()
    model.add(InputLayer(input_shape = input_size))
    # Use the preprocessor supplied by the caller. Reading the global
    # `preprocess_input` here would pick up whichever application module was
    # imported last, which may not match `model_name`.
    model.add(Lambda(data_preprocessor))
    model.add(base_model)
    model.add(GlobalAveragePooling2D())

    feature_maps = model.predict(data)

    print('Feature maps shape: ', feature_maps.shape)
    return feature_maps

In [9]:
# InceptionV3 feature extraction on the training images.
# NOTE: this import binds the module-level name `preprocess_input`; the VGG16
# and ResNet50 cells import a different function under the same name, so use
# `inception_preprocessor` whenever the InceptionV3 variant is needed.
from keras.applications.inception_v3 import InceptionV3, preprocess_input
inception_preprocessor = preprocess_input
inception_features = get_features(
    model_name=InceptionV3,
    data_preprocessor=inception_preprocessor,
    input_size=img_size,
    data=X,
)

Feature maps shape:  (10222, 2048)


In [10]:
# VGG16 feature extraction on the training images.
# NOTE: this import rebinds the module-level name `preprocess_input` to the
# VGG16 version; keep using `vgg16_preprocessor` to refer to it explicitly.
from keras.applications.vgg16 import VGG16, preprocess_input
vgg16_preprocessor = preprocess_input
vgg16_features = get_features(
    model_name=VGG16,
    data_preprocessor=vgg16_preprocessor,
    input_size=img_size,
    data=X,
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Feature maps shape:  (10222, 512)


In [11]:
# ResNet50 feature extraction on the training images.
# NOTE: this import rebinds the module-level name `preprocess_input` to the
# ResNet50 version; no later cell shown here rebinds it again.
from keras.applications.resnet50 import ResNet50, preprocess_input
resnet_preprocessor = preprocess_input
resnet_features = get_features(
    model_name=ResNet50,
    data_preprocessor=resnet_preprocessor,
    input_size=img_size,
    data=X,
)



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Feature maps shape:  (10222, 2048)


In [12]:
# Drop the reference to the raw training images; the cells below only use the
# extracted feature arrays and `y`.
del X

In [13]:
# Early stopping: halt once validation loss has not improved for 10 epochs and
# restore the weights from the best epoch.
from keras.callbacks import EarlyStopping
EarlyStop_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
my_callback = [EarlyStop_callback]

In [14]:
# Softmax classifier trained on the VGG16 features: dropout followed by a
# single 120-way dense layer. The last 10% of the rows are held out for
# validation.
model = Sequential([
    InputLayer(input_shape=vgg16_features.shape[1:]),
    Dropout(0.7),
    Dense(120, activation='softmax'),
])
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
h = model.fit(
    vgg16_features, y,
    batch_size=128,
    epochs=60,
    validation_split=0.1,
    callbacks=my_callback,
)

Train on 9199 samples, validate on 1023 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60


In [15]:
# Same classifier head, trained on the ResNet50 features.
# This rebinds `model` and `h`, replacing the previously trained classifier.
model = Sequential([
    InputLayer(input_shape=resnet_features.shape[1:]),
    Dropout(0.7),
    Dense(120, activation='softmax'),
])
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
h = model.fit(
    resnet_features, y,
    batch_size=128,
    epochs=60,
    validation_split=0.1,
    callbacks=my_callback,
)

Train on 9199 samples, validate on 1023 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60


In [16]:
# Same classifier head, trained on the InceptionV3 features.
# `model` stays bound to this classifier; the test-prediction cell further
# down calls `model.predict` on InceptionV3 test features.
model = Sequential([
    InputLayer(input_shape=inception_features.shape[1:]),
    Dropout(0.7),
    Dense(120, activation='softmax'),
])
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
h = model.fit(
    inception_features, y,
    batch_size=128,
    epochs=60,
    validation_split=0.1,
    callbacks=my_callback,
)

Train on 9199 samples, validate on 1023 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60


In [17]:
def images_to_array2(data_dir, labels_dataframe, img_size = (224,224,3)):
    """Load unlabelled images into a uint8 array, keeping the frame's row order.

    Parameters
    ----------
    data_dir : str
        Directory containing the ``<id>.jpg`` image files.
    labels_dataframe : pandas.DataFrame
        Only its ``id`` column (file name without extension) is read.
    img_size : tuple of int
        Target size; the first two entries are used as height and width.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape ``(n_images, height, width, 3)``.
    """
    # Positional array: ``series[i]`` is a label-based lookup and breaks when
    # the frame does not carry a default 0..n-1 index.
    images_names = labels_dataframe['id'].values
    data_size = len(images_names)
    X = np.zeros([data_size, img_size[0], img_size[1], 3], dtype=np.uint8)

    for i in tqdm(range(data_size)):
        img_dir = os.path.join(data_dir, images_names[i]+'.jpg')
        # load_img takes target_size as (height, width); drop any channel entry.
        X[i] = tf.keras.preprocessing.image.load_img(img_dir, target_size=img_size[:2])

    print('Ouptut Data Size: ', X.shape)
    return X

# Load the test images in sample-submission order, at the same resolution as
# the training images.
test_data = images_to_array2(
    data_dir=test_dir,
    labels_dataframe=sample_df,
    img_size=img_size,
)

100%|██████████| 10357/10357 [00:48<00:00, 213.53it/s]

Ouptut Data Size:  (10357, 331, 331, 3)





In [None]:
# Extract InceptionV3 features for the test images.
# NOTE: this rebinds `inception_features`, which previously held the training
# features, to the test-set features.
inception_features = get_features(
    model_name=InceptionV3,
    data_preprocessor=inception_preprocessor,
    input_size=img_size,
    data=test_data,
)

In [None]:
# Drop the reference to the raw test images; the remaining cells only use the
# extracted features.
del test_data

In [None]:
# Class probabilities for every test image, from the most recently trained
# classifier bound to `model`, applied to the test-set features.
y_pred = model.predict(
    inception_features,
    batch_size=128,
)

In [None]:
# Build the submission: write each breed's predicted probabilities into the
# column of the same name, then save the table without the DataFrame index.
for b in dog_breeds:
    # class_to_num[b] is the column of y_pred that belongs to breed b.
    sample_df[b] = y_pred[:, class_to_num[b]]
# index=False is the documented way to omit the index; index=None only had
# that effect because None is falsy.
sample_df.to_csv('pred.csv', index=False)