In [2]:
import numpy as np
import pandas as pd
import os, cv2, random, time, shutil, csv
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm import tqdm
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Lambda, Dropout, InputLayer, Input
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img

In [3]:
# Directories holding the competition JPEGs; the loaders below look up
# '<id>.jpg' inside them.
train_dir = '../input/dog-breed-identification/train'
test_dir = '../input/dog-breed-identification/test'

In [4]:
# Training labels; the `id` and `breed` columns are the ones read downstream.
labels = pd.read_csv('../input/dog-breed-identification/labels.csv')

In [5]:
# Unique breed names, sorted so the class ordering is deterministic.
dog_breeds = sorted(set(labels['breed']))

In [6]:
# Breed name -> integer class index (its position in the sorted breed list).
class_to_num = {breed: index for index, breed in enumerate(dog_breeds)}
class_to_num

{'affenpinscher': 0,
 'afghan_hound': 1,
 'african_hunting_dog': 2,
 'airedale': 3,
 'american_staffordshire_terrier': 4,
 'appenzeller': 5,
 'australian_terrier': 6,
 'basenji': 7,
 'basset': 8,
 'beagle': 9,
 'bedlington_terrier': 10,
 'bernese_mountain_dog': 11,
 'black-and-tan_coonhound': 12,
 'blenheim_spaniel': 13,
 'bloodhound': 14,
 'bluetick': 15,
 'border_collie': 16,
 'border_terrier': 17,
 'borzoi': 18,
 'boston_bull': 19,
 'bouvier_des_flandres': 20,
 'boxer': 21,
 'brabancon_griffon': 22,
 'briard': 23,
 'brittany_spaniel': 24,
 'bull_mastiff': 25,
 'cairn': 26,
 'cardigan': 27,
 'chesapeake_bay_retriever': 28,
 'chihuahua': 29,
 'chow': 30,
 'clumber': 31,
 'cocker_spaniel': 32,
 'collie': 33,
 'curly-coated_retriever': 34,
 'dandie_dinmont': 35,
 'dhole': 36,
 'dingo': 37,
 'doberman': 38,
 'english_foxhound': 39,
 'english_setter': 40,
 'english_springer': 41,
 'entlebucher': 42,
 'eskimo_dog': 43,
 'flat-coated_retriever': 44,
 'french_bulldog': 45,
 'german_shepherd'

In [7]:
def process_img_data(data_dir, labels, img_size=(224, 224, 3), seed=None):
    """Load labelled images into one uint8 array and one-hot encode their breeds.

    Parameters
    ----------
    data_dir : str
        Directory containing one ``<id>.jpg`` file per row of ``labels``.
    labels : pandas.DataFrame
        Must provide an ``id`` column (file name without extension) and a
        ``breed`` column whose values are keys of the module-level
        ``class_to_num`` mapping.
    img_size : tuple of int
        ``(height, width, channels)`` of the output array; every image is
        resized to ``(height, width)`` on load.
    seed : int or None
        Seed for the shuffle. ``None`` (default) keeps the previous behaviour
        of drawing the permutation from NumPy's global random state.

    Returns
    -------
    x : numpy.ndarray
        uint8 array of shape ``(n, height, width, channels)``.
    y : numpy.ndarray
        One-hot labels of shape ``(n, len(class_to_num))``.
        ``x`` and ``y`` are shuffled with the same permutation.
    """
    # Work on positional arrays: ``series[i]`` is a label lookup and fails (or
    # returns the wrong row) when ``labels`` has a non-default index, e.g.
    # after filtering or sampling.
    image_names = labels['id'].to_numpy()
    image_labels = labels['breed'].to_numpy()

    n_data_size = len(image_names)
    height, width, channels = img_size[:3]

    x = np.zeros((n_data_size, height, width, channels), dtype=np.uint8)
    # int32 rather than uint8 so class indices above 255 cannot wrap around.
    y = np.zeros(n_data_size, dtype=np.int32)

    rows = zip(image_names, image_labels)
    for i, (image_name, image_breed) in enumerate(tqdm(rows, total=n_data_size)):
        img_path = os.path.join(data_dir, image_name + '.jpg')
        # load_img expects target_size as (height, width) only.
        x[i] = load_img(img_path, target_size=(height, width))
        y[i] = class_to_num[image_breed]

    # Fix the number of classes explicitly; otherwise it is inferred from the
    # largest label present and a subset of the data yields too few columns.
    y = to_categorical(y, num_classes=len(class_to_num))

    if seed is None:
        ind = np.random.permutation(n_data_size)
    else:
        ind = np.random.default_rng(seed).permutation(n_data_size)

    x = x[ind]
    y = y[ind]

    print(x.shape, y.shape)

    return x, y

In [8]:
# All training images are resized to this shape and held in memory at once as
# a single uint8 array, so this cell is memory-hungry.
img_size = (331, 331, 3)
x, y = process_img_data(train_dir, labels, img_size)

100%|██████████| 10222/10222 [00:47<00:00, 215.02it/s]


(10222, 331, 331, 3) (10222, 120)


In [9]:
def extract_features(model_name, data_processor, input_size, data, batch_size=128):
    """Run ``data`` through a pretrained backbone and return pooled features.

    Parameters
    ----------
    model_name : callable
        Keras application constructor (e.g. ``InceptionV3``); it is called
        with ``weights='imagenet'`` and ``include_top=False``.
    data_processor : callable
        Preprocessing function matching ``model_name``; it is applied inside
        the model through a ``Lambda`` layer, so ``data`` can be raw pixels.
    input_size : tuple of int
        Shape of a single input image.
    data : numpy.ndarray
        Batch of images to featurise.
    batch_size : int
        Batch size used for prediction (default 128, the former fixed value).

    Returns
    -------
    numpy.ndarray
        One globally average-pooled feature vector per input image.
    """
    input_layer = Input(input_size)
    preprocessed = Lambda(data_processor)(input_layer)
    backbone_output = model_name(
        weights='imagenet', include_top=False, input_shape=input_size
    )(preprocessed)
    # Collapse the spatial dimensions so each image yields a flat vector.
    pooled = GlobalAveragePooling2D()(backbone_output)
    feature_extractor = Model(inputs=input_layer, outputs=pooled)
    return feature_extractor.predict(data, batch_size=batch_size)

In [10]:
from keras.applications.inception_v3 import InceptionV3, preprocess_input

# Capture the Inception preprocessing function under its own name now: the
# Xception cell imports another `preprocess_input` and rebinds that name.
inception_preprocessor = preprocess_input
inception_features = extract_features(
    model_name=InceptionV3,
    data_processor=inception_preprocessor,
    input_size=img_size,
    data=x,
)

In [11]:
from keras.applications.xception import Xception, preprocess_input

# Same extraction as for Inception, using Xception and its own preprocessing.
xception_preprocessor = preprocess_input
xception_features = extract_features(
    model_name=Xception,
    data_processor=xception_preprocessor,
    input_size=img_size,
    data=x,
)

In [12]:
# Drop the raw training images; later cells only use the extracted features.
del x

In [13]:
# Place both backbones' feature vectors side by side for every image.
final_features = np.concatenate((inception_features, xception_features), axis=-1)

final_features.shape

(10222, 4096)

In [14]:
# Classifier head trained on the pre-extracted features: dropout followed by
# a 120-way softmax layer.
model = keras.models.Sequential()
model.add(InputLayer(final_features.shape[1:]))
model.add(Dropout(0.5))
model.add(Dense(120, activation='softmax'))

In [15]:
from keras.callbacks import EarlyStopping

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once validation loss has not improved for 10 epochs and roll the
# weights back to the best epoch.
stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [16]:
# `epochs` is only an upper bound: it must exceed the EarlyStopping patience
# (10) for the callback to be able to end training; with epochs=2 it never could.
model.fit(final_features, y, batch_size=128, epochs=50, validation_split=0.2, callbacks=[stop_callback])

NameError: name 'y' is not defined

In [None]:
def images_to_array2(data_dir, labels_dataframe, img_size = (224,224,3)):
    '''
    Load the images listed in ``labels_dataframe['id']`` into one uint8 array.

    Test-time counterpart of ``process_img_data``: no labels are read and the
    row order of ``labels_dataframe`` is preserved (no shuffling).

    Parameters
    ----------
    data_dir : str
        Directory containing one ``<id>.jpg`` file per row.
    labels_dataframe : pandas.DataFrame
        Must provide an ``id`` column (file name without extension).
    img_size : tuple of int
        ``(height, width[, channels])``; channels defaults to 3 when omitted.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape ``(n, height, width, channels)``.
    '''
    # Positional array: ``series[i]`` is a label lookup and breaks when the
    # frame has a non-default index.
    images_names = labels_dataframe['id'].to_numpy()
    data_size = len(images_names)
    height, width = img_size[0], img_size[1]
    channels = img_size[2] if len(img_size) > 2 else 3
    X = np.zeros((data_size, height, width, channels), dtype=np.uint8)

    for i, image_name in enumerate(tqdm(images_names, total=data_size)):
        img_path = os.path.join(data_dir, image_name + '.jpg')
        # load_img expects target_size as (height, width) only.
        X[i] = tf.keras.preprocessing.image.load_img(img_path, target_size=(height, width))

    print('Ouptut Data Size: ', X.shape)
    return X

# Locate the submission template next to the test directory so every dataset
# path in the notebook is derived from the same root (previously this one
# path was absolute while the others were relative).
sample_df = pd.read_csv(os.path.join(os.path.dirname(test_dir), 'sample_submission.csv'))

test_data = images_to_array2(test_dir, sample_df, img_size)

In [None]:
# Use dedicated names for the test-set features: reusing `inception_features`
# / `xception_features` would overwrite the training features, so re-running
# the `final_features` cell afterwards would silently pick up test data.
inception_test_features = extract_features(InceptionV3, inception_preprocessor, img_size, test_data)
xception_test_features = extract_features(Xception, xception_preprocessor, img_size, test_data)

# Same backbone order as for the training features.
test_features = np.concatenate([inception_test_features,
                                xception_test_features,
                               ], axis=-1)
print('Final feature maps shape', test_features.shape)

In [None]:
# Drop the raw test images; prediction below only uses `test_features`.
del test_data

In [None]:
# Softmax outputs of the classifier head, one row per row of `test_features`.
y_pred = model.predict(test_features, batch_size=128)

In [None]:
# Write each breed's predicted probabilities into its submission column,
# then save the frame without the index.
for breed, column_index in class_to_num.items():
    sample_df[breed] = y_pred[:, column_index]
sample_df.to_csv('pred.csv', index=None)