In [1]:
import numpy as np
import pandas as pd 
import keras
from keras.applications.inception_v3 import preprocess_input
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten, Input, Lambda, GlobalAveragePooling2D
from keras.preprocessing import image

import os
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
from keras.applications import xception
from keras.applications import inception_v3, resnet50

In [3]:
data_dir = os.getcwd()
df_train = pd.read_csv('labels.csv')
df_test = pd.read_csv('sample_submission.csv')

In [4]:
df_train.head(10)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier
7,002a283a315af96eaea0e28e7163b21b,borzoi
8,003df8b8a8b05244b1d920bb6cf451f9,basenji
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound


In [5]:
target_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(target_series, sparse=True)

In [6]:
one_hot_labels = np.asarray(one_hot)

In [7]:
def read_img(img_id, train_or_test, size):
    """Read and resize image.
    # Arguments
        img_id: string
        train_or_test: string 'train' or 'test'.
        size: resize the original image.
    # Returns
        Image as numpy array.
    """
    img = image.load_img(os.path.join(data_dir, train_or_test, '%s.jpg' % img_id), target_size=size)
    #img = image.img_to_array(img)
    return img

In [8]:
IM_SIZE = 299

In [9]:
x_train = np.zeros((len(df_train), IM_SIZE, IM_SIZE, 3), dtype=np.uint8)
y_train = np.zeros((one_hot_labels.shape), dtype=np.uint8)
for i, img_id in tqdm(enumerate(df_train['id'])):
    img = read_img(img_id, 'train', (IM_SIZE, IM_SIZE))
    x_train[i] = img
    y_train[i] = one_hot_labels[i]
    
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))

10222it [00:54, 187.26it/s]

Train Images shape: (10222, 299, 299, 3) size: 2,741,571,066





In [10]:
print(y_train.shape, x_train.shape)

(10222, 120) (10222, 299, 299, 3)


In [11]:
num_class = y_train.shape[1]

### Extract Xception and Inception bottleneck features

In [12]:
def get_features(MODEL, data=x_train):
    cnn_model = MODEL(include_top=False, input_shape=(IM_SIZE, IM_SIZE, 3), weights='imagenet')
    
    inputs = Input((IM_SIZE, IM_SIZE, 3))
    x = inputs
    x = Lambda(preprocess_input, name='preprocessing')(x)
    x = cnn_model(x)
    x = GlobalAveragePooling2D()(x)
    cnn_model = Model(inputs, x)

    features = cnn_model.predict(data, batch_size=64, verbose=1)
    return features

In [13]:
inception_features = get_features(inception_v3.InceptionV3, x_train)
xception_features = get_features(xception.Xception, x_train)




In [32]:
resnet_features = get_features(resnet50.ResNet50, x_train)



In [33]:
features = np.concatenate([inception_features, xception_features, resnet_features], axis=-1)

### Model training

In [55]:
sgd = keras.optimizers.SGD(lr=0.01, momentum=0.9, decay=1e-2)
callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss',
                                          patience=10, verbose=1),
            keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=3, verbose=1)]

In [56]:
inputs = Input(features.shape[1:])
x = inputs
x = Dropout(0.5)(x)
x = Dense(num_class, activation='softmax')(x)
model = Model(inputs, x)

In [57]:
model.compile(loss='categorical_crossentropy', optimizer=sgd,
              metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_20 (InputLayer)        (None, 6144)              0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 6144)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 120)               737400    
Total params: 737,400
Trainable params: 737,400
Non-trainable params: 0
_________________________________________________________________


In [58]:
model.fit(features, y_train, batch_size=128, epochs=150, validation_split=0.1, verbose=1, callbacks=callbacks)

Train on 9199 samples, validate on 1023 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150

<keras.callbacks.History at 0x7f7d00684f98>

### Testing

In [44]:
x_test = np.zeros((len(df_test), IM_SIZE, IM_SIZE, 3), dtype='float32')
for i, img_id in tqdm(enumerate(df_test['id'])):
    img = read_img(img_id, 'test', (IM_SIZE, IM_SIZE))
    x_test[i] = img
    
print('Test Images shape: {} size: {:,}'.format(x_test.shape, x_test.size))

10357it [00:28, 364.76it/s]

Test Images shape: (10357, 299, 299, 3) size: 2,777,778,471





In [59]:
test_x_features = get_features(xception.Xception, x_test)
test_i_features = get_features(inception_v3.InceptionV3, x_test)
test_resnet_features = get_features(resnet50.ResNet50, x_test)
test_features = np.concatenate([test_i_features, test_x_features, test_resnet_features], axis=-1)



In [60]:
y_pred = model.predict(test_features, batch_size=128)

In [61]:
sub = pd.DataFrame(y_pred)
col_names = one_hot.columns.values
sub.columns = col_names

sub.insert(0, 'id', df_test['id'])
sub.head(10)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,5.1e-05,3.3e-05,2.4e-05,1e-05,1.8e-05,1.9e-05,1.9e-05,9e-06,9e-06,...,2.3e-05,3.2e-05,2.1e-05,1.3e-05,1.7e-05,1.3e-05,1.7e-05,9e-06,5.3e-05,3.4e-05
1,00102ee9d8eb90812350685311fe5890,2.7e-05,2.4e-05,4.1e-05,2.7e-05,5.1e-05,0.000168,3.3e-05,3.5e-05,3.6e-05,...,3.3e-05,3.1e-05,3e-05,5.9e-05,5.4e-05,5.3e-05,0.00043,2.6e-05,6.9e-05,2.1e-05
2,0012a730dfa437f5f3613fb75efcd4ce,3.4e-05,0.011641,0.000245,0.000221,0.000103,0.000139,6.2e-05,7.5e-05,0.000235,...,0.000118,0.000185,0.00034,0.000509,0.000321,0.001896,4.9e-05,0.000225,0.000253,0.000153
3,001510bc8570bbeee98c8d80c8a95ec1,0.001762,0.00017,6.9e-05,2.4e-05,0.000155,0.00015,1.7e-05,0.000137,0.000114,...,0.00012,4.8e-05,5.2e-05,6.9e-05,4.1e-05,2.1e-05,2.9e-05,0.000195,8e-06,7.3e-05
4,001a5f3114548acdefa3d4da05474c2e,0.04906,0.000555,0.000343,3.9e-05,0.000276,0.000225,0.000328,0.000218,0.000192,...,0.000461,0.000537,0.000462,0.000298,0.000199,0.0001,0.000268,0.000449,0.000432,0.001543
5,00225dcd3e4d2410dd53239f95c0352f,0.000653,0.013143,0.000793,0.00414,0.000384,0.000649,0.000586,0.000864,0.000297,...,0.054726,0.000659,0.000468,0.000428,0.000272,0.000489,0.000256,0.00025,0.004086,0.000621
6,002c2a3117c2193b4d26400ce431eebd,3.5e-05,5.3e-05,0.000109,5.6e-05,4.8e-05,2.4e-05,0.986311,6.7e-05,1.2e-05,...,2.7e-05,2.9e-05,0.000107,3.3e-05,2e-05,3.2e-05,4.3e-05,1.2e-05,0.000134,0.001218
7,002c58d413a521ae8d1a5daeb35fc803,5.4e-05,5e-05,7.9e-05,3.7e-05,5.2e-05,0.00012,9.1e-05,3.1e-05,4.4e-05,...,0.000108,3.4e-05,3.9e-05,6.9e-05,3.9e-05,8.3e-05,0.001569,3.6e-05,0.00012,5.4e-05
8,002f80396f1e3db687c5932d7978b196,7e-06,3.7e-05,1.7e-05,3.1e-05,1.1e-05,1.4e-05,4e-06,8e-06,3e-06,...,4e-06,3e-06,2.1e-05,1e-05,2.2e-05,6e-06,6e-06,4.2e-05,4.6e-05,4e-06
9,0036c6bcec6031be9e62a257b1c3c442,0.000194,0.000369,0.000362,6.6e-05,0.000194,8.2e-05,0.000124,4.1e-05,0.000688,...,0.000178,9.5e-05,0.000367,8.1e-05,0.000172,0.000632,0.000196,5.1e-05,0.000171,0.000107


In [62]:
sub.to_csv('inc_exc_submission1.csv', index=False)

In [63]:
sub.tail()

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.000158,0.002164,0.000474,0.000282,0.000255,0.000304,0.000174,0.000192,0.00061,...,0.007343,0.000584,0.000219,0.00063,0.000501,0.000504,0.000113,0.000489,0.000698,0.000153
10353,fff1ec9e6e413275984966f745a313b0,2.1e-05,3.3e-05,7.2e-05,4.6e-05,0.00028,2e-05,3.8e-05,2.7e-05,0.00015,...,2.1e-05,3e-05,0.006166,7.4e-05,0.972087,2.3e-05,1.4e-05,0.000121,5.9e-05,6.5e-05
10354,fff74b59b758bbbf13a5793182a9bbe4,7.7e-05,0.000123,0.001822,4.5e-05,3e-05,9e-05,0.000164,0.000141,5.4e-05,...,0.000171,0.000101,0.000122,0.000132,4.3e-05,0.000125,7.1e-05,6.7e-05,0.000157,7e-05
10355,fff7d50d848e8014ac1e9172dc6762a3,0.000298,9.9e-05,3.6e-05,1.7e-05,7.8e-05,4.1e-05,8e-05,3.4e-05,3.7e-05,...,0.000866,4.7e-05,8.2e-05,5.8e-05,1.8e-05,1.5e-05,0.00013,3e-05,0.000138,0.000225
10356,fffbff22c1f51e3dc80c4bf04089545b,0.000119,0.001478,0.000398,0.00013,0.000224,0.000302,8.5e-05,8.4e-05,8.3e-05,...,5.9e-05,7.6e-05,0.000145,0.000233,0.000214,6.5e-05,0.0001,0.001715,0.000367,5.9e-05
