In [1]:
import cv2
import numpy as np
from tqdm import tqdm
import pandas as pd

In [2]:
# Read the training label table; downstream cells use its `id` and `breed` columns.
df = pd.read_csv(filepath_or_buffer='labels.csv')
# Show the first five rows as a quick sanity check.
df.head(n=5)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [3]:
# Number of labelled training samples.
n = len(df)
# Unique breed names, sorted. Iterating a bare set of strings can produce a
# different order in every interpreter session (string hash randomisation),
# which would silently change which output column stands for which breed and
# make previously saved weights meaningless after a kernel restart.
breed = sorted(set(df['breed']))
n_class = len(breed)
# Forward and reverse lookups between a breed name and its class index.
class_to_num = {name: idx for idx, name in enumerate(breed)}
num_to_class = dict(enumerate(breed))

In [4]:
# Side length (pixels) that every image is resized to before feature extraction.
width = 299
# Image tensor (n, width, width, 3) and one-hot label matrix (n, n_class).
X = np.zeros((n, width, width, 3), dtype=np.uint8)
y = np.zeros((n, n_class), dtype=np.uint8)
for i in tqdm(range(n)):
    path = 'train/%s.jpg' % df['id'][i]
    img = cv2.imread(path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, which would otherwise surface as an opaque
    # cv2.resize assertion; fail early with the offending path instead.
    if img is None:
        raise IOError('could not read image: %s' % path)
    # NOTE(review): OpenCV decodes to BGR channel order and no conversion is
    # done here -- confirm this is what the downstream networks should receive.
    X[i] = cv2.resize(img, (width, width))
    y[i][class_to_num[df['breed'][i]]] = 1

100%|███████████████████████████████████| 10222/10222 [00:41<00:00, 249.03it/s]


In [5]:
from keras.layers import *
from keras.models import *
from keras.applications import *
from keras.optimizers import *
from keras.regularizers import *
from keras.applications.inception_v3 import preprocess_input
from keras.callbacks import ModelCheckpoint

Using TensorFlow backend.


In [6]:
# define get_features function to extract features using pre-trained CNNs
def get_features(MODEL, data=X, preprocess=None, batch_size=64):
    """Extract globally average-pooled features from a pre-trained CNN.

    Parameters
    ----------
    MODEL : callable
        Model constructor (e.g. ``InceptionV3``) accepting ``include_top``,
        ``input_shape`` and ``weights`` keyword arguments.
    data : array
        Batch of images fed to the network; each image must have shape
        ``(width, width, 3)``. Defaults to the training tensor ``X``.
    preprocess : callable, optional
        Function applied to the raw input inside the graph before the CNN.
        When omitted, the ``preprocess_input`` imported from
        ``keras.applications.inception_v3`` is used, as before.
        NOTE(review): the Keras application modules each provide their own
        ``preprocess_input``; pass the matching one when ``MODEL`` is not an
        Inception-style network (e.g. ResNet50) -- confirm per backbone.
    batch_size : int, optional
        Batch size used for the forward pass (default 64).

    Returns
    -------
    The output of ``predict``: one pooled feature vector per input image.
    """
    if preprocess is None:
        preprocess = preprocess_input

    # Convolutional base only (no classification head), ImageNet weights.
    base_cnn = MODEL(include_top=False, input_shape=(width, width, 3), weights='imagenet')

    inputs = Input((width, width, 3))
    x = Lambda(preprocess, name='preprocessing')(inputs)
    x = base_cnn(x)
    # Collapse the spatial dimensions so each image yields a flat vector.
    x = GlobalAveragePooling2D()(x)
    extractor = Model(inputs, x)

    return extractor.predict(data, batch_size=batch_size, verbose=1)

In [7]:
# Bottleneck features of the training images, one array per pre-trained network.
inception_features = get_features(MODEL=InceptionV3, data=X)
xception_features = get_features(MODEL=Xception, data=X)
resnet_features = get_features(MODEL=ResNet50, data=X)
inresv2_features = get_features(MODEL=InceptionResNetV2, data=X)



In [8]:
# Join the four per-network feature arrays along the last axis so that every
# training sample is described by one combined vector.
features = np.concatenate(
    (
        inception_features,
        xception_features,
        resnet_features,
        inresv2_features,
    ),
    axis=-1,
)

In [9]:
# Train a small classifier head (dropout followed by a softmax layer) on the
# combined bottleneck features. The checkpoint callback writes the model to
# disk whenever the monitored quantity improves (save_best_only=True).
inputs = Input(features.shape[1:])
x = inputs
x = Dropout(0.5)(x)
x = Dense(n_class, activation='softmax')(x)
model = Model(inputs, x)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
checkpointer = ModelCheckpoint(filepath='weights.best.from_scratch_with_4_predictors.hdf5', verbose=1, save_best_only=True)
# The last 10% of the samples are held out for validation.
h = model.fit(features, y, batch_size=128, epochs=100, callbacks=[checkpointer], validation_split=0.1)
# Training always runs the full 100 epochs, so `model` ends in its last-epoch
# state, which is not necessarily the best one. Restore the checkpointed
# weights so later predictions use the best model rather than the final one.
model.load_weights(checkpointer.filepath)

Train on 9199 samples, validate on 1023 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100


Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100


Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100


Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100


Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100


Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100


Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100


Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100


Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100


Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100


Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [10]:
# Read the test-set images listed in the sample submission file. The test
# images have no labels here; the submission frame only supplies their ids.
df2 = pd.read_csv('sample_submission.csv')

n_test = len(df2)
X_test = np.zeros((n_test, width, width, 3), dtype=np.uint8)
for i in tqdm(range(n_test)):
    path = 'test/%s.jpg' % df2['id'][i]
    img = cv2.imread(path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail early with the offending path.
    if img is None:
        raise IOError('could not read image: %s' % path)
    X_test[i] = cv2.resize(img, (width, width))

100%|███████████████████████████████████| 10357/10357 [00:39<00:00, 261.83it/s]


In [11]:
# Bottleneck features of the test images, extracted with the same four
# pre-trained networks via get_features.
inception_t_features = get_features(MODEL=InceptionV3, data=X_test)
xception_t_features = get_features(MODEL=Xception, data=X_test)
resnet_t_features = get_features(MODEL=ResNet50, data=X_test)
inresv2_t_features = get_features(MODEL=InceptionResNetV2, data=X_test)



In [12]:
# Join the four per-network test feature arrays along the last axis, in the
# same network order that was used for the training features.
features_test = np.concatenate(
    (
        inception_t_features,
        xception_t_features,
        resnet_t_features,
        inresv2_t_features,
    ),
    axis=-1,
)

In [13]:
# Run the trained classifier head over the combined test features; the result
# is indexed below as y_pred[:, class_index].
y_pred = model.predict(x=features_test, batch_size=128)

In [14]:
# Write one prediction column per breed into the submission frame, then save
# the frame as a CSV file.
for b in breed:
    column_index = class_to_num[b]
    df2[b] = y_pred[:, column_index]

# NOTE(review): index=None appears to be treated like index=False (no row
# index written); the documented form of this argument is a bool.
df2.to_csv('new_predictions.csv', index=None)