In [1]:
# Silence every warning category for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

import numpy as np
# Seed NumPy's global RNG.
# NOTE(review): only NumPy is seeded here; whether the Keras backend's own random
# state is covered is not visible in this notebook -- confirm before relying on
# run-to-run reproducibility of training.
np.random.seed(2018)
import pandas as pd
import matplotlib.pyplot as plt
# Use the colour-blind-friendly matplotlib style for any plots.
plt.style.use('seaborn-colorblind')
import seaborn as sns
import gc
import os
import time
# Notebook progress bar, exposed under the plain name `tqdm`.
from tqdm import tqdm_notebook as tqdm

import keras
from keras.preprocessing.image import load_img,img_to_array
from keras.applications import inception_resnet_v2,resnet50,xception
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.inception_resnet_v2 import preprocess_input as incep_v2_pre
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input as res_50_pre
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as xcep_pre

from keras.models import Model
from keras.layers import Dense,Dropout
from keras import losses,optimizers,metrics
from sklearn.model_selection import train_test_split

from os import makedirs
from os.path import expanduser,exists,join

Using TensorFlow backend.


### 创建keras根目录

In [2]:
# Directory that the following cells copy the pretrained weight files into.
cache_dir = expanduser(join('~','.keras'))
models_dir = join(cache_dir,'models')
# makedirs() also creates the missing parent (~/.keras), and exist_ok=True makes the
# cell safe to re-run without a separate (racy) exists() check for each level.
makedirs(models_dir, exist_ok=True)

### 将Keras的预训练模型复制到 ~/.keras/models 目录中

In [3]:
# Copy the three "notop" weight files (InceptionResNetV2, ResNet50, Xception) from the
# input dataset into ~/.keras/models.
# NOTE(review): presumably this lets the later weights='imagenet' constructors find the
# files locally instead of downloading them -- confirm.
!cp ../input/keras-pretrained-models/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5 ~/.keras/models
!cp ../input/keras-pretrained-models/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5 ~/.keras/models
!cp ../input/keras-pretrained-models/xception_weights_tf_dim_ordering_tf_kernels_notop.h5 ~/.keras/models

In [4]:
# List the directory to confirm that the weight files were copied.
!ls ~/.keras/models

inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [5]:
# Folder prefixes for the training and test images; an image id plus '.jpg' is appended
# to these prefixes later to build each file path.
train_folder = "../input/dog-breed-identification/train/"
test_folder = "../input/dog-breed-identification/test/"

In [6]:
# labels.csv: one row per training image (id, breed).
# sample_submission.csv: one row per test image (id plus one probability column per breed).
train_dogs = pd.read_csv("../input/dog-breed-identification/labels.csv")
test_dogs = pd.read_csv("../input/dog-breed-identification/sample_submission.csv")
# For a quick smoke test, truncate both frames here (e.g. keep only the first 100 rows).
display(train_dogs.head(), test_dogs.head())

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


In [7]:
# Build each file path as <folder><id>.jpg with vectorized string concatenation instead
# of calling a Python lambda once per row.
train_dogs['image_path'] = train_folder + train_dogs['id'].astype(str) + '.jpg'
test_dogs['image_path'] = test_folder + test_dogs['id'].astype(str) + '.jpg'
display(train_dogs['image_path'].head())
display(test_dogs['image_path'].head())

0    ../input/dog-breed-identification/train/000bec...
1    ../input/dog-breed-identification/train/001513...
2    ../input/dog-breed-identification/train/001cdf...
3    ../input/dog-breed-identification/train/00214f...
4    ../input/dog-breed-identification/train/0021f9...
Name: image_path, dtype: object

0    ../input/dog-breed-identification/test/000621f...
1    ../input/dog-breed-identification/test/00102ee...
2    ../input/dog-breed-identification/test/0012a73...
3    ../input/dog-breed-identification/test/001510b...
4    ../input/dog-breed-identification/test/001a5f3...
Name: image_path, dtype: object

### 定义迁移学习特征提取函数

In [8]:
def feat_transfer(image_path_list,image_size,pre_trained_model,preprocessor,batch_size=50):
    """Extract features for a list of image files with a pretrained network.

    The images are loaded, resized to a square, preprocessed and passed through
    ``pre_trained_model`` one batch at a time.

    Parameters
    ----------
    image_path_list : sequence of str
        Paths of the image files to process; output rows follow this order.
    image_size : int
        Side length handed to ``load_img`` as ``target_size=(image_size, image_size)``.
    pre_trained_model : object with a ``predict(x, batch_size=...)`` method
        Turns a preprocessed image batch into an array of features.
    preprocessor : callable
        Applied to the float32 image batch before it is given to the model.
    batch_size : int, default 50
        Number of images loaded and predicted per step.

    Returns
    -------
    numpy.ndarray
        The per-batch outputs of ``pre_trained_model.predict`` concatenated along
        axis 0.

    Raises
    ------
    ValueError
        If ``image_path_list`` is empty or ``batch_size`` is not positive.
    """
    n_images = len(image_path_list)
    if n_images == 0:
        raise ValueError("image_path_list is empty; there are no images to extract features from")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    batch_features = []
    for start in tqdm(range(0, n_images, batch_size)):
        # Slicing clamps at the end of the sequence, so the final (possibly shorter)
        # batch needs no special case.
        batch_paths = image_path_list[start:start + batch_size]
        images = np.array(
            [img_to_array(load_img(path=image_path, target_size=(image_size, image_size), interpolation='box'))
             for image_path in batch_paths],
            dtype='float32',
        )
        image_preprocessed = preprocessor(images)
        batch_features.append(pre_trained_model.predict(image_preprocessed, batch_size=batch_size))

    # Concatenate once at the end rather than re-copying the growing array every batch.
    return np.concatenate(batch_features, axis=0)

### InceptionResNetV2做特征转换

In [9]:
# InceptionResNetV2 backbone without its classification head (include_top=False);
# pooling='avg' is requested for the output, and inputs are 299x299 RGB.
image_size = 299
Incep_res_v2 = InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(image_size, image_size, 3),
)
# Preprocessing function that belongs to this backbone.
precessor = incep_v2_pre

In [10]:
# InceptionResNetV2 features for the training images, then for the test images.
train_incep_feats = feat_transfer(
    image_path_list=train_dogs['image_path'].tolist(),
    image_size=image_size,
    pre_trained_model=Incep_res_v2,
    preprocessor=precessor,
)
test_incep_feats = feat_transfer(
    image_path_list=test_dogs['image_path'].tolist(),
    image_size=image_size,
    pre_trained_model=Incep_res_v2,
    preprocessor=precessor,
)

A Jupyter Widget




A Jupyter Widget




In [11]:
# Drop the notebook's reference to the InceptionResNetV2 model now that its features
# have been extracted.
del Incep_res_v2

In [12]:
# Deleting the Python reference alone does not reset the backend's global graph/session
# state; clear_session() does, so the old network can be released before the next
# backbone is loaded. (gc is enabled by default, so no gc.enable() call is needed.)
keras.backend.clear_session()
gc.collect()

359859

### ResNet50做特征转换

In [13]:
# ResNet50 backbone without its classification head (include_top=False);
# pooling='avg' is requested for the output, and inputs are 224x224 RGB.
image_size = 224
Res_50 = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(image_size, image_size, 3),
)
# Preprocessing function that belongs to this backbone.
precessor = res_50_pre

In [14]:
# ResNet50 features for the training images, then for the test images.
train_res50_feats = feat_transfer(
    image_path_list=train_dogs['image_path'].tolist(),
    image_size=image_size,
    pre_trained_model=Res_50,
    preprocessor=precessor,
)
test_res50_feats = feat_transfer(
    image_path_list=test_dogs['image_path'].tolist(),
    image_size=image_size,
    pre_trained_model=Res_50,
    preprocessor=precessor,
)

A Jupyter Widget




A Jupyter Widget




In [15]:
del Res_50
# Deleting the Python reference alone does not reset the backend's global graph/session
# state; clear_session() does, so the old network can be released before the next
# backbone is loaded. (gc is enabled by default, so no gc.enable() call is needed.)
keras.backend.clear_session()
gc.collect()

107893

### Xception做特征转换

In [16]:
# Xception backbone without its classification head (include_top=False);
# pooling='avg' is requested for the output, and inputs are 299x299 RGB.
image_size = 299
Xcep = Xception(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(image_size, image_size, 3),
)
# Preprocessing function that belongs to this backbone.
precessor = xcep_pre

In [17]:
# Xception features for the training images, then for the test images.
train_xcep_feats = feat_transfer(
    image_path_list=train_dogs['image_path'].tolist(),
    image_size=image_size,
    pre_trained_model=Xcep,
    preprocessor=precessor,
)
test_xcep_feats = feat_transfer(
    image_path_list=test_dogs['image_path'].tolist(),
    image_size=image_size,
    pre_trained_model=Xcep,
    preprocessor=precessor,
)

A Jupyter Widget




A Jupyter Widget




In [18]:
del Xcep
# Deleting the Python reference alone does not reset the backend's global graph/session
# state; clear_session() does, so the backbone can be released before the classifier
# head is built. (gc is enabled by default, so no gc.enable() call is needed.)
keras.backend.clear_session()
gc.collect()

83027

In [19]:
# Put the three backbones' feature blocks side by side, so every image gets one long
# feature row (InceptionResNetV2 | ResNet50 | Xception).
train_feats = np.hstack((train_incep_feats, train_res50_feats, train_xcep_feats))
test_feats = np.hstack((test_incep_feats, test_res50_feats, test_xcep_feats))
print(train_feats.shape)
print(test_feats.shape)

(10222, 5632)
(10357, 5632)


In [20]:
# One-hot encode the breed column; the resulting column order is reused later as the
# column order of the prediction frame.
labels = pd.get_dummies(train_dogs['breed'])
label_names, label_values = labels.columns, labels.values
print(label_values.shape)
display(labels.head())

(10222, 120)


Unnamed: 0,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,beagle,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
from keras.layers import Dense,Dropout,Input
from keras.models import Model

In [22]:
# Width of the classifier's input (feature columns) and output (label columns).
n_feats, n_labels = train_feats.shape[1], label_values.shape[1]

In [23]:
# Classifier head on the concatenated pretrained features:
# dropout -> 512-unit SELU layer -> dropout -> softmax with one unit per label column.
inputs = Input(shape=(n_feats,),name='inputs')
x = Dropout(0.5)(inputs)
x = Dense(units=512,activation='selu',kernel_initializer='lecun_normal')(x)
x = Dropout(0.5)(x)
outputs = Dense(units=n_labels,activation='softmax',kernel_initializer='lecun_normal')(x)

model = Model(inputs=inputs,outputs=outputs)
model.compile(loss=losses.categorical_crossentropy,optimizer=optimizers.RMSprop(lr=1e-3),metrics=['acc'])
# summary() prints the table itself and returns None, so wrapping it in print() only
# appended a stray "None" line to the cell output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 5632)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5632)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               2884096   
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 120)               61560     
Total params: 2,945,656
Trainable params: 2,945,656
Non-trainable params: 0
_________________________________________________________________
None


In [24]:
# Train the head for 100 epochs with 20% of the rows held out for validation.
history = model.fit(
    train_feats,
    label_values,
    epochs=100,
    batch_size=32,
    shuffle=True,
    validation_split=0.2,
    verbose=1,
)

Train on 8177 samples, validate on 2045 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [25]:
# Continue training the same model for 10 more epochs, this time on every row
# (no validation split).
model.fit(
    train_feats,
    label_values,
    epochs=10,
    batch_size=32,
    shuffle=True,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa401dc50f0>

In [26]:
# Softmax outputs for the test features: one row per test row, one column per label.
sub_pred = model.predict(
    test_feats,
    verbose=1,
    batch_size=32,
)



In [27]:
# Name the probability columns after the labels and put the test ids in front of them.
sub_pred = pd.concat(
    [test_dogs['id'], pd.DataFrame(data=sub_pred, columns=label_names)],
    axis=1,
)
display(sub_pred.head())

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.0,6.198493e-38,0.0,1.021871e-34,0.0,0.0,0.0,0.0,0.0,...,4.554688e-34,0.0,2.648189e-32,0.0,8.634777e-34,0.0,0.0,0.0,9.714988e-38,2.884459e-38
1,00102ee9d8eb90812350685311fe5890,1.9120609999999998e-38,0.0,6.301164e-34,1.6572669999999998e-36,7.992262e-34,0.0,7.161228e-37,1.829108e-37,2.2193009999999998e-36,...,2.4362350000000002e-29,0.0,2.2824680000000003e-28,5.9562e-35,4.901032e-25,1.406435e-35,1.030736e-24,0.0,1.1971110000000002e-33,9.165062e-34
2,0012a730dfa437f5f3613fb75efcd4ce,0.0,2.4833169999999997e-38,9.86446e-34,0.0,0.0,0.0,0.0,0.0,0.0,...,6.499187999999999e-38,0.0,2.760938e-32,0.0,3.1484950000000003e-27,0.0,0.0,0.0,0.0,4.894014e-37
3,001510bc8570bbeee98c8d80c8a95ec1,9.818113e-23,0.0,3.409144e-34,1.066576e-22,0.0,3.188479e-34,0.0,2.837722e-33,8.177265e-35,...,3.857262e-28,0.0,6.644867e-30,4.4113810000000005e-23,1.7565550000000002e-22,5.706822e-35,1.222892e-28,1.1299819999999999e-26,0.0,3.894204e-30
4,001a5f3114548acdefa3d4da05474c2e,3.814397e-16,1.062198e-23,1.257534e-22,8.670181e-21,1.532103e-34,1.5424239999999998e-26,0.0,1.5425620000000001e-22,5.362913e-31,...,1.03019e-29,9.365861e-31,4.1046119999999995e-26,1.12388e-22,1.564591e-21,5.535877e-30,1.3331060000000001e-27,1.092481e-22,1.576876e-27,5.6417830000000005e-25


In [28]:
# Write the prediction frame to CSV without the index column.
sub_pred.to_csv("transfer_learning_&_ensemble.csv",index=False)