## 导入模型

In [1]:
import h5py
import numpy as np
from sklearn.utils import shuffle
np.random.seed(2018)

X_train = []
X_test = []

for filename in ["feature_resnet50.h5", "feature_xception.h5", "feature_inception_v3.h5"]:
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        y_train = np.array(h['label'])

X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)

X_train, y_train = shuffle(X_train, y_train)

  from ._conv import register_converters as _register_converters


## 构建模型

In [2]:
from keras.models import *
from keras.layers import *

input_tensor = Input(X_train.shape[1:])
x = input_tensor
x = Dropout(0.5)(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(input_tensor, x)

model.compile(optimizer='adadelta',
              loss='binary_crossentropy',
              metrics=['accuracy'])

Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


## 模型可视化

In [2]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

NameError: name 'model' is not defined

## 训练

每次epoch保存较好的模型权重进行下一步的训练

In [4]:
checkpointer = ModelCheckpoint(filepath='bast_weights.h5', verbose=1, save_best_only=True, validation_split=0.2)
model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, validation_data=(X_train, y_train), 
          callbacks=[checkpointer])
model.save()
# model.fit(X_train, y_train, batch_size=128, nb_epoch=8, validation_split=0.2)

  if __name__ == '__main__':


Train on 20000 samples, validate on 5000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f54262545f8>

In [5]:
# 保存模型
model.save('merged_model.h5')

In [6]:
# 模型预测
y_pred = model.predict(X_test, verbose=1)
y_pred = y_pred.clip(min=0.005, max=0.995)



In [8]:
# 测试集预测
import os
import pandas as pd
from keras.preprocessing.image import *

# 获得当前目录
current_dir = os.getcwd()
df = pd.read_csv("sample_submission.csv")

image_size = (224, 224)
gen = ImageDataGenerator()
test_generator = gen.flow_from_directory(current_dir + "/data/test", image_size, shuffle=False, 
                                         batch_size=16, class_mode=None)

for i, fname in enumerate(test_generator.filenames):
    index = int(fname[fname.rfind('/')+1:fname.rfind('.')])
    df.set_value(index-1, 'label', y_pred[i])

df.to_csv('merged_pred.csv', index=None)
df.head(10)

Found 12500 images belonging to 1 classes.




Unnamed: 0,id,label
0,1,0.995
1,2,0.995
2,3,0.995
3,4,0.995
4,5,0.005
5,6,0.005
6,7,0.005
7,8,0.005
8,9,0.005
9,10,0.005
