In [1]:
import h5py
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from keras.models import *
from keras.layers import *
from keras.preprocessing.image import *
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot, plot_model

import os
np.random.seed(2018)


  from ._conv import register_converters as _register_converters
  (fname, cnt))
  (fname, cnt))
Using TensorFlow backend.
  return f(*args, **kwds)


### Read features:

In [2]:
# HDF5 feature files to combine; each one is read for its 'train', 'test'
# and 'label' datasets.
h5_filelist = ["feature_Xception.h5", "feature_InceptionV3.h5", "feature_InceptionResNetV2.h5"]

train_blocks, test_blocks = [], []
for h5_path in h5_filelist:
    with h5py.File(h5_path, 'r') as store:
        train_blocks += [np.array(store['train'])]
        test_blocks += [np.array(store['test'])]
        # 'label' is re-read from every file; the copy from the last file wins.
        y_train = np.array(store['label'])

# Join the per-file feature matrices side by side (along axis 1).
X_train = np.concatenate(train_blocks, axis=1)
X_test = np.concatenate(test_blocks, axis=1)

# Shuffle features and labels together so rows stay aligned.
X_train, y_train = shuffle(X_train, y_train)


In [3]:
# Sanity check: shape of the concatenated training feature matrix.
print(X_train.shape)

(25000, 5632)


### Model adjustment:

In [44]:
# Fully connected classification head on top of the concatenated features.
# NOTE(review): a later cell assigns `model` again -- confirm which
# architecture is meant to be trained.
input_tensor = Input(X_train.shape[1:])

# Layers are listed in the order they are applied to the input.
head_layers = [
    BatchNormalization(axis=1, name='bn_1'),
    Dropout(0.8, name='dropout_1'),
    Dense(1024, activation='relu', name='fc_1'),
    BatchNormalization(axis=1, name='bn_2'),
    Dropout(0.9, name='dropout_2'),
    Dense(1, activation='sigmoid', name='sigmoid'),
]

x = input_tensor
for head_layer in head_layers:
    x = head_layer(x)

model = Model(input_tensor, x)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
def identity_block(x, dense_num: int, block_index: int):
    """Apply a two-layer fully connected residual block to ``x``.

    The main branch is Dense -> BatchNormalization -> ReLU -> Dense ->
    BatchNormalization. The unmodified input is then added back to the
    branch output and a final ReLU is applied.

    Args:
        x: Input tensor. It is summed with the output of a
            ``Dense(dense_num)`` branch, so presumably its feature width
            has to equal ``dense_num`` -- confirm against the caller.
        dense_num: Number of units in each of the two Dense layers.
        block_index: Number embedded in the layer names
            (``top_block_<block_index>_...``).

    Returns:
        The tensor produced by the final ReLU activation.
    """
    prefix = 'top_block_{}_'.format(block_index)
    shortcut = x

    # Main branch: two Dense + BatchNorm pairs with a ReLU in between.
    branch = Dense(dense_num, activation=None, name=prefix + 'fc_1')(shortcut)
    branch = BatchNormalization(axis=1, name=prefix + 'bn_1')(branch)
    branch = Activation('relu')(branch)
    branch = Dense(dense_num, activation=None, name=prefix + 'fc_2')(branch)
    branch = BatchNormalization(axis=1, name=prefix + 'bn_2')(branch)

    # Residual connection, followed by the closing non-linearity.
    merged = add([branch, shortcut])
    return Activation('relu')(merged)


In [81]:
# Classification head with one residual block on top of the concatenated features.
input_tensor = Input(X_train.shape[1:])

# Stem: normalise, drop out, project to 1024 units, normalise, ReLU.
stem = BatchNormalization(axis=1, name='top_bn_1')(input_tensor)
stem = Dropout(0.7)(stem)
stem = Dense(1024, activation=None, name='top_fc_1')(stem)
stem = BatchNormalization(axis=1, name='top_bn_2')(stem)
stem = Activation('relu')(stem)

# Residual stage: width matches the 1024-unit stem output.
x = identity_block(stem, 1024, block_index=1)

# Classifier: dropout followed by a single sigmoid unit.
x = Dropout(0.7)(x)
x = Dense(1, activation='sigmoid', name='top_sigmoid')(x)

model = Model(input_tensor, x)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [80]:
# Train for 10 epochs with 20% of the rows held out for validation
# (20000 train / 5000 validation samples per the logged output).
# NOTE(review): the execution counts show this cell (In [80]) was run before
# the model-definition cell above it (In [81]) -- re-run top to bottom to
# confirm the logged results belong to the model defined above.
model.fit(X_train, y_train, batch_size=256, epochs=10, validation_split=0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f5e501d6898>

### Train on the full training data:

In [82]:
# Replace any previously saved weights file with the current model's weights.
weights_file = 'model_weights.h5'
if os.path.isfile(weights_file):
    os.remove(weights_file)
model.save_weights(weights_file)

# Render the model graph, including layer shapes, as an inline SVG.
model_graph = model_to_dot(model, show_shapes=True)
SVG(model_graph.create(prog='dot', format='svg'))

In [83]:
# Fit for 10 more epochs on every training row (validation_split=0 holds nothing out).
# NOTE(review): fit() continues from whatever weights `model` currently holds;
# when the notebook is run top to bottom that is after the validation run
# above -- confirm whether a freshly initialised model was intended here.
model.fit(X_train, y_train, batch_size=256, epochs=10, validation_split=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f5e501c66d8>

### Predict:

In [84]:
# X_test is an in-memory feature array (the result of np.concatenate), not a
# batch generator, so it must go through predict(); predict_generator()
# expects a generator/Sequence and a step count.
y_pred = model.predict(X_test, verbose=1)

# Keep every probability inside [0.005, 0.995] so no prediction is exactly
# 0 or 1 (presumably to bound the log-loss penalty on confident mistakes).
y_pred = y_pred.clip(min=0.005, max=0.995)




In [85]:
df = pd.read_csv("sampleSubmission.csv")

# The generator is only used for its `filenames` list. shuffle=False keeps
# the listing in a fixed order -- presumably the same order in which the test
# features were extracted; TODO confirm against the feature-export notebook.
image_size = (224, 224)
gen = ImageDataGenerator()
test_generator = gen.flow_from_directory("test", image_size, shuffle=False,
                                         batch_size=16, class_mode=None)

# Parse the numeric image id from each file name. os.path handles the
# platform's path separator and file names without an extension, which the
# previous rfind('/') / rfind('.') slicing did not.
image_ids = np.array(
    [int(os.path.splitext(os.path.basename(fname))[0])
     for fname in test_generator.filenames],
    dtype=int,
)
scores = np.ravel(y_pred)
assert len(image_ids) == len(scores), (
    "number of test files (%d) does not match number of predictions (%d)"
    % (len(image_ids), len(scores))
)

# The row for image id k sits at index label k - 1 of the sample submission.
df.loc[image_ids - 1, 'label'] = scores

df.to_csv('pred.csv', index=False)
df.head(10)

Found 12500 images belonging to 1 classes.


Unnamed: 0,id,label
0,1,0.995
1,2,0.995
2,3,0.995
3,4,0.995
4,5,0.005
5,6,0.005
6,7,0.005
7,8,0.005
8,9,0.005
9,10,0.005
