In [None]:
import numpy as np
np.random.seed(19906)
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.preprocessing import image
import cv2
import os
import h5py
%matplotlib inline
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.callbacks import EarlyStopping
from keras.models import *
from keras.layers import *
from keras.layers.core import Dropout
from keras.optimizers import Adam, SGD, RMSprop
from keras.regularizers import l2
from keras.applications.inception_v3 import InceptionV3, preprocess_input as inceptionv3_preinput
from keras.applications.xception import Xception, preprocess_input as xception_preinput
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input as inception_resnet_preinput
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [None]:
# Image directories for the individual data splits. Only `test_dir` is read
# by the visible cells (to enumerate the test file names); `train_dir` and
# `val_dir` are kept for any other cells that may rely on them.
train_dir, val_dir, test_dir = (
    "data/imgs/train2",
    "data/imgs/val2",
    "data/imgs/test1",
)

# Directory holding the cached bottleneck feature files; the trained mixed
# model is saved here as well.
saved_weights_dir = "saved_weights"

加载特征向量

In [None]:
# Bottleneck feature files, one per pretrained backbone. Each file is read for
# the datasets "train", "valid", "label" and "valid_label".
premodels = [
    "bottleneck_InceptionV3.h5",
    "bottleneck_Xception.h5",
    "bottleneck_InceptionResNetV2.h5"
]

X_train = []
X_valid = []
y_train = None
y_valid = None

for filename in premodels:
    with h5py.File(os.path.join(saved_weights_dir, filename), "r") as h:
        train_features = np.array(h["train"])
        valid_features = np.array(h["valid"])
        train_labels = np.array(h["label"])
        valid_labels = np.array(h["valid_label"])

    # Every feature row needs exactly one label row.
    if len(train_features) != len(train_labels) or len(valid_features) != len(valid_labels):
        raise ValueError(
            "{}: number of feature rows does not match number of labels".format(filename)
        )

    # The per-backbone features are concatenated column-wise below and share a
    # single label array, so all files must describe the same samples in the
    # same order. Fail loudly instead of silently training on mismatched labels.
    if y_train is None:
        y_train, y_valid = train_labels, valid_labels
    elif not (np.array_equal(y_train, train_labels)
              and np.array_equal(y_valid, valid_labels)):
        raise ValueError(
            "{}: labels differ from those stored in {}".format(filename, premodels[0])
        )

    X_train.append(train_features)
    X_valid.append(valid_features)

# Stack the feature vectors of all backbones side by side (one row per sample).
X_train = np.concatenate(X_train, axis=1)
X_valid = np.concatenate(X_valid, axis=1)

# Shuffle features and labels together so each row keeps its own label.
X_train, y_train = shuffle(X_train, y_train)
X_valid, y_valid = shuffle(X_valid, y_valid)

构建模型

In [None]:
# Classification head on top of the concatenated bottleneck features:
# dropout followed by a 10-way softmax layer.
input_tensor = Input(shape=X_train.shape[1:])
dropout_layer = Dropout(rate=0.5)
softmax_layer = Dense(units=10, activation='softmax')
x = softmax_layer(dropout_layer(input_tensor))
mix_model = Model(inputs=input_tensor, outputs=x)

# Adam with a small learning rate; the labels are fed to a categorical
# cross-entropy loss, and accuracy is tracked as an additional metric.
op = Adam(lr=0.00001)
mix_model.compile(
    loss='categorical_crossentropy',
    optimizer=op,
    metrics=['accuracy'],
)

开始训练

In [None]:
# Training hyper-parameters.
epochs = 10
batch_size = 128

# Fit the head on the training features and evaluate on the validation
# features after every epoch; the returned history feeds the plots below.
history = mix_model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=epochs,
    batch_size=batch_size,
)

保存模型

In [None]:
# Persist the trained model next to the cached bottleneck features.
save_file = os.path.join(saved_weights_dir, "mixed_model.h5")

print("model will save at:", save_file)
mix_model.save(filepath=save_file)
print("save model successed")

In [None]:
# Plot the curves recorded during training: loss on the left, accuracy on the right.
metrics = history.history

# Older Keras releases record accuracy under 'acc'/'val_acc', newer ones under
# 'accuracy'/'val_accuracy'; use whichever key is present to avoid a KeyError.
acc_key = 'acc' if 'acc' in metrics else 'accuracy'
val_acc_key = 'val_' + acc_key

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(metrics['loss'])
loss_ax.plot(metrics['val_loss'])
loss_ax.set_title('Training and validation loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
# The second curve comes from the validation data passed to fit(), not a test set.
loss_ax.legend(['train', 'validation'], loc='upper left')

acc_ax.plot(metrics[acc_key])
acc_ax.plot(metrics[val_acc_key])
acc_ax.set_title('Training and validation acc')
acc_ax.set_ylabel('acc')
acc_ax.set_xlabel('epoch')
acc_ax.legend(['train', 'validation'], loc='upper left')

plt.show()

开始预测

加载测试集特征向量

In [None]:
# Test-set bottleneck feature files, one per pretrained backbone; each is read
# for its "test" dataset.
test_premodels = [
    "bottleneck_test_InceptionV3.h5",
    "bottleneck_test_Xception.h5",
    "bottleneck_test_InceptionResNetV2.h5"
]

test_feature_blocks = []
for feature_file in test_premodels:
    feature_path = os.path.join(saved_weights_dir, feature_file)
    with h5py.File(feature_path, 'r') as h5_file:
        test_feature_blocks.append(np.array(h5_file['test']))

# Stack the feature vectors of all backbones side by side (one row per image).
X_test = np.concatenate(test_feature_blocks, axis=1)

# Unshuffled generator over the test images; the prediction cell reads its
# `filenames` attribute to enumerate the test files.
test_datagen = ImageDataGenerator()
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(299, 299),
    shuffle=False,
    batch_size=128,
    class_mode=None,
)

预测并输出可提交 Kaggle 的结果文件

In [None]:
# Template submission; its class columns are overwritten with the predictions.
sub_df = pd.read_csv("data/sample_submission.csv")

y_preds = mix_model.predict(X_test, verbose=1)
# Keep probabilities away from 0 and 1 (presumably to bound the log-loss
# penalty of a confident wrong prediction -- confirm against the metric used).
y_preds = y_preds.clip(min=0.005, max=0.995)
print("y_pred shape {}".format(y_preds.shape))

# NOTE(review): this assumes the rows of X_test were extracted in the same
# order as test_generator.filenames (shuffle=False) -- confirm against the
# feature-extraction notebook.
test_filenames = [os.path.basename(fname) for fname in test_generator.filenames]
if len(test_filenames) != len(y_preds):
    raise ValueError(
        "got {} predictions for {} test images".format(len(y_preds), len(test_filenames))
    )
if len(sub_df) != len(y_preds):
    raise ValueError(
        "sample submission has {} rows but there are {} predictions".format(
            len(sub_df), len(y_preds))
    )

class_cols = ['c' + str(k) for k in range(y_preds.shape[1])]
pred_df = pd.DataFrame(y_preds, columns=class_cols)

# If the template names its images, align predictions by file name instead of
# trusting that both listings happen to be in the same order.
if 'img' in sub_df.columns:
    pred_df.index = test_filenames
    pred_df = pred_df.reindex(sub_df['img'])
    if pred_df.isnull().values.any():
        raise ValueError("sample submission lists images that have no prediction")

# Assign whole columns at once instead of writing one cell at a time.
for col in class_cols:
    sub_df[col] = pred_df[col].values

print(sub_df.head())

sub_df.to_csv('data/pred.csv', index=False)
print("predict done.")