## 数据预处理

In [1]:
import os
import cv2
import h5py
import numpy as np
import pandas as pd
from keras.applications import Xception, xception
from keras.models import Sequential, Model
from keras.layers import Activation, Dropout, Flatten, Dense, Input, Lambda
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tqdm import tqdm

Using TensorFlow backend.


In [2]:

def _list_images(directory):
    """Return the file names in ``directory`` in a deterministic order.

    Hidden files (names starting with '.', e.g. ``.DS_Store``) are skipped
    because they are not images. The listing is sorted because ``os.listdir``
    returns entries in arbitrary order, which would make the later seeded
    train/validation split differ between machines.
    """
    return sorted(name for name in os.listdir(directory) if not name.startswith('.'))


def _load_images(directory, image_names, target_shape):
    """Load, resize and channel-reverse every named image into one uint8 array.

    Args:
        directory: folder that contains the image files.
        image_names: file names (relative to ``directory``) to load, in order.
        target_shape: ``(height, width, channels)`` of each output image.

    Returns:
        ``np.uint8`` array of shape ``(len(image_names),) + target_shape``;
        row ``i`` corresponds to ``image_names[i]``.

    Raises:
        IOError: if OpenCV cannot decode one of the files.
    """
    height, width = target_shape[0], target_shape[1]
    images = np.zeros((len(image_names),) + tuple(target_shape), dtype=np.uint8)
    for i, image_name in enumerate(tqdm(image_names)):
        image_path = os.path.join(directory, image_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: %s' % image_path)
        # cv2.resize expects dsize as (width, height), not (height, width).
        image = cv2.resize(image, (width, height))
        # OpenCV decodes to BGR; reverse the channel axis to get RGB.
        images[i] = image[:, :, ::-1]
    return images


data_path_train = './dataset-mini-30-15/train'
data_path_test = './dataset-mini-30-15/test'
image_names_train = _list_images(data_path_train)
image_names_test = _list_images(data_path_test)
# Number of training samples
nb_train = len(image_names_train)
# Number of test samples
nb_test = len(image_names_test)
input_shape = (299, 299, 3)

# Labels are derived from the file name -- cat: 0, dog: 1
labels = np.array([1 if 'dog' in name else 0 for name in image_names_train],
                  dtype=np.float64)
trains = _load_images(data_path_train, image_names_train, input_shape)
tests = _load_images(data_path_test, image_names_test, input_shape)

print('Training data size: %d' % nb_train)
print('Test data size: %d' % nb_test)

100%|██████████| 30/30 [00:00<00:00, 107.35it/s]
100%|██████████| 15/15 [00:00<00:00, 179.04it/s]

Training data size: 30
Training data size: 15





## 特征提取

In [3]:
# Feature extractor: raw uint8 RGB images go through Xception's own
# preprocessing (wrapped in a Lambda layer so it is part of the graph) and
# then through the ImageNet-pretrained convolutional base with global
# average pooling, yielding one feature vector per image.
x = Lambda(xception.preprocess_input)(Input(shape=input_shape))
model = Xception(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_tensor=x,
    input_shape=input_shape,
)

bottleneck_features_train = model.predict(trains, batch_size=128)
bottleneck_features_test = model.predict(tests, batch_size=128)

# Cache the features on disk so the classifier can be trained without
# re-running the expensive convolutional base.
with h5py.File("bottleneck_features.h5", 'w') as h5_file:
    for dataset_name, dataset_values in (
        ('trains', bottleneck_features_train),
        ('labels', labels),
        ('tests', bottleneck_features_test),
    ):
        h5_file.create_dataset(dataset_name, data=dataset_values)

print('bottleneck features have been wrote to bottleneck_features.h5')

bottleneck features have been wrote to bottleneck_features.h5


## 构建模型

In [5]:
# Reload the cached bottleneck features written by the extraction step.
with h5py.File('bottleneck_features.h5', 'r') as h5_file:
    X_all, y_all, X_test = (
        np.array(h5_file[key]) for key in ('trains', 'labels', 'tests')
    )

# Hold out 20% of the labelled features for validation (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    shuffle=True,
    random_state=2018,
)

# Classifier head: dropout followed by a single sigmoid unit that outputs
# the probability of class 1.
features_in = Input(shape=(X_train.shape[1],))
dropped = Dropout(0.3)(features_in)
prob_out = Dense(1, activation='sigmoid')(dropped)
model = Model(features_in, prob_out)

model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

print('Model ready!')

Model ready!


## 训练

In [6]:
# Sanity-check the array shapes before training.
for array in (labels, X_train, y_train, X_val, y_val):
    print(array.shape)

# Kept as the last expression so the returned History object is displayed.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=15,
)


(30,)
(24, 2048)
(24,)
(6, 2048)
(6,)
Train on 24 samples, validate on 6 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x12374bc50>

## 测试

In [8]:
# Predict on the test features; the model has a single sigmoid output, so
# y_pred has one column holding one probability per test image.
y_pred = model.predict(X_test, verbose=1)
# Keep predictions away from exactly 0 and 1.
# NOTE(review): presumably this bounds the log-loss penalty for confident
# mistakes -- confirm against the competition metric.
y_pred = y_pred.clip(min=0.005, max=0.995)

df = pd.read_csv("sample_submission.csv")

# Row i of y_pred corresponds to image_names_test[i], because the features
# were extracted in that same order.
for i, image_name in enumerate(image_names_test):
    # Test files are named "<id>.<ext>".
    # NOTE(review): assumes row k of sample_submission.csv holds id k + 1 and
    # that column 1 is the prediction column -- TODO confirm against the file.
    index = int(image_name.split('.')[0]) - 1
    # Store a plain Python float rather than the 1-element row array, so the
    # assignment does not rely on implicit array-to-scalar conversion.
    df.iat[index, 1] = float(y_pred[i, 0])

df.to_csv('pred.csv', index=False)
print('The prediction result has been wrote to pred.csv')

The prediction result has been wrote to pred.csv
