## 特征提取

In [48]:
from keras.applications import Xception, xception
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Activation, Dropout, Flatten, Dense, Input, Lambda
from keras.layers.pooling import GlobalAveragePooling2D
import numpy as np
import h5py
import datetime

print('start')
starttime = datetime.datetime.now()

image_size = (299, 299)
input_shape = image_size + (3,)
x = Input(input_shape)
x = Lambda(xception.preprocess_input)(x)
model = Xception(input_tensor=x, input_shape=input_shape, weights='imagenet', include_top=False, pooling='avg')
print('input shape: ', model.input.shape)
print('output shape: ', model.output.shape)


batch_size = 2
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

train_generator = datagen.flow_from_directory(
    'dataset-mini10/train',
    target_size=image_size,
    batch_size=batch_size,
    shuffle=False,
    save_to_dir='save', save_prefix='catpre', save_format='jpeg')
print('train_generator.samples:', train_generator.samples)
print('train_generator.classes:', train_generator.classes)
bottleneck_features_train = model.predict_generator(train_generator, train_generator.samples)
print('bottleneck_features_train.shape:', bottleneck_features_train.shape)


test_generator = datagen.flow_from_directory(
    'dataset-mini10/test',
    target_size=image_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)
print('test_generator.samples:', test_generator.samples)
print('test_generator.classes:', test_generator.classes)
bottleneck_features_test = model.predict_generator(test_generator, test_generator.samples)
print('bottleneck_features_test.shape:', bottleneck_features_test.shape)

with h5py.File("bottleneck_features.h5", 'w') as h:
    h.create_dataset('train', data=bottleneck_features_train)
    h.create_dataset('labels', data=train_generator.classes)
    h.create_dataset('test', data=bottleneck_features_test)

print('complete!')
endtime = datetime.datetime.now()
print (endtime - starttime)

start
input shape:  (?, 299, 299, 3)
output shape:  (?, 2048)
Found 10 images belonging to 2 classes.
train_generator.samples: 10
train_generator.classes: [0 0 0 0 0 1 1 1 1 1]
bottleneck_features_train.shape: (10, 2048)
Found 3 images belonging to 1 classes.
test_generator.samples: 3
test_generator.classes: [0 0 0]
bottleneck_features_test.shape: (3, 2048)
complete!
0:01:21.425308


## 搭建模型

In [50]:
from sklearn.utils import shuffle

with h5py.File('bottleneck_features.h5','r') as h:
    X_train = np.array(h['train'])
    y_train = np.array(h['labels'])
    X_test = np.array(h['test'])
    print('type:', type(X_train))

print('X_train', X_train.shape)
print('y_train', y_train.shape)
print('X_test', X_test.shape)

X_train, y_train = shuffle(X_train, y_train)

model = Sequential()
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(X_train,
          y_train,
          epochs=10,
          batch_size=batch_size,
          validation_split=0.2)
model.save_weights('bottleneck_fc_model.h5')

ModuleNotFoundError: No module named 'sklearn'