# 0 环境配置

In [1]:
# Root directory of the dataset; its 'train', 'valid' and 'test' sub-folders are read by the generators below.
# NOTE(review): hard-coded absolute path — adjust for the machine the notebook runs on.
basedir = "/home/ubuntu/dishui_ddd/input/"

In [2]:
import os
import cv2
import glob
import numpy as np
import pandas as pd

# Seed NumPy's global RNG.
# NOTE(review): only NumPy is seeded in this cell; the deep-learning backend's RNG is not — confirm whether full reproducibility is expected.
np.random.seed(2017)

# NOTE(review): star imports supply names used later (Input, Dense, Dropout, GlobalAveragePooling2D,
# Model, VGG16, ImageDataGenerator) but hide which module each one comes from.
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.applications import *
from keras.preprocessing.image import *
print("import done")

Using TensorFlow backend.


import done


# 1 预设参数

In [3]:
# Tag used to build the saved-model path ("models/<modelname>.h5") and the submission CSV path.
modelname = "Vgg16"

In [4]:
from keras.applications.vgg16 import preprocess_input

In [5]:
# Tunable settings.
# model_image_size: passed as the target size to every flow_from_directory call and used for the model's Input shape.
# batch_size: batch size of all three generators; also used to derive the number of steps per epoch.
model_image_size = (224, 224)
batch_size = 16

# 2 数据生成

In [6]:
# Both generator factories only apply the VGG16 `preprocess_input` function; no augmentation is configured.
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Training images: shuffled, class_mode="categorical".
train_generator = train_gen.flow_from_directory(
    os.path.join(basedir, 'train'),
    model_image_size,
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True,
)

# Validation images: same settings as training, read from the 'valid' folder.
valid_generator = gen.flow_from_directory(
    os.path.join(basedir, 'valid'),
    model_image_size,
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True,
)

# Test images: no labels (class_mode=None) and no shuffling, so the order of
# predictions can be matched against `test_generator.filenames`.
test_generator = gen.flow_from_directory(
    os.path.join(basedir, "test"),
    model_image_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)

Found 19529 images belonging to 10 classes.
Found 2072 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


# 3 搭建模型

In [7]:
from keras import backend as K
# Clear the backend session before the model is built in the next cell
# (presumably to drop any graph left over from an earlier run of the notebook — confirm).
K.clear_session()

In [8]:
# VGG16 convolutional base (ImageNet weights, no top classifier) followed by a
# small head: global average pooling -> dropout(0.5) -> 10-way softmax.
inputs = Input(shape=(model_image_size[0], model_image_size[1], 3))
base_model = VGG16(weights='imagenet', include_top=False, input_tensor=inputs)

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(10, activation='softmax')(x)
model = Model(inputs, x)

n_base_layers = len(base_model.layers)
print("total layer count {}".format(n_base_layers))

# Freeze the first `n_base_layers` layers of the full model, leaving only the
# layers added after the base trainable.
for layer in model.layers[:n_base_layers]:
    layer.trainable = False

total layer count 19


# 4 训练模型

In [9]:
# Number of batches needed to cover every image exactly once: ceil(samples / batch_size).
# The former `samples // batch_size + 1` scheduled one surplus batch whenever the
# sample count was an exact multiple of the batch size.
steps_train_sample = (int(train_generator.samples) + batch_size - 1) // batch_size
steps_valid_sample = (int(valid_generator.samples) + batch_size - 1) // batch_size
# NOTE: the labels below say "samples", but the values printed are step (batch) counts.
print("train_generator.samples = {}".format(steps_train_sample))
print("valid_generator.samples = {}".format(steps_valid_sample))

train_generator.samples = 1221
valid_generator.samples = 130


## （1） epoch 1，optimizer='rmsprop'

In [10]:
# Stage 1: compile with RMSprop and train for a single pass over the training
# generator, validating on the validation generator afterwards.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
model.fit_generator(
    train_generator,
    steps_per_epoch=int(steps_train_sample),
    epochs=1,
    validation_data=valid_generator,
    validation_steps=steps_valid_sample,
)

Epoch 1/1


<keras.callbacks.History at 0x7f97fc2d4eb8>

In [11]:
# Persist the model after the first training stage.
# The target directory is created first so a fresh checkout does not lose a
# trained epoch to a missing "models/" folder.
model_path = "models/" + modelname + ".h5"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)
print("model saved!")

model saved!


## （2） epoch 2，optimizer='rmsprop'

In [12]:
# Stage 2: one more pass over the training data with the same settings as stage 1.
# NOTE(review): the model is compiled again here; this presumably creates a fresh
# RMSprop optimizer and discards the state accumulated in stage 1 — confirm this is intended.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
model.fit_generator(
    train_generator,
    steps_per_epoch=int(steps_train_sample),
    epochs=1,
    validation_data=valid_generator,
    validation_steps=steps_valid_sample,
)

Epoch 1/1


<keras.callbacks.History at 0x7f97fc2d4c50>

# 5 输出测试

In [16]:
def predict_test(model,  model_image_size, csv_name):
    """Predict class probabilities for the test set and write a submission CSV.

    Reads the notebook-level `test_generator` and `batch_size`.

    Args:
        model: object exposing `predict_generator(generator, steps=..., verbose=...)`.
        model_image_size: unused; kept so existing calls keep working.
        csv_name: path of the CSV file to write. Its parent directory is
            created when missing.

    The CSV has an `img` column (file base name) followed by one `c<i>` column
    per predicted class, sorted by `img`, with probabilities written using
    three decimals.
    """
    file_names = [os.path.basename(path) for path in test_generator.filenames]
    n_images = len(file_names)

    # Ceil division: just enough batches to cover every test image once.
    steps = (test_generator.samples + batch_size - 1) // batch_size

    # Rewind the generator so predictions line up with `filenames` even when
    # the generator was already (partially) consumed by an earlier call.
    test_generator.reset()
    y_pred = model.predict_generator(test_generator, steps=steps, verbose=1)

    # Drop any surplus rows produced if the generator wrapped around.
    y_pred = np.asarray(y_pred)[:n_images]

    # Keep the probabilities numeric: the previous implementation packed names
    # and floats into one NumPy array, which turned every value into a string
    # and made `float_format` a no-op.
    class_columns = ['c%d' % i for i in range(y_pred.shape[1])]
    df = pd.DataFrame(y_pred, columns=class_columns)
    df.insert(0, 'img', file_names)
    df = df.sort_values(by='img')

    out_dir = os.path.dirname(csv_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(csv_name, index=False, float_format='%.3f')
    print("submission saved")

In [17]:
# Run inference on the test set with the trained model and write the results to
# "submission/<modelname>.csv".
predict_test(model,  model_image_size, "submission/"+modelname+".csv")

submission saved
