### Imports

In [5]:
import os

import pandas as pd
from keras.applications.resnet50 import ResNet50
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from keras.models import Model
from sklearn.model_selection import train_test_split

# Resolve project locations relative to the directory the notebook runs in.
cwd = os.getcwd()
data_dir = os.path.join(cwd, 'data/')

# Load the image index and store labels as strings.
images_data = pd.read_csv(cwd+"/images_data.csv").astype({'label': str})

# The 'classes' column tells which rows belong to the train / test partitions.
is_train = images_data['classes'] == 'train'
is_test = images_data['classes'] == 'test'
train_data = images_data[is_train]
img_test = images_data[is_test]

# A fixed random_state guarantees the same train/validation split on every run.
img_train, img_valid = train_test_split(train_data, random_state=0, test_size=0.2)

print(len(img_train), len(img_valid))
print(len(img_test))

19997 5000
12500


In [6]:
# Peek at the first five rows of the training split to sanity-check its columns.
img_train.head(n=5)

Unnamed: 0,classes,full_path,label,width,height,ratio
33982,train,/Users/honghuanghui/MLClass/src/capstone/proje...,1,302,404,0.747525
14539,train,/Users/honghuanghui/MLClass/src/capstone/proje...,0,375,499,0.751503
30594,train,/Users/honghuanghui/MLClass/src/capstone/proje...,1,374,500,0.748
13049,train,/Users/honghuanghui/MLClass/src/capstone/proje...,1,374,500,0.748
30529,train,/Users/honghuanghui/MLClass/src/capstone/proje...,1,499,400,1.2475


In [7]:
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

target_image_size = (224, 224)

# The downstream model is an ImageNet-pretrained ResNet50 with frozen layers,
# so images must go through the same preprocessing the pretrained weights
# expect (keras.applications.resnet50.preprocess_input) rather than a plain
# 1/255 rescale; otherwise the frozen features are computed on out-of-range
# inputs.
img_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by the training and validation generators.
flow_kwargs = dict(
    directory=data_dir+'train',
    x_col='full_path',
    y_col='label',
    target_size=target_image_size,
    class_mode='binary',
    batch_size=100,
)

# Batches of (image, binary label) drawn from the training split.
train_img_generator = img_gen.flow_from_dataframe(dataframe=img_train, **flow_kwargs)

# Batches of (image, binary label) drawn from the validation split.
validation_img_generator = img_gen.flow_from_dataframe(dataframe=img_valid, **flow_kwargs)

Found 19997 validated image filenames belonging to 2 classes.
Found 5000 validated image filenames belonging to 2 classes.


In [None]:
def train_func(loss_name="binary_crossentropy", optimizer_name="adam",
               input_shape=(224, 224, 3)):
    """Build a frozen-ResNet50 binary classifier plus its training helpers.

    The ImageNet-pretrained ResNet50 (without its top) is used as a fixed
    feature extractor; only the new pooling/dropout/sigmoid head is trainable.

    Args:
        loss_name: Loss identifier passed to ``model.compile``.
        optimizer_name: Optimizer identifier passed to ``model.compile``.
        input_shape: Shape of the input tensor fed to ResNet50.

    Returns:
        A ``(model, best_model, tensor_log)`` tuple: the compiled model, a
        ``ModelCheckpoint`` callback that keeps only the best weights, and the
        TensorBoard log directory path for this loss/optimizer combination.
    """
    base_model = ResNet50(input_tensor=Input(input_shape), weights='imagenet',
                          include_top=False)
    # Freeze every pretrained layer so only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    # Classification head: pooled features -> dropout -> single sigmoid unit.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dropout(0.5)(x)
    x = Dense(1, activation='sigmoid')(x)

    model = Model(base_model.input, x)
    model.compile(loss=loss_name, optimizer=optimizer_name, metrics=['accuracy'])

    # Checkpoint file and log directory share a name derived from the settings.
    run_name = "resnet_best_{}_{}".format(loss_name, optimizer_name)
    # NOTE(review): 'val_acc' is the metric key in older Keras 2 releases;
    # newer releases log it as 'val_accuracy' - confirm against the installed version.
    best_model = ModelCheckpoint(run_name + ".h5",
                                 monitor='val_acc', verbose=0, save_best_only=True)
    tensor_log = "./" + run_name + "_tensor_log"
    return model, best_model, tensor_log
    

In [None]:
model, best_model, tensor_log = train_func("binary_crossentropy", "adam")

# Keras 2 sizes an epoch in batches (steps), not samples, so the per-epoch
# sample budgets (2048 train / 1024 validation) are converted to batch counts
# using each generator's batch size.
train_steps = max(1, 2048 // train_img_generator.batch_size)
valid_steps = max(1, 1024 // validation_img_generator.batch_size)

model.fit_generator(
        train_img_generator,
        steps_per_epoch=train_steps,
        epochs=50,
        validation_data=validation_img_generator,
        validation_steps=valid_steps,
        # The keyword is `callbacks` (plural); `callback` is rejected.
        callbacks=[best_model, TensorBoard(log_dir=tensor_log)])