# 数据预处理

In [1]:
#载入必要库
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob
# from tqdm import tqdm
import cv2
from PIL import Image

In [2]:
# Read the CSV that lists each training image path together with its labels.
train_df = pd.read_csv('visual_china_train.csv')
# Keep only the file name of every path and re-root it under the local 'train/' folder.
# A single vectorised column assignment replaces the per-row chained
# `train_df['img_path'].iloc[i] = ...` writes, which are not guaranteed to write back
# to the frame (SettingWithCopyWarning) and relied on a hard-coded row count.
train_df['img_path'] = 'train/' + train_df['img_path'].str.split('/').str[-1]
img_paths = list(train_df['img_path'])

#制作标签索引到标签名称的哈希表
def hash_tag(filepath):
    """Build an index -> tag lookup table from a UTF-8 text file with one tag per line.

    Args:
        filepath: path of the tag list file.

    Returns:
        dict mapping the 0-based line number to that line's text with
        surrounding whitespace stripped.
    """
    # `with` guarantees the file handle is closed, even if reading fails.
    with open(filepath, "r", encoding='utf-8') as fo:
        return {idx: line.strip() for idx, line in enumerate(fo)}

def load_ytrain(filepath):
    """Load the label matrix stored under the key 'tag_train' in an .npz archive.

    Args:
        filepath: path of the .npz file.

    Returns:
        The array saved as 'tag_train'.
    """
    # The archive is closed on exit; the array has already been read into memory.
    with np.load(filepath) as archive:
        return archive['tag_train']

def arr2tag(arr):
    """Convert a score matrix into per-row lists of tag names.

    For every row of ``arr`` the positions whose value is greater than 0.5 are
    looked up in the module-level ``hash_tag`` table.

    Args:
        arr: array whose first axis enumerates samples.

    Returns:
        A list with one list of tag names per row of ``arr``.
    """
    return [
        [hash_tag[j] for j in np.where(arr[row] > 0.5)[0].tolist()]
        for row in range(arr.shape[0])
    ]

filepath = "valid_tags.txt"
# NOTE: this rebinds the name `hash_tag` from the loader function to the dict it
# returns. `arr2tag` indexes this dict, and re-running this cell fails because
# `hash_tag` is then no longer callable.
hash_tag = hash_tag(filepath)

# Label matrix for the training images, read from the 'tag_train' entry of the archive.
y_train = load_ytrain('tag_train.npz')

In [3]:
img_paths[:10]

['train/2074d1cd049f38bb42198e18b23c0443230afb68.jpg',
 'train/40591a781c7a3af93232a83dc2e1665d38b772bf.jpg',
 'train/d41f7b535f13c15802aa393bc41d2b257992faee.jpg',
 'train/f7ecc1a8182256885efce7edd40bfd84be6f05fb.jpg',
 'train/09574a46403ab85188404d9383fe7fff51fac3a7.jpg',
 'train/a386169ad6ffef3e1082084406f4a2cfbc698e6c.jpg',
 'train/6df3766a0fee6493ed6daf5928dd16469c834dfd.jpg',
 'train/ff67cccfbdbeee70d12f9b96678d200732be659d.jpg',
 'train/e03bc9055cffad23a9fa96f5b4216ce26b79ae71.jpg',
 'train/180aa34f37c9ab2600b4156b6bdab3aa42c418be.jpg']

In [4]:
hash_tag[0]

'0到11个月'

In [5]:
y_train.shape

(35000, 6941)

In [6]:
#打乱并分割训练集和验证集
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG before shuffling; shuffle() is called without a random_state,
# so presumably it draws from this global state — TODO confirm against the sklearn version in use.
np.random.seed(2018)  
# Shuffle paths and labels together so they stay aligned.
X_train_path,y_train = shuffle(np.array(img_paths),y_train)
# Hold out 20% of the samples for validation.
X_train_path,X_val_path,y_train,y_val = train_test_split(X_train_path,y_train,test_size=0.2,random_state=0)
# Convert the path arrays back to plain Python lists.
X_train_path,X_val_path = X_train_path.tolist(),X_val_path.tolist()

In [7]:
X_train_path[:10],X_val_path[:10]

(['train/d06a3cf891f199c38f924ac531dafca3ab7c11ac.jpg',
  'train/07f45aaa584000e57c6e458c518918ce2f650f3d.jpg',
  'train/524a45d93cc271933d0f256a42c5b9a18d212a39.jpg',
  'train/5f7d75849ac3f5843f6c0716b51e2af20c85ecec.jpg',
  'train/0b09c36dc627e05819ead79bae716e7802d5e152.jpg',
  'train/efa31b69c7e54ee6afb4fa996ef8f1c6087fb595.jpg',
  'train/5d664de92e1fe65d81426dc167cb06b269b3df2f.jpg',
  'train/aa5470708339820c26ae5f00ce4c7692c0d970b5.jpg',
  'train/554b8e38f922179d7c6d4067fe91c7e08681bbca.jpg',
  'train/618d265000c09c047442911b7f0b262f93949a0e.jpg'],
 ['train/2ae46e9b9ba3216b59a2d5586653d685c1c2ed71.jpg',
  'train/82d48cba0f4812c12d614eae91c5f798d5b47ec0.jpg',
  'train/86de6fa92945a342892fafdc1b226e2304ff2a54.jpg',
  'train/d7a439b554bbd6bb52c8521eed23aad5045cf579.jpg',
  'train/59481eae768f08ab8e25357e7f00a00a1faf8d2e.jpg',
  'train/5cca490c0b9a10cca0caf9067d2a8d8a912bf312.jpg',
  'train/5828ceb7feb72d201fa05e787eabce24c1e01f43.jpg',
  'train/e48686a491b1124a188ce454ac9fa5d8360e87

定义分批读取图片的生成器函数，不用将图片全部读入内存

In [8]:
#读取图片函数
def get_image(img_paths, img_size, canvas_size=800):
    """Load images into one uint8 batch array without changing their aspect ratio.

    Each image is converted to RGB when necessary, tiled from the top-left
    corner across a square black canvas, and the canvas is then resized to
    ``img_size`` x ``img_size`` with LANCZOS resampling.

    Args:
        img_paths: sequence of image file paths (must support ``len``).
        img_size: side length, in pixels, of every output image.
        canvas_size: side length of the square tiling canvas. The default of
            800 reproduces the previously hard-coded value.

    Returns:
        np.ndarray of shape ``(len(img_paths), img_size, img_size, 3)`` and
        dtype ``uint8``.
    """
    X = np.zeros((len(img_paths), img_size, img_size, 3), dtype=np.uint8)
    blank_canvas = Image.new('RGB', (canvas_size, canvas_size), 'Black')
    for idx, img_path in enumerate(img_paths):
        # The context manager releases the file handle even if decoding fails.
        with Image.open(img_path) as img:
            tile = img if img.mode == 'RGB' else img.convert("RGB")
            width, height = tile.size
            canvas = blank_canvas.copy()
            # Repeat the image across the canvas instead of stretching it, so the
            # picture keeps its proportions; tiles that cross the canvas edge are
            # clipped by paste().
            for left in range(0, canvas_size, width):
                for top in range(0, canvas_size, height):
                    canvas.paste(tile, (left, top))
        # LANCZOS resampling gives a high-quality resize.
        resized = canvas.resize((img_size, img_size), Image.LANCZOS)
        X[idx] = np.asarray(resized)
    return X

def get_data_batch(X_path, Y, batch_size, img_size):
    """Endlessly yield ``(images, labels)`` batches, loading images lazily from disk.

    Args:
        X_path: sequence of image paths.
        Y: labels, sliced with the same indices as ``X_path``.
        batch_size: number of samples per batch (the last batch of a pass may be smaller).
        img_size: side length passed on to ``get_image``.

    Yields:
        Tuples ``(x, y)`` where ``x`` is the array returned by ``get_image`` for
        the batch and ``y`` is the matching slice of ``Y``.

    Raises:
        ValueError: on the first ``next()`` if ``X_path`` is empty. Without this
            guard the endless loop would spin forever and never yield.
    """
    if len(X_path) == 0:
        raise ValueError('X_path is empty: no batches can be generated.')
    while True:
        for start in range(0, len(X_path), batch_size):
            stop = start + batch_size
            yield get_image(X_path[start:stop], img_size), Y[start:stop]

# 自定义metrics

In [9]:
#建立keras后端计算fmeasure函数
import keras.backend as K

def precision(y_true, y_pred):
    """Batch-wise precision computed with Keras backend ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are clipped to
    [0, 1] and rounded before summing, so scores are effectively binarised.

    Returns:
        true positives / (predicted positives + epsilon).
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged + K.epsilon())

def recall(y_true, y_pred):
    """Batch-wise recall computed with Keras backend ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are clipped to
    [0, 1] and rounded before summing.

    Returns:
        true positives / (actual positives + epsilon).
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual + K.epsilon())

def fbeta_score(y_true, y_pred, beta=1):
    """F-beta score (weighted harmonic mean of precision and recall), scaled to 0-100.

    Args:
        y_true: ground-truth label tensor.
        y_pred: predicted score tensor.
        beta: weight of recall relative to precision; must be >= 0.

    Returns:
        A backend tensor holding ``100 * (1 + beta^2) * p * r / (beta^2 * p + r + epsilon)``.

    Raises:
        ValueError: if ``beta`` is negative.
    """
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')

    # No explicit "no positives -> return 0" branch: a Python `if` on a symbolic
    # tensor comparison is not evaluated per batch (it is dead or raises, depending
    # on the backend), and it returned a plain int instead of a tensor. The
    # epsilon-guarded formula already gives 0 there, because recall() is 0 / epsilon.
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return score * 100

def fmeasure(y_true, y_pred):
    """F-measure metric: ``fbeta_score`` evaluated with ``beta=1``."""
    f1 = fbeta_score(y_true, y_pred, beta=1)
    return f1

Using TensorFlow backend.


In [10]:
# Create the batch generators
batch_size = 8
img_size = 500
# Endless generators over the training and validation splits; images are read per batch.
train_generator = get_data_batch(X_train_path,y_train,batch_size=batch_size,img_size=img_size) 
val_generator = get_data_batch(X_val_path,y_val,batch_size=batch_size,img_size=img_size)

# 搭建预训练fine-tune模型

1、预训练模型——InceptionResNetV2进行fine-tune训练

In [11]:
from keras.layers import *
from keras.models import *
from keras.callbacks import *
from keras.optimizers import *
from keras.applications import *

def MODEL(MODEL,img_size,out_dims,func=None,weights=None,include_top=False,fc_units=3072):
    """Build a multi-label classifier on top of a Keras application backbone.

    Note that inside the body the parameter ``MODEL`` (the backbone constructor)
    shadows this function's own name.

    Args:
        MODEL: backbone constructor accepting ``weights`` and ``include_top``.
        img_size: side length of the square RGB input.
        out_dims: number of sigmoid outputs.
        func: optional preprocessing function applied through a ``Lambda`` layer.
            When ``None`` (the default) no preprocessing layer is inserted;
            previously ``Lambda(None)`` made the default unusable.
        weights: forwarded to the backbone constructor.
        include_top: forwarded to the backbone constructor.
        fc_units: width of the hidden dense layer. The default of 3072
            reproduces the previously hard-coded value.

    Returns:
        An uncompiled ``Model`` mapping images to ``out_dims`` sigmoid scores.
    """
    inputs = Input((img_size, img_size, 3))  # instantiate the input tensor
    x = inputs
    if func is not None:
        x = Lambda(func)(x)

    base_model = MODEL(weights=weights, include_top=include_top)
    x = base_model(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(fc_units, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(out_dims, activation='sigmoid')(x)
    return Model(inputs, x)

In [12]:
from tensorflow.python.keras.applications.inception_resnet_v2 import preprocess_input
# Build the 500x500-input, 6941-output classifier on an ImageNet-initialised
# InceptionResNetV2 backbone and print its layer summary.
model = MODEL(InceptionResNetV2,500,out_dims=6941,func=preprocess_input,weights='imagenet')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 500, 500, 3)       0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 500, 500, 3)       0         
_________________________________________________________________
inception_resnet_v2 (Model)  (None, None, None, 1536)  54336736  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1536)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1536)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 3072)              4721664   
_________________________________________________________________
dropout_2 (Dropout)          (None, 3072)              0         
__________

In [13]:
# Keep only the weights of the epoch with the best validation F-measure.
checkpointer = ModelCheckpoint(filepath='Inresv2_weights_best.hdf5',
                            monitor='val_fmeasure',verbose=1, save_best_only=True, mode='max')
# Halve the learning rate after one epoch without validation F-measure improvement.
reduce = ReduceLROnPlateau(monitor='val_fmeasure',factor=0.5,patience=1,verbose=1,min_delta=1e-4, mode='max')

adam = Adam(0.0001)
model.compile(optimizer = adam,
           loss='binary_crossentropy',
           metrics=['accuracy',fmeasure,recall,precision])
epochs = 20
# Derive the step counts from the actual split sizes instead of the hard-coded
# 28000/7000, and pass ints rather than the floats returned by np.ceil.
steps_per_epoch = int(np.ceil(len(X_train_path) / batch_size))
validation_steps = int(np.ceil(len(X_val_path) / batch_size))
history = model.fit_generator(train_generator,
       validation_data = val_generator,
       epochs=epochs,
       callbacks=[checkpointer,reduce],
       verbose=1,steps_per_epoch=steps_per_epoch,validation_steps=validation_steps)

Epoch 1/20
   8/3500 [..............................] - ETA: 5:49:57 - loss: 0.6393 - acc: 0.6912 - fmeasure: 1.3470 - recall: 0.3470 - precision: 0.0069

KeyboardInterrupt: 

2、预训练模型——Xception进行fine-tune训练

In [11]:
from keras.layers import *
from keras.models import *
from keras.callbacks import *
from keras.optimizers import *
from keras.applications import *

def MODEL(MODEL,img_size,out_dims,func=None,weights=None,include_top=False,fc_units=2048):
    """Build a multi-label classifier on top of a Keras application backbone.

    Note that inside the body the parameter ``MODEL`` (the backbone constructor)
    shadows this function's own name.

    Args:
        MODEL: backbone constructor accepting ``weights`` and ``include_top``.
        img_size: side length of the square RGB input.
        out_dims: number of sigmoid outputs.
        func: optional preprocessing function applied through a ``Lambda`` layer.
            When ``None`` (the default) no preprocessing layer is inserted;
            previously ``Lambda(None)`` made the default unusable.
        weights: forwarded to the backbone constructor.
        include_top: forwarded to the backbone constructor.
        fc_units: width of the hidden dense layer. The default of 2048
            reproduces the previously hard-coded value (the InceptionResNetV2
            variant of this notebook uses 3072).

    Returns:
        An uncompiled ``Model`` mapping images to ``out_dims`` sigmoid scores.
    """
    inputs = Input((img_size, img_size, 3))  # instantiate the input tensor
    x = inputs
    if func is not None:
        x = Lambda(func)(x)

    base_model = MODEL(weights=weights, include_top=include_top)
    x = base_model(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(fc_units, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(out_dims, activation='sigmoid')(x)
    return Model(inputs, x)

In [12]:
from tensorflow.python.keras.applications.xception import preprocess_input
# Build the 500x500-input, 6941-output classifier on an ImageNet-initialised
# Xception backbone and print its layer summary.
model = MODEL(Xception,500,out_dims=6941,func=preprocess_input,weights='imagenet')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 500, 500, 3)       0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 500, 500, 3)       0         
_________________________________________________________________
xception (Model)             multiple                  20861480  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 2048)              4196352   
_________________________________________________________________
dropout_2 (Dropout)          (None, 2048)              0         
__________

In [13]:
# Keep only the weights of the epoch with the best validation F-measure.
checkpointer = ModelCheckpoint(filepath='xception_weights_best.hdf5',
                            monitor='val_fmeasure',verbose=1, save_best_only=True, mode='max')
# Halve the learning rate after one epoch without validation F-measure improvement.
reduce = ReduceLROnPlateau(monitor='val_fmeasure',factor=0.5,patience=1,verbose=1,min_delta=1e-4, mode='max')

adam = Adam(0.0001)
model.compile(optimizer = adam,
           loss='binary_crossentropy',
           metrics=['accuracy',fmeasure,recall,precision])
epochs = 20
# Derive the step counts from the actual split sizes instead of the hard-coded
# 28000/7000, and pass ints rather than the floats returned by np.ceil.
steps_per_epoch = int(np.ceil(len(X_train_path) / batch_size))
validation_steps = int(np.ceil(len(X_val_path) / batch_size))
history = model.fit_generator(train_generator,
       validation_data = val_generator,
       epochs=epochs,
       callbacks=[checkpointer,reduce],
       verbose=1,steps_per_epoch=steps_per_epoch,validation_steps=validation_steps)

Epoch 1/20
  86/3500 [..............................] - ETA: 43:13 - loss: 0.1609 - acc: 0.9566 - fmeasure: 11.2254 - recall: 0.1124 - precision: 0.4973

KeyboardInterrupt: 

3、预训练模型——InceptionV3进行fine-tune训练

In [12]:
from keras.layers import *
from keras.models import *
from keras.callbacks import *
from keras.optimizers import *
from keras.applications import *

def MODEL(MODEL,img_size,out_dims,func=None,weights=None,include_top=False,fc_units=2048):
    """Build a multi-label classifier on top of a Keras application backbone.

    Note that inside the body the parameter ``MODEL`` (the backbone constructor)
    shadows this function's own name.

    Args:
        MODEL: backbone constructor accepting ``weights`` and ``include_top``.
        img_size: side length of the square RGB input.
        out_dims: number of sigmoid outputs.
        func: optional preprocessing function applied through a ``Lambda`` layer.
            When ``None`` (the default) no preprocessing layer is inserted;
            previously ``Lambda(None)`` made the default unusable.
        weights: forwarded to the backbone constructor.
        include_top: forwarded to the backbone constructor.
        fc_units: width of the hidden dense layer. The default of 2048
            reproduces the previously hard-coded value (the InceptionResNetV2
            variant of this notebook uses 3072).

    Returns:
        An uncompiled ``Model`` mapping images to ``out_dims`` sigmoid scores.
    """
    inputs = Input((img_size, img_size, 3))  # instantiate the input tensor
    x = inputs
    if func is not None:
        x = Lambda(func)(x)

    base_model = MODEL(weights=weights, include_top=include_top)
    x = base_model(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(fc_units, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(out_dims, activation='sigmoid')(x)
    return Model(inputs, x)

In [13]:
from tensorflow.python.keras.applications.inception_v3 import preprocess_input
# Build the 500x500-input, 6941-output classifier on an ImageNet-initialised
# InceptionV3 backbone and print its layer summary.
model = MODEL(InceptionV3,500,out_dims=6941,func=preprocess_input,weights='imagenet')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 500, 500, 3)       0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 500, 500, 3)       0         
_________________________________________________________________
inception_v3 (Model)         multiple                  21802784  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 2048)              4196352   
_________________________________________________________________
dropout_2 (Dropout)          (None, 2048)              0         
__________

In [14]:
# Keep only the weights of the epoch with the best validation F-measure.
checkpointer = ModelCheckpoint(filepath='inceptionv3_weights_best.hdf5',
                            monitor='val_fmeasure',verbose=1, save_best_only=True, mode='max')
# Halve the learning rate after one epoch without validation F-measure improvement.
reduce = ReduceLROnPlateau(monitor='val_fmeasure',factor=0.5,patience=1,verbose=1,min_delta=1e-4, mode='max')

adam = Adam(0.0001)
model.compile(optimizer = adam,
           loss='binary_crossentropy',
           metrics=['accuracy',fmeasure,recall,precision])
epochs = 20
# Derive the step counts from the actual split sizes instead of the hard-coded
# 28000/7000, and pass ints rather than the floats returned by np.ceil.
steps_per_epoch = int(np.ceil(len(X_train_path) / batch_size))
validation_steps = int(np.ceil(len(X_val_path) / batch_size))
history = model.fit_generator(train_generator,
       validation_data = val_generator,
       epochs=epochs,
       callbacks=[checkpointer,reduce],
       verbose=1,steps_per_epoch=steps_per_epoch,validation_steps=validation_steps)

Epoch 1/20
  89/3500 [..............................] - ETA: 26:12 - loss: 0.1266 - acc: 0.9599 - fmeasure: 12.3410 - recall: 0.1136 - precision: 0.4840

KeyboardInterrupt: 

# 模型融合

In [15]:
# Collect the full training set and the test set
# NOTE: this rebinds X_train_path from the 80% training split to the complete path list.
X_train_path = img_paths
X_test_path = glob('test/*.jpg') # final-round test set
# Reload the labels under a separate name; y_train was shuffled and split earlier.
y_train2 = load_ytrain('tag_train.npz')

#test的生成器中没有y
def get_X_batch(X_path,batch_size,img_size):
    """Endlessly yield image batches without labels, for prediction.

    Args:
        X_path: sequence of image paths.
        batch_size: number of images per batch (the last batch of a pass may be smaller).
        img_size: side length passed on to ``get_image``.

    Yields:
        The array returned by ``get_image`` for each consecutive slice of ``X_path``.

    Raises:
        ValueError: on the first ``next()`` if ``X_path`` is empty. Without this
            guard the endless loop would spin forever and never yield.
    """
    if len(X_path) == 0:
        raise ValueError('X_path is empty: no batches can be generated.')
    while True:
        for start in range(0, len(X_path), batch_size):
            yield get_image(X_path[start:start + batch_size], img_size)

In [16]:
def build_MODEL(MODEL,img_size,out_dims,func=None,weights=None,include_top=False,fc_units=2048):
    """Build the backbone + dense-head classifier used for feature extraction.

    Args:
        MODEL: backbone constructor accepting ``weights`` and ``include_top``.
        img_size: side length of the square RGB input.
        out_dims: number of sigmoid outputs.
        func: optional preprocessing function applied through a ``Lambda`` layer.
            When ``None`` (the default) no preprocessing layer is inserted;
            previously ``Lambda(None)`` made the default unusable.
        weights: forwarded to the backbone constructor.
        include_top: forwarded to the backbone constructor.
        fc_units: width of the hidden dense layer. The default of 2048
            reproduces the previously hard-coded value.

    Returns:
        An uncompiled ``Model`` mapping images to ``out_dims`` sigmoid scores.
    """
    inputs = Input((img_size, img_size, 3))
    x = inputs
    if func is not None:
        x = Lambda(func)(x)

    base_model = MODEL(weights=weights, include_top=include_top)
    x = base_model(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(fc_units, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(out_dims, activation='sigmoid')(x)
    return Model(inputs, x)

def FeatureExtract(MODEL,img_size,func=None,weight_path=None):
    """Extract features with a fine-tuned model and cache them in an HDF5 file.

    The classifier is rebuilt with ``build_MODEL``, the weights at ``weight_path``
    are loaded, and the output of the third-to-last layer is predicted for every
    path in the module-level ``X_train_path`` and ``X_test_path``. The results are
    written to ``'<MODEL.__name__>_data.h5'`` as datasets ``train``, ``test`` and
    ``label`` (the module-level ``y_train2``).

    Args:
        MODEL: backbone constructor forwarded to ``build_MODEL``.
        img_size: side length of the square input images.
        func: preprocessing function forwarded to ``build_MODEL``.
        weight_path: path of the weight file to load.
    """
    # Local import: the notebook only imports h5py in a later cell, so the
    # original body raised NameError when run top to bottom.
    import h5py

    base_model = build_MODEL(MODEL,img_size,out_dims=6941,func=func,weights=None)
    base_model.load_weights(weight_path)
    # layers[-3] is the hidden dense layer that precedes the final dropout + sigmoid output.
    model = Model(inputs=base_model.input, outputs=base_model.layers[-3].output)

    batch_size = 8
    X_train_generator = get_X_batch(X_train_path, batch_size = batch_size, img_size = img_size)
    X_test_generator = get_X_batch(X_test_path, batch_size = batch_size, img_size = img_size)

    # Step counts must be ints; np.ceil returns a float.
    train_steps = int(np.ceil(len(X_train_path) / batch_size))
    test_steps = int(np.ceil(len(X_test_path) / batch_size))
    train_features = model.predict_generator(X_train_generator, steps = train_steps, verbose=1)
    test_features = model.predict_generator(X_test_generator, steps = test_steps, verbose=1)

    # Save the bottleneck features. The explicit 'w' mode creates or truncates the
    # file; without a mode, recent h5py opens read-only and create_dataset fails.
    with h5py.File('%s_data.h5'%MODEL.__name__, 'w') as h:
        h.create_dataset("train",data = train_features)
        h.create_dataset("test",data = test_features)
        h.create_dataset('label',data = y_train2)

分别提取特征向量，便于后面进行融合

In [18]:
from tensorflow.python.keras.applications.inception_v3 import preprocess_input
# Extract and cache features with the fine-tuned InceptionV3 weights.
# NOTE(review): this weight file name differs from the checkpoint path used by the
# InceptionV3 training cell ('inceptionv3_weights_best.hdf5') — presumably renamed by hand; confirm.
FeatureExtract(InceptionV3,500,func=preprocess_input,weight_path='inception_v3_weights_best_9_15_sigmoid_44.34666.hdf5')



In [19]:
from tensorflow.python.keras.applications.xception import preprocess_input
# Extract and cache features with the fine-tuned Xception weights.
# NOTE(review): this weight file name differs from the checkpoint path used by the
# Xception training cell ('xception_weights_best.hdf5') — presumably renamed by hand; confirm.
FeatureExtract(Xception,500,func=preprocess_input,weight_path='xception_weights_best_9_15_sigmoid.hdf5')



In [20]:
def build_MODEL2(MODEL,img_size,out_dims,func=None,weights=None,include_top=False,fc_units=3072):
    """Build the backbone + dense-head classifier with the wider (3072-unit) head.

    Args:
        MODEL: backbone constructor accepting ``weights`` and ``include_top``.
        img_size: side length of the square RGB input.
        out_dims: number of sigmoid outputs.
        func: optional preprocessing function applied through a ``Lambda`` layer.
            When ``None`` (the default) no preprocessing layer is inserted;
            previously ``Lambda(None)`` made the default unusable.
        weights: forwarded to the backbone constructor.
        include_top: forwarded to the backbone constructor.
        fc_units: width of the hidden dense layer. The default of 3072
            reproduces the previously hard-coded value.

    Returns:
        An uncompiled ``Model`` mapping images to ``out_dims`` sigmoid scores.
    """
    inputs = Input((img_size, img_size, 3))
    x = inputs
    if func is not None:
        x = Lambda(func)(x)

    base_model = MODEL(weights=weights, include_top=include_top)
    x = base_model(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(fc_units, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(out_dims, activation='sigmoid')(x)
    return Model(inputs, x)

def FeatureExtract(MODEL,img_size,func=None,weight_path=None):
    """Extract features with a fine-tuned model and cache them in an HDF5 file.

    This redefinition rebuilds the classifier with ``build_MODEL2`` (3072-unit
    head). The weights at ``weight_path`` are loaded and the output of the
    third-to-last layer is predicted for every path in the module-level
    ``X_train_path`` and ``X_test_path``. The results are written to
    ``'<MODEL.__name__>_data.h5'`` as datasets ``train``, ``test`` and ``label``
    (the module-level ``y_train2``).

    Args:
        MODEL: backbone constructor forwarded to ``build_MODEL2``.
        img_size: side length of the square input images.
        func: preprocessing function forwarded to ``build_MODEL2``.
        weight_path: path of the weight file to load.
    """
    # Local import: the notebook only imports h5py in a later cell, so the
    # original body raised NameError when run top to bottom.
    import h5py

    base_model = build_MODEL2(MODEL,img_size,out_dims=6941,func=func,weights=None)
    base_model.load_weights(weight_path)
    # layers[-3] is the hidden dense layer that precedes the final dropout + sigmoid output.
    model = Model(inputs=base_model.input, outputs=base_model.layers[-3].output)

    batch_size = 8
    X_train_generator = get_X_batch(X_train_path, batch_size = batch_size, img_size = img_size)
    X_test_generator = get_X_batch(X_test_path, batch_size = batch_size, img_size = img_size)

    # Step counts must be ints; np.ceil returns a float.
    train_steps = int(np.ceil(len(X_train_path) / batch_size))
    test_steps = int(np.ceil(len(X_test_path) / batch_size))
    train_features = model.predict_generator(X_train_generator, steps = train_steps, verbose=1)
    test_features = model.predict_generator(X_test_generator, steps = test_steps, verbose=1)

    # Save the bottleneck features. The explicit 'w' mode creates or truncates the
    # file; without a mode, recent h5py opens read-only and create_dataset fails.
    with h5py.File('%s_data.h5'%MODEL.__name__, 'w') as h:
        h.create_dataset("train",data = train_features)
        h.create_dataset("test",data = test_features)
        h.create_dataset('label',data = y_train2)

In [21]:
from tensorflow.python.keras.applications.inception_resnet_v2 import preprocess_input
# Extract and cache features with the fine-tuned InceptionResNetV2 weights.
# NOTE(review): this weight file name differs from the checkpoint path used by the
# InceptionResNetV2 training cell ('Inresv2_weights_best.hdf5') — presumably renamed by hand; confirm.
FeatureExtract(InceptionResNetV2,500,func=preprocess_input,weight_path='Inresv2_weights_best_9_13_46.49088.hdf5')



In [22]:
import h5py
X_train = []
X_test = []

# Read the cached feature vectors of each backbone and fuse them by concatenation
for filename in ['Xception_data.h5','InceptionV3_data.h5','InceptionResNetV2_data.h5']:
    with h5py.File(filename,'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        # Overwritten on every iteration; only the labels from the last file are kept.
        y_train = np.array(h['label'])
# Concatenate the per-model features along axis 1.
X_train = np.concatenate(X_train,axis=1)
X_test = np.concatenate(X_test,axis=1)

from sklearn.utils import shuffle
np.random.seed(2018)
# Shuffle features and labels together so they stay aligned.
X_train,y_train = shuffle(X_train,y_train)

from sklearn.model_selection import train_test_split
# Hold out 20% of the fused features for validation.
X_train,X_val,y_train,y_val = train_test_split(X_train,y_train,test_size=0.2,random_state=2018)

In [23]:
# Build the fusion model: dropout followed by one sigmoid layer on the concatenated features
inputs = Input((X_train.shape[1:]))
x = Dropout(0.7)(inputs)
x = Dense(6941, activation='sigmoid')(x)
model = Model(inputs, x)

checkpointer = ModelCheckpoint(filepath='embedding.best_dropout0.7_9_20.hdf5',monitor='val_fmeasure',mode='max',
                               verbose=1, save_best_only=True) # save the best model weights
# Halve the learning rate after 5 epochs without validation F-measure improvement.
# NOTE(review): the recorded log shows val_fmeasure stuck at 59.93025 from about epoch 93
# onward while the learning rate keeps halving down to ~3e-12; consider adding
# EarlyStopping or a min_lr.
reduce = ReduceLROnPlateau(monitor='val_fmeasure',factor=0.5,patience=5,verbose=1,mode='max')
adam = Adam(0.0001)
model.compile(optimizer = adam,
           loss='binary_crossentropy',
           metrics=['accuracy',fmeasure,recall,precision])
epochs = 200
history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=epochs, batch_size=128,callbacks=[checkpointer,reduce],verbose=1)

Train on 28000 samples, validate on 7000 samples
Epoch 1/200

Epoch 00001: val_fmeasure improved from -inf to 41.90044, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 2/200

Epoch 00002: val_fmeasure improved from 41.90044 to 48.01198, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 3/200

Epoch 00003: val_fmeasure improved from 48.01198 to 50.96903, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 4/200

Epoch 00004: val_fmeasure improved from 50.96903 to 52.52689, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 5/200

Epoch 00005: val_fmeasure improved from 52.52689 to 53.64528, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 6/200

Epoch 00006: val_fmeasure improved from 53.64528 to 54.36768, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 7/200

Epoch 00007: val_fmeasure improved from 54.36768 to 55.08346, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 8/200

Epoch 00008: val_fmeasure improved from 55.08346 to 5


Epoch 00022: val_fmeasure improved from 58.52919 to 58.54630, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 23/200

Epoch 00023: val_fmeasure improved from 58.54630 to 58.65163, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 24/200

Epoch 00024: val_fmeasure improved from 58.65163 to 58.82641, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 25/200

Epoch 00025: val_fmeasure improved from 58.82641 to 58.89960, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 26/200

Epoch 00026: val_fmeasure improved from 58.89960 to 58.90988, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 27/200

Epoch 00027: val_fmeasure improved from 58.90988 to 58.95106, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 28/200

Epoch 00028: val_fmeasure improved from 58.95106 to 59.11047, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 29/200

Epoch 00029: val_fmeasure did not improve from 59.11047
Epoch 30/200

Epoch 00030: val_fmeasure improve


Epoch 00045: val_fmeasure did not improve from 59.74476
Epoch 46/200

Epoch 00046: val_fmeasure did not improve from 59.74476
Epoch 47/200

Epoch 00047: val_fmeasure did not improve from 59.74476
Epoch 48/200

Epoch 00048: val_fmeasure did not improve from 59.74476
Epoch 49/200

Epoch 00049: val_fmeasure did not improve from 59.74476

Epoch 00049: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 50/200

Epoch 00050: val_fmeasure improved from 59.74476 to 59.74652, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 51/200

Epoch 00051: val_fmeasure did not improve from 59.74652
Epoch 52/200

Epoch 00052: val_fmeasure improved from 59.74652 to 59.77528, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 53/200

Epoch 00053: val_fmeasure did not improve from 59.77528
Epoch 54/200

Epoch 00054: val_fmeasure did not improve from 59.77528
Epoch 55/200

Epoch 00055: val_fmeasure did not improve from 59.77528
Epoch 56/200

Epoch 00056: val_fmeasure improv

Epoch 69/200

Epoch 00069: val_fmeasure did not improve from 59.87146
Epoch 70/200

Epoch 00070: val_fmeasure improved from 59.87146 to 59.88396, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 71/200

Epoch 00071: val_fmeasure did not improve from 59.88396
Epoch 72/200

Epoch 00072: val_fmeasure did not improve from 59.88396
Epoch 73/200

Epoch 00073: val_fmeasure improved from 59.88396 to 59.91971, saving model to embedding.best_dropout0.7_9_20.hdf5
Epoch 74/200

Epoch 00074: val_fmeasure did not improve from 59.91971
Epoch 75/200

Epoch 00075: val_fmeasure did not improve from 59.91971
Epoch 76/200

Epoch 00076: val_fmeasure did not improve from 59.91971
Epoch 77/200

Epoch 00077: val_fmeasure did not improve from 59.91971
Epoch 78/200

Epoch 00078: val_fmeasure did not improve from 59.91971

Epoch 00078: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 79/200

Epoch 00079: val_fmeasure did not improve from 59.91971
Epoch 80/200

Epoch 00080: val_fm


Epoch 00093: val_fmeasure did not improve from 59.93025
Epoch 94/200

Epoch 00094: val_fmeasure did not improve from 59.93025
Epoch 95/200

Epoch 00095: val_fmeasure did not improve from 59.93025
Epoch 96/200

Epoch 00096: val_fmeasure did not improve from 59.93025

Epoch 00096: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 97/200

Epoch 00097: val_fmeasure did not improve from 59.93025
Epoch 98/200

Epoch 00098: val_fmeasure did not improve from 59.93025
Epoch 99/200

Epoch 00099: val_fmeasure did not improve from 59.93025
Epoch 100/200

Epoch 00100: val_fmeasure did not improve from 59.93025
Epoch 101/200

Epoch 00101: val_fmeasure did not improve from 59.93025

Epoch 00101: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 102/200

Epoch 00102: val_fmeasure did not improve from 59.93025
Epoch 103/200

Epoch 00103: val_fmeasure did not improve from 59.93025
Epoch 104/200

Epoch 00104: val_fmeasure did not improve from 59.93025
Epoch 10


Epoch 00117: val_fmeasure did not improve from 59.93025
Epoch 118/200

Epoch 00118: val_fmeasure did not improve from 59.93025
Epoch 119/200

Epoch 00119: val_fmeasure did not improve from 59.93025
Epoch 120/200

Epoch 00120: val_fmeasure did not improve from 59.93025
Epoch 121/200

Epoch 00121: val_fmeasure did not improve from 59.93025

Epoch 00121: ReduceLROnPlateau reducing learning rate to 9.765624753299562e-08.
Epoch 122/200

Epoch 00122: val_fmeasure did not improve from 59.93025
Epoch 123/200

Epoch 00123: val_fmeasure did not improve from 59.93025
Epoch 124/200

Epoch 00124: val_fmeasure did not improve from 59.93025
Epoch 125/200

Epoch 00125: val_fmeasure did not improve from 59.93025
Epoch 126/200

Epoch 00126: val_fmeasure did not improve from 59.93025

Epoch 00126: ReduceLROnPlateau reducing learning rate to 4.882812376649781e-08.
Epoch 127/200

Epoch 00127: val_fmeasure did not improve from 59.93025
Epoch 128/200

Epoch 00128: val_fmeasure did not improve from 59.93025



Epoch 00141: val_fmeasure did not improve from 59.93025

Epoch 00141: ReduceLROnPlateau reducing learning rate to 6.103515470812226e-09.
Epoch 142/200

Epoch 00142: val_fmeasure did not improve from 59.93025
Epoch 143/200

Epoch 00143: val_fmeasure did not improve from 59.93025
Epoch 144/200

Epoch 00144: val_fmeasure did not improve from 59.93025
Epoch 145/200

Epoch 00145: val_fmeasure did not improve from 59.93025
Epoch 146/200

Epoch 00146: val_fmeasure did not improve from 59.93025

Epoch 00146: ReduceLROnPlateau reducing learning rate to 3.051757735406113e-09.
Epoch 147/200

Epoch 00147: val_fmeasure did not improve from 59.93025
Epoch 148/200

Epoch 00148: val_fmeasure did not improve from 59.93025
Epoch 149/200

Epoch 00149: val_fmeasure did not improve from 59.93025
Epoch 150/200

Epoch 00150: val_fmeasure did not improve from 59.93025
Epoch 151/200

Epoch 00151: val_fmeasure did not improve from 59.93025

Epoch 00151: ReduceLROnPlateau reducing learning rate to 1.52587886770


Epoch 00165: val_fmeasure did not improve from 59.93025
Epoch 166/200

Epoch 00166: val_fmeasure did not improve from 59.93025

Epoch 00166: ReduceLROnPlateau reducing learning rate to 1.9073485846288207e-10.
Epoch 167/200

Epoch 00167: val_fmeasure did not improve from 59.93025
Epoch 168/200

Epoch 00168: val_fmeasure did not improve from 59.93025
Epoch 169/200

Epoch 00169: val_fmeasure did not improve from 59.93025
Epoch 170/200

Epoch 00170: val_fmeasure did not improve from 59.93025
Epoch 171/200

Epoch 00171: val_fmeasure did not improve from 59.93025

Epoch 00171: ReduceLROnPlateau reducing learning rate to 9.536742923144104e-11.
Epoch 172/200

Epoch 00172: val_fmeasure did not improve from 59.93025
Epoch 173/200

Epoch 00173: val_fmeasure did not improve from 59.93025
Epoch 174/200

Epoch 00174: val_fmeasure did not improve from 59.93025
Epoch 175/200

Epoch 00175: val_fmeasure did not improve from 59.93025
Epoch 176/200

Epoch 00176: val_fmeasure did not improve from 59.93025


Epoch 00189: val_fmeasure did not improve from 59.93025
Epoch 190/200

Epoch 00190: val_fmeasure did not improve from 59.93025
Epoch 191/200

Epoch 00191: val_fmeasure did not improve from 59.93025

Epoch 00191: ReduceLROnPlateau reducing learning rate to 5.960464326965065e-12.
Epoch 192/200

Epoch 00192: val_fmeasure did not improve from 59.93025
Epoch 193/200

Epoch 00193: val_fmeasure did not improve from 59.93025
Epoch 194/200

Epoch 00194: val_fmeasure did not improve from 59.93025
Epoch 195/200

Epoch 00195: val_fmeasure did not improve from 59.93025
Epoch 196/200

Epoch 00196: val_fmeasure did not improve from 59.93025

Epoch 00196: ReduceLROnPlateau reducing learning rate to 2.9802321634825324e-12.
Epoch 197/200

Epoch 00197: val_fmeasure did not improve from 59.93025
Epoch 198/200

Epoch 00198: val_fmeasure did not improve from 59.93025
Epoch 199/200

Epoch 00199: val_fmeasure did not improve from 59.93025
Epoch 200/200

Epoch 00200: val_fmeasure did not improve from 59.93025

# 模型预测，得到结果

In [24]:
# NOTE(review): the checkpoint callback writes 'embedding.best_dropout0.7_9_20.hdf5',
# but a file with a score suffix is loaded here — presumably renamed by hand; confirm
# the file exists before a fresh run.
model.load_weights('embedding.best_dropout0.7_9_20_59.93025.hdf5')
# Predict tag scores for the fused test features.
y_pred = model.predict(X_test)

In [26]:
# Python
# Score cut-off: arr2tag keeps tags whose score is greater than this value, and the
# value is also embedded in the output CSV file name.
threshold = 0.5
def arr2tag(arr):
    """Convert a score matrix into per-row lists of tag names.

    For every row of ``arr`` the positions whose value is greater than the
    module-level ``threshold`` are looked up in the module-level ``hash_tag`` table.

    Args:
        arr: array whose first axis enumerates samples.

    Returns:
        A list with one list of tag names per row of ``arr``.
    """
    return [
        [hash_tag[j] for j in np.where(arr[row] > threshold)[0].tolist()]
        for row in range(arr.shape[0])
    ]
y_tags = arr2tag(y_pred)

import os
# Take the file names from X_test_path, the list FeatureExtract predicted the test
# features from, instead of a separate os.listdir('test/') call. listdir also returns
# non-.jpg entries and gives no guarantee of matching the glob order, which could pair
# predictions with the wrong image. This assumes the cached .h5 features were produced
# from this same listing.
img_name = [os.path.basename(path) for path in X_test_path]

# Join each tag list into one comma-separated string up front; this replaces the
# per-row chained `df['tags'].iloc[i] = ...` assignment.
df = pd.DataFrame({'img_path': img_name,
                   'tags': [','.join(str(e) for e in tags) for tags in y_tags]})
df.to_csv('merged_moudle_best9_27_3_%s.csv'%(threshold),index=None)