In [1]:
import numpy as np 
import pandas as pd 

import os
from glob import glob
from random import shuffle
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Convolution1D, concatenate, SpatialDropout1D, GlobalMaxPool1D, GlobalAvgPool1D, Embedding, \
    Conv2D, SeparableConv1D, Add, BatchNormalization, Activation, GlobalAveragePooling2D, LeakyReLU, Flatten
from keras.layers import Dense, Input, Dropout, MaxPooling2D, Concatenate, GlobalMaxPooling2D, GlobalAveragePooling2D, \
    Lambda, Multiply, LSTM, Bidirectional, PReLU, MaxPooling1D
from keras.layers.pooling import _GlobalPooling1D
from keras.losses import mae, sparse_categorical_crossentropy, binary_crossentropy
from keras.models import Model
from keras.applications.nasnet import NASNetMobile, NASNetLarge, preprocess_input
from keras.optimizers import Adam, RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from imgaug import augmenters as iaa
import imgaug as ia
print(os.listdir("../input"))

Using TensorFlow backend.


['train.zip', 'test.zip', 'train_labels.csv', 'test', 'sample_submission.csv', 'train']


In [2]:
df_train = pd.read_csv("../input/train_labels.csv")
id_label_map = {k:v for k,v in zip(df_train.id.values, df_train.label.values)}
df_train.head()

Unnamed: 0,id,label
0,f38a6374c348f90b587e046aac6079959adf3835,0
1,c18f2d887b7ae4f6742ee445113fa1aef383ed77,1
2,755db6279dae599ebb4d39a9123cce439965282d,0
3,bc3f0c64fb968ff4a8bd33af6971ecae77c75e08,0
4,068aba587a4950175d04c680d38943fd488d6a9d,0


In [3]:
df_train.label.sum()#89117

89117

In [4]:
len(id_label_map) #220025

220025

In [5]:
dice = (len(id_label_map)-df_train.label.sum())/df_train.label.sum()

dice #1.47

1.468945319074924

In [6]:
def get_id_from_file_path(file_path): # :路径分隔符
    return file_path.split(os.path.sep)[-1].replace('.tif', '')

In [7]:
labeled_files = glob('../input/train/*.tif')
test_files = glob('../input/test/*.tif')

In [8]:
print("labeled_files size :", len(labeled_files))
print("test_files size :", len(test_files))

labeled_files size : 220025
test_files size : 57458


In [9]:
train, val = train_test_split(labeled_files, test_size=0.1, random_state=101010)

In [10]:
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

# 数据增强序列
def get_seq():
    sometimes = lambda aug: iaa.Sometimes(0.5, aug)
    seq = iaa.Sequential(
        [
           # 数据增强
            iaa.Fliplr(0.5), # horizontally flip 50% of all images
            iaa.Flipud(0.2), # vertically flip 20% of all images
            sometimes(iaa.Affine(
                scale={"x": (0.9, 1.1), "y": (0.9, 1.1)}, # scale images to 80-120% of their size, individually per axis
                translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}, # translate by -20 to +20 percent (per axis)
                rotate=(-10, 10), # rotate by -45 to +45 degrees
                shear=(-5, 5), # shear by -16 to +16 degrees
                order=[0, 1], # use nearest neighbour or bilinear interpolation (fast)
                cval=(0, 255), # if mode is constant, use a cval between 0 and 255
                mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples)
            )),
            # execute 0 to 5 of the following (less important) augmenters per image
            # don't execute all of them, as that would often be way too strong
            iaa.SomeOf((0, 5),
                [
                    sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation
                    iaa.OneOf([
                        iaa.GaussianBlur((0, 1.0)), # blur images with a sigma between 0 and 3.0
                        iaa.AverageBlur(k=(3, 5)), # blur image using local means with kernel sizes between 2 and 7
                        iaa.MedianBlur(k=(3, 5)), # blur image using local medians with kernel sizes between 2 and 7
                    ]),
                    iaa.Sharpen(alpha=(0, 1.0), lightness=(0.9, 1.1)), # sharpen images
                    iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images
                    # search either for all edges or for directed edges,
                    # blend the result with the original image using a blobby mask
                    iaa.SimplexNoiseAlpha(iaa.OneOf([
                        iaa.EdgeDetect(alpha=(0.5, 1.0)),
                        iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
                    ])),
                    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.01*255), per_channel=0.5), # add gaussian noise to images
                    iaa.OneOf([
                        iaa.Dropout((0.01, 0.05), per_channel=0.5), # randomly remove up to 10% of the pixels
                        iaa.CoarseDropout((0.01, 0.03), size_percent=(0.01, 0.02), per_channel=0.2),
                    ]),
                    iaa.Invert(0.01, per_channel=True), # invert color channels
                    iaa.Add((-2, 2), per_channel=0.5), # change brightness of images (by -10 to 10 of original value)
                    iaa.AddToHueAndSaturation((-1, 1)), # change hue and saturation
                    # either change the brightness of the whole image (sometimes
                    # per channel) or change the brightness of subareas
                    iaa.OneOf([
                        iaa.Multiply((0.9, 1.1), per_channel=0.5),
                        iaa.FrequencyNoiseAlpha(
                            exponent=(-1, 0),
                            first=iaa.Multiply((0.9, 1.1), per_channel=True),
                            second=iaa.ContrastNormalization((0.9, 1.1))
                        )
                    ]),
                    sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths)
                    sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))), # sometimes move parts of the image around
                    sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1)))
                ],
                random_order=True
            )
        ],
        random_order=True
    )
    return seq

def data_gen(list_files, id_label_map, batch_size, augment=False):
    seq = get_seq()
    while True:
        shuffle(list_files)
        for batch in chunker(list_files, batch_size):
            X = [cv2.imread(x) for x in batch]
            Y = [id_label_map[get_id_from_file_path(x)] for x in batch]
            if augment:
                X = seq.augment_images(X)
            X = [preprocess_input(x) for x in X]
                
            yield np.array(X), np.array(Y)
    

In [11]:
from keras import backend as K
def matthews_correlation(y_true, y_pred):
  
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    y_pred_neg = 1 - y_pred_pos

    y_pos = K.round(K.clip(y_true, 0, 1))
    y_neg = 1 - y_pos

    tp = K.sum(y_pos * y_pred_pos)
    tn = K.sum(y_neg * y_pred_neg)

    fp = K.sum(y_neg * y_pred_pos)
    fn = K.sum(y_pos * y_pred_neg)

    numerator = (tp * tn - fp * fn)
    denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    return numerator / (denominator + K.epsilon())

In [12]:
import tensorflow as tf
# 2分类 Dice Loss
def dice_coefficient(y_true_cls, y_pred_cls):
    
    eps = 1e-5
    intersection = tf.reduce_sum(y_true_cls * y_pred_cls )
    union = tf.reduce_sum(y_true_cls ) + tf.reduce_sum(y_pred_cls) + eps
    loss = 1. - (2 * intersection / union)

    return loss

In [13]:
# 2 分类 focal loss
def focal_loss(y_true, y_pred):
    gamma=0.75
    alpha=0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
 
    pt_1 = K.clip(pt_1, 1e-3, .999)
    pt_0 = K.clip(pt_0, 1e-3, .999)
 
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))

In [14]:
def mixedLoss(y_true,y_pred,alpha=0.5):
    return alpha * focal_loss(y_true,y_pred) - K.log(dice_coefficient(y_true,y_pred))

In [15]:
# 分类模型 1
def get_model_classif_nasnet():
    inputs = Input((96, 96, 3))
    base_model = NASNetMobile(include_top=False, input_shape=(96, 96, 3))#, weights=None
    x = base_model(inputs)
    out1 = GlobalMaxPooling2D()(x)
    out2 = GlobalAveragePooling2D()(x)
    out3 = Flatten()(x)
    out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid", name="3_")(out)
    model = Model(inputs, out)
    #model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    model.compile(optimizer=Adam(0.0001), loss=focal_loss, metrics=['acc'])
    #model.summary()

    return model

model = get_model_classif_nasnet()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 96, 96, 3)    0                                            
__________________________________________________________________________________________________
NASNet (Model)                  (None, 3, 3, 1056)   4269716     input_1[0][0]                    
__________________________________________________________________________________________________
global_max_pooling2d_1 (GlobalM (None, 1056)         0           NASNet[1][0]                     
__________________________________________________________________________________________________
global_average_pooling2d_1 (Glo (None, 1056)         0           NASNet[1][0]                     
__________________________________________________________________________________________________
flatten_1 

In [16]:
h5_path = "nasnet_model.h5"
model.load_weights(h5_path)

In [17]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

checkpoint = ModelCheckpoint(h5_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8,
                                   verbose=1, mode='auto', epsilon=0.005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=16)

batch_size=64
history = model.fit_generator(
    data_gen(train, id_label_map, batch_size, augment=True),
    validation_data=data_gen(val, id_label_map, batch_size),
    epochs=50, verbose=1,
    callbacks=[checkpoint,reduceLROnPlat,early],
    steps_per_epoch=len(train) // batch_size,
    validation_steps=len(val) // batch_size)



Epoch 1/50

KeyboardInterrupt: 

In [None]:
from tqdm import tqdm_notebook 
#model.load_weights(h5_path)
preds = []
ids = []

for batch in tqdm_notebook(chunker(test_files, batch_size)):
    X = [preprocess_input(cv2.imread(x)) for x in batch]
    ids_batch = [get_id_from_file_path(x) for x in batch]
    X = np.array(X)
    preds_batch = ((model.predict(X).ravel()*model.predict(X[:, ::-1, :, :]).ravel()*model.predict(X[:, ::-1, ::-1, :]).ravel()*model.predict(X[:, :, ::-1, :]).ravel())**0.25).tolist()
    preds += preds_batch
    ids += ids_batch
    
    
df = pd.DataFrame({'id':ids, 'label':preds})
df.to_csv("baseline_nasnet-1.24.csv", index=False)
df.head()

In [15]:
#####################################

# 分类模型 modify 2
from keras import applications

def get_model_classif_resnet50():
    inputs = Input((96, 96, 3))
    
    #base_model = NASNetMobile(include_top=False, input_shape=(96, 96, 3))#, weights=None
    base_model = applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(96, 96, 3),
    pooling='avg',
    classes=None)
    
    out = base_model(inputs)
  #  out1 = GlobalMaxPooling2D()(x)
  #  out2 = GlobalAveragePooling2D()(x)
  #  out3 = Flatten()(x)
  #  out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid", name="3_")(out)
    model = Model(inputs, out)
    #model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    model.compile(optimizer=Adam(0.0001), loss=focal_loss, metrics=[matthews_correlation])
    #model.summary()

    return model

model = get_model_classif_resnet50()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 96, 96, 3)         0         
_________________________________________________________________
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dropout_1 (Dropout)          (None, 2048)              0         
_________________________________________________________________
3_ (Dense)                   (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 23,536,641
Non-trainable params: 53,120
_________________________________________________________________


In [19]:
h5_path = "resnet50_model.h5"
model.load_weights(h5_path)
#from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

checkpoint = ModelCheckpoint(h5_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8,
                                   verbose=1, mode='auto', epsilon=0.005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=16)

batch_size=64
history = model.fit_generator(
    data_gen(train, id_label_map, batch_size, augment=True),
    validation_data=data_gen(val, id_label_map, batch_size),
    epochs=100, verbose=1,
    callbacks=[checkpoint,reduceLROnPlat,early],
    steps_per_epoch=len(train) // batch_size,
    validation_steps=len(val) // batch_size)

model.load_weights(h5_path)

Epoch 1/100

Epoch 00001: val_loss improved from inf to 3.10822, saving model to resnet50_model.h5
Epoch 2/100

Epoch 00002: val_loss improved from 3.10822 to 2.39313, saving model to resnet50_model.h5
Epoch 3/100

Epoch 00003: val_loss did not improve from 2.39313
Epoch 4/100

Epoch 00004: val_loss improved from 2.39313 to 2.33038, saving model to resnet50_model.h5
Epoch 5/100

Epoch 00005: val_loss improved from 2.33038 to 1.98595, saving model to resnet50_model.h5
Epoch 6/100

Epoch 00006: val_loss did not improve from 1.98595
Epoch 7/100

Epoch 00007: val_loss improved from 1.98595 to 1.78471, saving model to resnet50_model.h5
Epoch 8/100

Epoch 00008: val_loss improved from 1.78471 to 1.77030, saving model to resnet50_model.h5
Epoch 9/100

Epoch 00009: val_loss did not improve from 1.77030
Epoch 10/100

Epoch 00010: val_loss improved from 1.77030 to 1.65401, saving model to resnet50_model.h5
Epoch 11/100

Epoch 00011: val_loss improved from 1.65401 to 1.61873, saving model to resn


Epoch 00034: val_loss improved from 1.19100 to 1.03903, saving model to resnet50_model.h5
Epoch 35/100

Epoch 00035: val_loss did not improve from 1.03903
Epoch 36/100

Epoch 00036: val_loss did not improve from 1.03903
Epoch 37/100

Epoch 00037: val_loss did not improve from 1.03903
Epoch 38/100

Epoch 00038: val_loss did not improve from 1.03903
Epoch 39/100

Epoch 00039: val_loss did not improve from 1.03903
Epoch 40/100

Epoch 00040: val_loss did not improve from 1.03903
Epoch 41/100

Epoch 00041: val_loss did not improve from 1.03903
Epoch 42/100

Epoch 00042: val_loss did not improve from 1.03903

Epoch 00042: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 43/100

Epoch 00043: val_loss improved from 1.03903 to 0.97109, saving model to resnet50_model.h5
Epoch 44/100

Epoch 00044: val_loss improved from 0.97109 to 0.94379, saving model to resnet50_model.h5
Epoch 45/100

Epoch 00045: val_loss did not improve from 0.94379
Epoch 46/100

KeyboardInterrupt: 

In [20]:
preds = []
ids = []
h5_path = "resnet50_model.h5"
model.load_weights(h5_path)

for batch in chunker(test_files, batch_size):
    X = [preprocess_input(cv2.imread(x)) for x in batch]
    ids_batch = [get_id_from_file_path(x) for x in batch]
    X = np.array(X)
    preds_batch = ((model.predict(X).ravel()*model.predict(X[:, ::-1, :, :]).ravel()*model.predict(X[:, ::-1, ::-1, :]).ravel()*model.predict(X[:, :, ::-1, :]).ravel())**0.25).tolist()
    preds += preds_batch
    ids += ids_batch
    
    
df = pd.DataFrame({'id':ids, 'label':preds})
df.to_csv("baseline_resnet50_model.csv", index=False)
df.head()

Unnamed: 0,id,label
0,624f2c7ea820a0a912a1be09ff340a0beab26a36,0.005444
1,d662364233770ae9d82d95d71de628075c030861,0.002932
2,2a6c4fda3a41de065e78a18d35bf595b1294134c,0.0
3,9d7598388d882b20593ed1a436a4652aa668b631,0.049653
4,6a68fbdbf76369a957d039a1a71b0c83ef8cdc80,0.995147


In [None]:
# 分类模型 modify 3
from keras import applications

def get_model_classif_xception():
    inputs = Input((96, 96, 3))
    
    #base_model = NASNetMobile(include_top=False, input_shape=(96, 96, 3))#, weights=None
    base_model = applications.xception.Xception(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(96, 96, 3),
    pooling='avg',
    classes=None)
    
    out = base_model(inputs)
  #  out1 = GlobalMaxPooling2D()(x)
  #  out2 = GlobalAveragePooling2D()(x)
  #  out3 = Flatten()(x)
  #  out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid", name="3_")(out)
    model = Model(inputs, out)
    #model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    model.compile(optimizer=Adam(0.0001), loss=focal_loss, metrics=[matthews_correlation])
    #model.summary()

    return model

model = get_model_classif_xception()
model.summary()

In [None]:

h5_path = "xception_model.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8,
                                   verbose=1, mode='auto', epsilon=0.005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=16)

batch_size=64
history = model.fit_generator(
    data_gen(train, id_label_map, batch_size, augment=True),
    validation_data=data_gen(val, id_label_map, batch_size),
    epochs=100, verbose=1,
    callbacks=[checkpoint,reduceLROnPlat,early],
    steps_per_epoch=len(train) // batch_size,
    validation_steps=len(val) // batch_size)

model.load_weights(h5_path)

In [None]:
preds = []
ids = []

for batch in chunker(test_files, batch_size):
    X = [preprocess_input(cv2.imread(x)) for x in batch]
    ids_batch = [get_id_from_file_path(x) for x in batch]
    X = np.array(X)
    preds_batch = ((model.predict(X).ravel()*model.predict(X[:, ::-1, :, :]).ravel()*model.predict(X[:, ::-1, ::-1, :]).ravel()*model.predict(X[:, :, ::-1, :]).ravel())**0.25).tolist()
    preds += preds_batch
    ids += ids_batch
    
    
df = pd.DataFrame({'id':ids, 'label':preds})
df.to_csv("baseline_xception_model.csv", index=False)
df.head()

In [None]:
# 分类模型 modify 4
from keras import applications

def get_model_classif_inception_resnet_v2():
    inputs = Input((96, 96, 3))
    
    #base_model = NASNetMobile(include_top=False, input_shape=(96, 96, 3))#, weights=None
    base_model = applications.inception_resnet_v2.InceptionResNetV2(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(96, 96, 3),
    pooling='avg',
    classes=None)
    
    out = base_model(inputs)
  #  out1 = GlobalMaxPooling2D()(x)
  #  out2 = GlobalAveragePooling2D()(x)
  #  out3 = Flatten()(x)
  #  out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid", name="3_")(out)
    model = Model(inputs, out)
    #model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    model.compile(optimizer=Adam(0.0001), loss=focal_loss, metrics=[matthews_correlation])
    #model.summary()

    return model

model = get_model_classif_inception_resnet_v2()
model.summary()

In [None]:
h5_path = "inception_resnet_v2_model.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8,
                                   verbose=1, mode='auto', epsilon=0.005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=16)

batch_size=64
history = model.fit_generator(
    data_gen(train, id_label_map, batch_size, augment=True),
    validation_data=data_gen(val, id_label_map, batch_size),
    epochs=100, verbose=1,
    callbacks=[checkpoint,reduceLROnPlat,early],
    steps_per_epoch=len(train) // batch_size,
    validation_steps=len(val) // batch_size)

model.load_weights(h5_path)

In [None]:
preds = []
ids = []

for batch in chunker(test_files, batch_size):
    X = [preprocess_input(cv2.imread(x)) for x in batch]
    ids_batch = [get_id_from_file_path(x) for x in batch]
    X = np.array(X)
    preds_batch = ((model.predict(X).ravel()*model.predict(X[:, ::-1, :, :]).ravel()*model.predict(X[:, ::-1, ::-1, :]).ravel()*model.predict(X[:, :, ::-1, :]).ravel())**0.25).tolist()
    preds += preds_batch
    ids += ids_batch
    
    
df = pd.DataFrame({'id':ids, 'label':preds})
df.to_csv("baseline_inception_resnet_v2_model.csv", index=False)
df.head()

In [None]:
# 分类模型 modify 5
from keras import applications

def get_model_classif_inception_v3():
    inputs = Input((96, 96, 3))
    
    #base_model = NASNetMobile(include_top=False, input_shape=(96, 96, 3))#, weights=None
    base_model = applications.inception_v3.InceptionV3(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(96, 96, 3),
    pooling='avg',
    classes=None)
    
    out = base_model(inputs)
    #out1 = GlobalMaxPooling2D()(x)
    #out2 = GlobalAveragePooling2D()(x)
    #out3 = Flatten()(x)
    #out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid", name="3_")(out)
    model = Model(inputs, out)
    #model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    model.compile(optimizer=Adam(0.0001), loss=focal_loss, metrics=[matthews_correlation])
    #model.summary()

    return model

model = get_model_classif_inception_v3()
model.summary()

In [None]:
h5_path = "inception_v3_model.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8,
                                   verbose=1, mode='auto', epsilon=0.005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=16)

batch_size=64
history = model.fit_generator(
    data_gen(train, id_label_map, batch_size, augment=True),
    validation_data=data_gen(val, id_label_map, batch_size),
    epochs=100, verbose=1,
    callbacks=[checkpoint,reduceLROnPlat,early],
    steps_per_epoch=len(train) // batch_size,
    validation_steps=len(val) // batch_size)
model.load_weights(h5_path)

preds = []
ids = []

for batch in chunker(test_files, batch_size):
    X = [preprocess_input(cv2.imread(x)) for x in batch]
    ids_batch = [get_id_from_file_path(x) for x in batch]
    X = np.array(X)
    preds_batch = ((model.predict(X).ravel()*model.predict(X[:, ::-1, :, :]).ravel()*model.predict(X[:, ::-1, ::-1, :]).ravel()*model.predict(X[:, :, ::-1, :]).ravel())**0.25).tolist()
    preds += preds_batch
    ids += ids_batch
    
    
df = pd.DataFrame({'id':ids, 'label':preds})
df.to_csv("baseline_inception_v3_model.csv", index=False)
df.head()

In [None]:
# 分类模型 modify 6
from keras import applications

def get_model_classif_densenet():
    inputs = Input((96, 96, 3))
    
    #base_model = NASNetMobile(include_top=False, input_shape=(96, 96, 3))#, weights=None
    base_model = applications.densenet.DenseNet201(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(96, 96, 3),
    pooling='avg',
    classes=None)
    
    out = base_model(inputs)
    #out1 = GlobalMaxPooling2D()(x)
    #out2 = GlobalAveragePooling2D()(x)
    #out3 = Flatten()(x)
    #out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid", name="3_")(out)
    model = Model(inputs, out)
    #model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    model.compile(optimizer=Adam(0.0001), loss=focal_loss, metrics=[matthews_correlation])
    #model.summary()

    return model

model = get_model_classif_densenet()
model.summary()

In [None]:
h5_path = "densenet_model.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8,
                                   verbose=1, mode='auto', epsilon=0.005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=16)

batch_size=64
history = model.fit_generator(
    data_gen(train, id_label_map, batch_size, augment=True),
    validation_data=data_gen(val, id_label_map, batch_size),
    epochs=100, verbose=1,
    callbacks=[checkpoint,reduceLROnPlat,early],
    steps_per_epoch=len(train) // batch_size,
    validation_steps=len(val) // batch_size)

model.load_weights(h5_path)

preds = []
ids = []

for batch in chunker(test_files, batch_size):
    X = [preprocess_input(cv2.imread(x)) for x in batch]
    ids_batch = [get_id_from_file_path(x) for x in batch]
    X = np.array(X)
    preds_batch = ((model.predict(X).ravel()*model.predict(X[:, ::-1, :, :]).ravel()*model.predict(X[:, ::-1, ::-1, :]).ravel()*model.predict(X[:, :, ::-1, :]).ravel())**0.25).tolist()
    preds += preds_batch
    ids += ids_batch
    
    
df = pd.DataFrame({'id':ids, 'label':preds})
df.to_csv("baseline_densenet_model.csv", index=False)
df.head()

In [None]:
# 分类模型 modify 7
from keras import applications

def get_model_classif_NASNetLarge():
    inputs = Input((96, 96, 3))
    
    #base_model = NASNetMobile(include_top=False, input_shape=(96, 96, 3))#, weights=None
    base_model = applications.nasnet.NASNetLarge(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(96, 96, 3),
    pooling='avg',
    classes=None)
    
    out = base_model(inputs)
    #out1 = GlobalMaxPooling2D()(x)
    #out2 = GlobalAveragePooling2D()(x)
    #out3 = Flatten()(x)
    #out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid", name="3_")(out)
    model = Model(inputs, out)
    #model.compile(optimizer=Adam(0.0001), loss=binary_crossentropy, metrics=['acc'])
    model.compile(optimizer=Adam(0.0001), loss=focal_loss, metrics=[matthews_correlation])
    #model.summary()

    return model

model = get_model_classif_NASNetLarge()
model.summary()

In [None]:
h5_path = "NASNetLarge_model.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8,
                                   verbose=1, mode='auto', epsilon=0.005)
early = EarlyStopping(monitor="val_loss", 
                      mode="min", 
                      patience=16)

batch_size=64
history = model.fit_generator(
    data_gen(train, id_label_map, batch_size, augment=True),
    validation_data=data_gen(val, id_label_map, batch_size),
    epochs=100, verbose=1,
    callbacks=[checkpoint,reduceLROnPlat,early],
    steps_per_epoch=len(train) // batch_size,
    validation_steps=len(val) // batch_size)

model.load_weights(h5_path)

preds = []
ids = []

for batch in chunker(test_files, batch_size):
    X = [preprocess_input(cv2.imread(x)) for x in batch]
    ids_batch = [get_id_from_file_path(x) for x in batch]
    X = np.array(X)
    preds_batch = ((model.predict(X).ravel()*model.predict(X[:, ::-1, :, :]).ravel()*model.predict(X[:, ::-1, ::-1, :]).ravel()*model.predict(X[:, :, ::-1, :]).ravel())**0.25).tolist()
    preds += preds_batch
    ids += ids_batch
    
    
df = pd.DataFrame({'id':ids, 'label':preds})
df.to_csv("baseline_NASNetLarge_model.csv", index=False)
df.head()