In [1]:
# https://www.kaggle.com/mathormad/inceptionv3-baseline-lb-0-379/code

In [2]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image
import cv2
from sklearn.utils import class_weight, shuffle
from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings("ignore")
SIZE = 299

In [3]:
# https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric/notebook
import tensorflow as tf

def f1(y_true, y_pred):
    y_pred = K.round(y_pred)
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

def f1_loss(y_true, y_pred):
    
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) + (1 - K.mean(f1))

In [4]:
# Load dataset info
path_to_train = '../data/train/'
data = pd.read_csv('../data/train.csv')

In [5]:
data.head()

Unnamed: 0,Id,Target
0,00070df0-bbc3-11e8-b2bc-ac1f6b6435d0,16 0
1,000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0,7 1 2 0
2,000a9596-bbc4-11e8-b2bc-ac1f6b6435d0,5
3,000c99ba-bba4-11e8-b2b9-ac1f6b6435d0,1
4,001838f8-bbca-11e8-b2bc-ac1f6b6435d0,18


In [6]:
train_dataset_info = []
for name, labels in zip(data['Id'], data['Target'].str.split(' ')):
    train_dataset_info.append({
        'path':os.path.join(path_to_train, name),
        'labels':np.array([int(label) for label in labels])})
train_dataset_info = np.array(train_dataset_info)

In [7]:
train_dataset_info

array([{'path': '../data/train/00070df0-bbc3-11e8-b2bc-ac1f6b6435d0', 'labels': array([16,  0])},
       {'path': '../data/train/000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0', 'labels': array([7, 1, 2, 0])},
       {'path': '../data/train/000a9596-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([5])},
       ...,
       {'path': '../data/train/fff189d8-bbab-11e8-b2ba-ac1f6b6435d0', 'labels': array([7])},
       {'path': '../data/train/fffdf7e0-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([25,  2, 21])},
       {'path': '../data/train/fffe0ffe-bbc0-11e8-b2bb-ac1f6b6435d0', 'labels': array([2, 0])}],
      dtype=object)

In [8]:
class data_generator:
    
    def create_train(dataset_info, batch_size, shape, augument=True):
        assert shape[2] == 3
        while True:
            dataset_info = shuffle(dataset_info)
            for start in range(0, len(dataset_info), batch_size):
                end = min(start + batch_size, len(dataset_info))
                batch_images = []
                X_train_batch = dataset_info[start:end]
                batch_labels = np.zeros((len(X_train_batch), 28))
                for i in range(len(X_train_batch)):
                    image = data_generator.load_image(
                        X_train_batch[i]['path'], shape)   
                    if augument:
                        image = data_generator.augment(image)
                    batch_images.append(image/255.)
                    batch_labels[i][X_train_batch[i]['labels']] = 1
                yield np.array(batch_images, np.float32), batch_labels

    def load_image(path, shape):
        image_red_ch = Image.open(path+'_red.png')
        image_yellow_ch = Image.open(path+'_yellow.png')
        image_green_ch = Image.open(path+'_green.png')
        image_blue_ch = Image.open(path+'_blue.png')
        image = np.stack((
            np.array(image_red_ch),
            np.array(image_green_ch), 
            np.array(image_blue_ch)), -1)
        w, h = 512, 512
        zero_data = np.zeros((h, w), dtype=np.uint8)
#         image2 = np.stack((
#             np.array(image_yellow_ch),
#             zero_data, zero_data), -1)
#         print(image1.shape, image2.shape)
#         image = np.vstack((image1, image2))
        image = cv2.resize(image, (shape[0], shape[1]))
        return image

    def augment(image):
        augment_img = iaa.Sequential([
            iaa.OneOf([
                iaa.Affine(rotate=0),
                iaa.Affine(rotate=90),
                iaa.Affine(rotate=180),
                iaa.Affine(rotate=270),
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
            ])], random_order=True)

        image_aug = augment_img.augment_image(image)
        return image_aug


In [9]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalMaxPooling2D, BatchNormalization, Input, Conv2D
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras
from keras.models import Model

Using TensorFlow backend.


In [10]:
def create_model(input_shape, n_out):
    input_tensor = Input(shape=input_shape)
    base_model = InceptionV3(include_top=False,
                   weights='imagenet',
                   input_shape=input_shape)
    bn = BatchNormalization()(input_tensor)
    x = base_model(bn)
    x = Conv2D(32, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_out, activation='sigmoid')(x)
    model = Model(input_tensor, output)
    
    return model

In [11]:
import tensorflow as tf
from tensorflow.python.ops import array_ops

# https://github.com/ailias/Focal-Loss-implement-on-Tensorflow/blob/master/focal_loss.py
def focal_loss_org(prediction_tensor, target_tensor, weights=None, alpha=0.25, gamma=2):
    r"""Compute focal loss for predictions.
        Multi-labels Focal loss formula:
            FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p)
                 ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.
    Args:
     prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
     target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
     weights: A float tensor of shape [batch_size, num_anchors]
     alpha: A scalar tensor for focal loss alpha hyper-parameter
     gamma: A scalar tensor for focal loss gamma hyper-parameter
    Returns:
        loss: A (scalar) tensor representing the value of the loss function
    """
    sigmoid_p = tf.nn.sigmoid(prediction_tensor)
    zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)
    
    # For poitive prediction, only need consider front part loss, back part is 0;
    # target_tensor > zeros <=> z=1, so poitive coefficient = z - p.
    pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros)
    
    # For negative prediction, only need consider back part loss, front part is 0;
    # target_tensor > zeros <=> z=1, so negative coefficient = 0.
    neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
    per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                          - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
    return tf.reduce_sum(per_entry_cross_ent)


In [12]:
def focal_loss(weights=None, alpha=0.25, gamma=2):
    def focal_loss_my(target_tensor, prediction_tensor, ):
        r"""Compute focal loss for predictions.
            Multi-labels Focal loss formula:
                FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p)
                     ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.
        Args:
         prediction_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing the predicted logits for each class
         target_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing one-hot encoded classification targets
         weights: A float tensor of shape [batch_size, num_anchors]
         alpha: A scalar tensor for focal loss alpha hyper-parameter
         gamma: A scalar tensor for focal loss gamma hyper-parameter
        Returns:
            loss: A (scalar) tensor representing the value of the loss function
        """
        sigmoid_p = tf.nn.sigmoid(prediction_tensor)
        zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)

        # For poitive prediction, only need consider front part loss, back part is 0;
        # target_tensor > zeros <=> z=1, so poitive coefficient = z - p.
        pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros)

        # For negative prediction, only need consider back part loss, front part is 0;
        # target_tensor > zeros <=> z=1, so negative coefficient = 0.
        neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
        per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                              - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
        return tf.reduce_sum(per_entry_cross_ent)
#         return K.mean(K.binary_crossentropy(target_tensor, prediction_tensor), axis=-1) + tf.reduce_sum(per_entry_cross_ent)
    return focal_loss_my

In [13]:
def focal_loss_fixed(y_true, y_pred):
    gamma = 2.
    alpha = 0.25
    print(y_pred)
    print(y_true)
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

#     pt_1 = K.clip(pt_1, 1e-3, .999)
#     pt_0 = K.clip(pt_0, 1e-3, .999)

    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))


In [14]:
# def focal_loss(gamma=2., alpha=.25):
#     def focal_loss_fixed(y_true, y_pred):
#         pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
#         pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

#         pt_1 = K.clip(pt_1, 1e-3, .999)
#         pt_0 = K.clip(pt_0, 1e-3, .999)

#         return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
#     return focal_loss_fixed

In [15]:
# create callbacks list
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split



In [16]:
train_dataset_info.shape

(31072,)

In [17]:

# split data into train, valid
indexes = np.arange(train_dataset_info.shape[0])
# np.random.shuffle(indexes)
# train_indexes, valid_indexes = train_test_split(indexes, test_size=0.15, random_state=8)
n_splits = 5
kf = KFold(n_splits=n_splits, random_state=42, shuffle=True)
submit = pd.read_csv('../data/sample_submission.csv')

# train_generator = data_generator.create_train(
#     train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
# validation_generator = data_generator.create_train(
#     train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)

oof_class_preds = np.zeros((train_dataset_info.shape[0], 28))
sub_class_preds = np.zeros((submit.shape[0], 28))

fold_ = 0
epochs = 10; batch_size = 16
for train_indexes, valid_indexes in kf.split(indexes):
    
    checkpoint = ModelCheckpoint('../cache/InceptionV3.h5', monitor='val_loss', verbose=1, 
                                 save_best_only=True, mode='min', save_weights_only = True)
    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, 
                                       verbose=1, mode='auto', epsilon=0.0001)
    early = EarlyStopping(monitor="val_loss", 
                          mode="min", 
                          patience=15)
    callbacks_list = [checkpoint, early, reduceLROnPlat]


    _preds = []
    # create train and valid datagens
    train_generator = data_generator.create_train(
        train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
    validation_generator = data_generator.create_train(
        train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)

    # warm up model
    model = create_model(
        input_shape=(SIZE,SIZE,3), 
        n_out=28)

    for layer in model.layers:
        layer.trainable = False
    model.layers[-1].trainable = True
    model.layers[-2].trainable = True
    model.layers[-3].trainable = True
    model.layers[-4].trainable = True
    model.layers[-5].trainable = True
    model.layers[-6].trainable = True
    
    model.compile(
        loss=f1_loss, 
        optimizer=Adam(1e-03),
        metrics=[f1])
#     model.summary()

    model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(float(len(train_indexes)) / float(batch_size)),
        validation_data=validation_generator,
        validation_steps=np.ceil(float(len(valid_indexes)) / float(batch_size)),
        epochs=2, 
        verbose=1)
    
    # train all layers
    epochs=120
    for layer in model.layers:
        layer.trainable = True
    model.compile(loss=f1_loss,
                optimizer=Adam(lr=1e-4),
                metrics=[f1])
    model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(float(len(train_indexes)) / float(batch_size)),
        validation_data=validation_generator,
        validation_steps=np.ceil(float(len(valid_indexes)) / float(batch_size)),
        epochs=epochs, 
        verbose=1,
        callbacks=callbacks_list)
    
    model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(float(len(train_indexes)) / float(batch_size)),
        validation_data=validation_generator,
        validation_steps=np.ceil(float(len(valid_indexes)) / float(batch_size)),
        epochs=epochs, 
        verbose=1,
        callbacks=callbacks_list)
    
    for idx in tqdm(valid_indexes):
        item = train_dataset_info[idx]
        path = item['path']
        labels = item['labels']
        image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
        score_predict = model.predict(image[np.newaxis])[0]
        oof_class_preds[idx] = score_predict
        np.save('../cache/oof_class_preds-13.npy', oof_class_preds)
    
    for idx, name in tqdm(enumerate(submit['Id'])):
        path = os.path.join('../data/test/', name)
        image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
        score_predict = model.predict(image[np.newaxis])[0]
        sub_class_preds[idx] += score_predict
        np.save('../cache/sub_class_preds-13.npy', sub_class_preds)
    fold_ += 1
sub_class_preds /= n_splits

Epoch 1/2
Epoch 2/2
Epoch 1/120

Epoch 00001: val_loss improved from inf to 1.02485, saving model to ../cache/InceptionV3.h5
Epoch 2/120

Epoch 00002: val_loss improved from 1.02485 to 0.93639, saving model to ../cache/InceptionV3.h5
Epoch 3/120

Epoch 00003: val_loss improved from 0.93639 to 0.89304, saving model to ../cache/InceptionV3.h5
Epoch 4/120

Epoch 00004: val_loss improved from 0.89304 to 0.84578, saving model to ../cache/InceptionV3.h5
Epoch 5/120

Epoch 00005: val_loss improved from 0.84578 to 0.83939, saving model to ../cache/InceptionV3.h5
Epoch 6/120

Epoch 00006: val_loss improved from 0.83939 to 0.82510, saving model to ../cache/InceptionV3.h5
Epoch 7/120

Epoch 00007: val_loss improved from 0.82510 to 0.81198, saving model to ../cache/InceptionV3.h5
Epoch 8/120

Epoch 00008: val_loss improved from 0.81198 to 0.81002, saving model to ../cache/InceptionV3.h5
Epoch 9/120

Epoch 00009: val_loss improved from 0.81002 to 0.79066, saving model to ../cache/InceptionV3.h5
Epo


Epoch 00037: val_loss did not improve from 0.73655

Epoch 00037: ReduceLROnPlateau reducing learning rate to 9.999999717180686e-11.
Epoch 38/120

Epoch 00038: val_loss did not improve from 0.73655
Epoch 39/120

Epoch 00039: val_loss did not improve from 0.73655
Epoch 40/120

Epoch 00040: val_loss did not improve from 0.73655

Epoch 00040: ReduceLROnPlateau reducing learning rate to 9.99999943962493e-12.
Epoch 1/120

Epoch 00001: val_loss did not improve from 0.73655
Epoch 2/120

Epoch 00002: val_loss did not improve from 0.73655
Epoch 3/120

Epoch 00003: val_loss did not improve from 0.73655
Epoch 4/120

Epoch 00004: val_loss did not improve from 0.73655
Epoch 5/120

Epoch 00005: val_loss did not improve from 0.73655
Epoch 6/120

Epoch 00006: val_loss did not improve from 0.73655
Epoch 7/120

Epoch 00007: val_loss did not improve from 0.73655
Epoch 8/120

Epoch 00008: val_loss did not improve from 0.73655
Epoch 9/120

Epoch 00009: val_loss did not improve from 0.73655
Epoch 10/120

Ep

  0%|          | 0/6215 [00:00<?, ?it/s]


Epoch 00024: val_loss did not improve from 0.73655

Epoch 00024: ReduceLROnPlateau reducing learning rate to 9.999998977483754e-17.


100%|██████████| 6215/6215 [05:51<00:00, 17.68it/s]
11702it [08:02, 24.26it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/120

Epoch 00001: val_loss improved from inf to 1.00331, saving model to ../cache/InceptionV3.h5
Epoch 2/120

Epoch 00002: val_loss improved from 1.00331 to 0.92726, saving model to ../cache/InceptionV3.h5
Epoch 3/120

Epoch 00003: val_loss improved from 0.92726 to 0.87006, saving model to ../cache/InceptionV3.h5
Epoch 4/120

Epoch 00004: val_loss improved from 0.87006 to 0.85711, saving model to ../cache/InceptionV3.h5
Epoch 5/120

Epoch 00005: val_loss improved from 0.85711 to 0.83719, saving model to ../cache/InceptionV3.h5
Epoch 6/120

Epoch 00006: val_loss improved from 0.83719 to 0.83306, saving model to ../cache/InceptionV3.h5
Epoch 7/120

Epoch 00007: val_loss improved from 0.83306 to 0.81386, saving model to ../cache/InceptionV3.h5
Epoch 8/120

Epoch 00008: val_loss improved from 0.81386 to 0.80049, saving model to ../cache/InceptionV3.h5
Epoch 9/120

Epoch 00009: val_loss did not improve from 0.80049
Epoch 10/120

Epoch 00010: val_loss improved fro


Epoch 00036: val_loss did not improve from 0.72993
Epoch 37/120

Epoch 00037: val_loss did not improve from 0.72993
Epoch 38/120

Epoch 00038: val_loss did not improve from 0.72993

Epoch 00038: ReduceLROnPlateau reducing learning rate to 9.999999717180686e-11.
Epoch 39/120

Epoch 00039: val_loss did not improve from 0.72993
Epoch 40/120

Epoch 00040: val_loss did not improve from 0.72993
Epoch 41/120

Epoch 00041: val_loss did not improve from 0.72993

Epoch 00041: ReduceLROnPlateau reducing learning rate to 9.99999943962493e-12.
Epoch 1/120

Epoch 00001: val_loss did not improve from 0.72993
Epoch 2/120

Epoch 00002: val_loss did not improve from 0.72993
Epoch 3/120

Epoch 00003: val_loss improved from 0.72993 to 0.72956, saving model to ../cache/InceptionV3.h5
Epoch 4/120

Epoch 00004: val_loss did not improve from 0.72956
Epoch 5/120

Epoch 00005: val_loss improved from 0.72956 to 0.72937, saving model to ../cache/InceptionV3.h5
Epoch 6/120

Epoch 00006: val_loss did not improve f

  0%|          | 0/6215 [00:00<?, ?it/s]


Epoch 00020: val_loss did not improve from 0.72937

Epoch 00020: ReduceLROnPlateau reducing learning rate to 9.999998977483754e-17.


100%|██████████| 6215/6215 [05:23<00:00, 19.22it/s]
11702it [08:20, 23.37it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/120

Epoch 00001: val_loss improved from inf to 0.98139, saving model to ../cache/InceptionV3.h5
Epoch 2/120

Epoch 00002: val_loss improved from 0.98139 to 0.90634, saving model to ../cache/InceptionV3.h5
Epoch 3/120

Epoch 00003: val_loss improved from 0.90634 to 0.89056, saving model to ../cache/InceptionV3.h5
Epoch 4/120

Epoch 00004: val_loss improved from 0.89056 to 0.87762, saving model to ../cache/InceptionV3.h5
Epoch 5/120

Epoch 00005: val_loss did not improve from 0.87762
Epoch 6/120

Epoch 00006: val_loss improved from 0.87762 to 0.81734, saving model to ../cache/InceptionV3.h5
Epoch 7/120

Epoch 00007: val_loss improved from 0.81734 to 0.80342, saving model to ../cache/InceptionV3.h5
Epoch 8/120

Epoch 00008: val_loss did not improve from 0.80342
Epoch 9/120

Epoch 00009: val_loss improved from 0.80342 to 0.79794, saving model to ../cache/InceptionV3.h5
Epoch 10/120

Epoch 00010: val_loss did not improve from 0.79794
Epoch 11/120

Epoch 00011: v


Epoch 00037: val_loss did not improve from 0.73110
Epoch 38/120

Epoch 00038: val_loss did not improve from 0.73110

Epoch 00038: ReduceLROnPlateau reducing learning rate to 9.999999939225292e-10.
Epoch 39/120

Epoch 00039: val_loss did not improve from 0.73110
Epoch 40/120

Epoch 00040: val_loss did not improve from 0.73110
Epoch 41/120

Epoch 00041: val_loss improved from 0.73110 to 0.72984, saving model to ../cache/InceptionV3.h5
Epoch 42/120

Epoch 00042: val_loss did not improve from 0.72984
Epoch 43/120

Epoch 00043: val_loss did not improve from 0.72984
Epoch 44/120

Epoch 00044: val_loss did not improve from 0.72984

Epoch 00044: ReduceLROnPlateau reducing learning rate to 9.999999717180686e-11.
Epoch 45/120

Epoch 00045: val_loss did not improve from 0.72984
Epoch 46/120

Epoch 00046: val_loss did not improve from 0.72984
Epoch 47/120

Epoch 00047: val_loss did not improve from 0.72984

Epoch 00047: ReduceLROnPlateau reducing learning rate to 9.99999943962493e-12.
Epoch 48/12


Epoch 00020: val_loss did not improve from 0.72984

Epoch 00020: ReduceLROnPlateau reducing learning rate to 9.999999424161285e-20.
Epoch 21/120

Epoch 00021: val_loss did not improve from 0.72984
Epoch 22/120

Epoch 00022: val_loss did not improve from 0.72984
Epoch 23/120

Epoch 00023: val_loss did not improve from 0.72984

Epoch 00023: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-21.
Epoch 24/120

Epoch 00024: val_loss did not improve from 0.72984
Epoch 25/120

Epoch 00025: val_loss did not improve from 0.72984
Epoch 26/120

Epoch 00026: val_loss did not improve from 0.72984

Epoch 00026: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-22.
Epoch 27/120

Epoch 00027: val_loss did not improve from 0.72984
Epoch 28/120

Epoch 00028: val_loss did not improve from 0.72984
Epoch 29/120

Epoch 00029: val_loss did not improve from 0.72984

Epoch 00029: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-23.
Epoch 30/120

Epoch 00030: val_loss did

  0%|          | 0/6214 [00:00<?, ?it/s]


Epoch 00042: val_loss did not improve from 0.72984


100%|██████████| 6214/6214 [05:44<00:00, 18.03it/s]
11702it [08:18, 23.49it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/120

Epoch 00001: val_loss improved from inf to 0.98448, saving model to ../cache/InceptionV3.h5
Epoch 2/120

Epoch 00002: val_loss improved from 0.98448 to 0.91880, saving model to ../cache/InceptionV3.h5
Epoch 3/120

Epoch 00003: val_loss did not improve from 0.91880
Epoch 4/120

Epoch 00004: val_loss improved from 0.91880 to 0.84680, saving model to ../cache/InceptionV3.h5
Epoch 5/120

Epoch 00005: val_loss improved from 0.84680 to 0.82342, saving model to ../cache/InceptionV3.h5
Epoch 6/120

Epoch 00006: val_loss improved from 0.82342 to 0.81969, saving model to ../cache/InceptionV3.h5
Epoch 7/120

Epoch 00007: val_loss improved from 0.81969 to 0.80836, saving model to ../cache/InceptionV3.h5
Epoch 8/120

Epoch 00008: val_loss did not improve from 0.80836
Epoch 9/120

Epoch 00009: val_loss improved from 0.80836 to 0.79898, saving model to ../cache/InceptionV3.h5
Epoch 10/120

Epoch 00010: val_loss improved from 0.79898 to 0.79216, saving model to ../cach

Epoch 37/120

Epoch 00037: val_loss did not improve from 0.73233
Epoch 38/120

Epoch 00038: val_loss did not improve from 0.73233
Epoch 39/120

Epoch 00039: val_loss did not improve from 0.73233

Epoch 00039: ReduceLROnPlateau reducing learning rate to 9.99999943962493e-12.
Epoch 40/120

Epoch 00040: val_loss did not improve from 0.73233
Epoch 41/120

Epoch 00041: val_loss did not improve from 0.73233
Epoch 42/120

Epoch 00042: val_loss did not improve from 0.73233

Epoch 00042: ReduceLROnPlateau reducing learning rate to 9.999999092680235e-13.
Epoch 43/120

Epoch 00043: val_loss did not improve from 0.73233
Epoch 44/120

Epoch 00044: val_loss did not improve from 0.73233
Epoch 45/120

Epoch 00045: val_loss did not improve from 0.73233

Epoch 00045: ReduceLROnPlateau reducing learning rate to 9.9999988758398e-14.
Epoch 46/120

Epoch 00046: val_loss did not improve from 0.73233
Epoch 47/120

Epoch 00047: val_loss did not improve from 0.73233
Epoch 48/120

Epoch 00048: val_loss did not i

  0%|          | 0/6214 [00:00<?, ?it/s]


Epoch 00017: val_loss did not improve from 0.73233

Epoch 00017: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-21.


100%|██████████| 6214/6214 [05:35<00:00, 18.55it/s]
11702it [08:28, 23.00it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/120

Epoch 00001: val_loss improved from inf to 1.01546, saving model to ../cache/InceptionV3.h5
Epoch 2/120

Epoch 00002: val_loss improved from 1.01546 to 0.91633, saving model to ../cache/InceptionV3.h5
Epoch 3/120

Epoch 00003: val_loss did not improve from 0.91633
Epoch 4/120

Epoch 00004: val_loss improved from 0.91633 to 0.86176, saving model to ../cache/InceptionV3.h5
Epoch 5/120

Epoch 00005: val_loss improved from 0.86176 to 0.84903, saving model to ../cache/InceptionV3.h5
Epoch 6/120

Epoch 00006: val_loss improved from 0.84903 to 0.83811, saving model to ../cache/InceptionV3.h5
Epoch 7/120

Epoch 00007: val_loss improved from 0.83811 to 0.81265, saving model to ../cache/InceptionV3.h5
Epoch 8/120

Epoch 00008: val_loss did not improve from 0.81265
Epoch 9/120

Epoch 00009: val_loss improved from 0.81265 to 0.79193, saving model to ../cache/InceptionV3.h5
Epoch 10/120

Epoch 00010: val_loss did not improve from 0.79193
Epoch 11/120

Epoch 00011: v


Epoch 00037: val_loss did not improve from 0.73782
Epoch 38/120

Epoch 00038: val_loss did not improve from 0.73782
Epoch 39/120

Epoch 00039: val_loss did not improve from 0.73782

Epoch 00039: ReduceLROnPlateau reducing learning rate to 9.999999092680235e-13.
Epoch 40/120

Epoch 00040: val_loss did not improve from 0.73782
Epoch 41/120

Epoch 00041: val_loss improved from 0.73782 to 0.73707, saving model to ../cache/InceptionV3.h5
Epoch 42/120

Epoch 00042: val_loss did not improve from 0.73707
Epoch 43/120

Epoch 00043: val_loss did not improve from 0.73707
Epoch 44/120

Epoch 00044: val_loss did not improve from 0.73707

Epoch 00044: ReduceLROnPlateau reducing learning rate to 9.9999988758398e-14.
Epoch 45/120

Epoch 00045: val_loss did not improve from 0.73707
Epoch 46/120

Epoch 00046: val_loss did not improve from 0.73707
Epoch 47/120

Epoch 00047: val_loss did not improve from 0.73707

Epoch 00047: ReduceLROnPlateau reducing learning rate to 9.999999146890344e-15.
Epoch 48/120

  0%|          | 0/6214 [00:00<?, ?it/s]


Epoch 00016: val_loss did not improve from 0.73619

Epoch 00016: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-23.


100%|██████████| 6214/6214 [05:39<00:00, 18.28it/s]
11702it [08:49, 22.09it/s]


In [18]:
np.save('../cache/oof_class_preds-13.npy', oof_class_preds)
np.save('../cache/sub_class_preds-13.npy', sub_class_preds)

In [19]:
# oof_class_preds = np.zeros((train_dataset_info.shape[0], 28))
# sub_class_preds = np.zeros((submit.shape[0], 28))
# score_predict = model.predict(image[np.newaxis])[0]
# oof_class_preds[idx] = score_predict

In [20]:
# for train_index, test_index in kf.split(indexes):
# ...    print("TRAIN:", train_index, "TEST:", test_index)

In [21]:
sub_class_preds.shape

(11702, 28)

In [22]:
predicted = []
for line in tqdm(sub_class_preds):
    label_predict = np.arange(28)[line>=0.2]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)

100%|██████████| 11702/11702 [00:00<00:00, 89289.54it/s]


In [23]:
len(predicted)

11702

In [24]:
predicted

['2',
 '5 25',
 '0 5 25',
 '0 25',
 '7 25',
 '4',
 '4 25',
 '0 23 25',
 '0',
 '0 25',
 '17 18 19 25',
 '3 5',
 '0 2 25',
 '7 9 10 20',
 '23',
 '0 4 5 18 25',
 '2 14',
 '0 5',
 '14 21',
 '0 5',
 '6',
 '3 5 24',
 '0 2 11 25',
 '0',
 '0 4 25',
 '0 11 12 21 25 26',
 '0',
 '0',
 '0 25',
 '0',
 '0 21',
 '0 7 25',
 '14 16 17 18 21 25',
 '0 5 25',
 '0 7',
 '13',
 '0 13 25',
 '0 3',
 '0 21 25',
 '1',
 '0 16 17 21 25',
 '6 25',
 '0 21 25',
 '18 19 25',
 '0 16 25',
 '6',
 '0',
 '0 23',
 '6 11 23',
 '0',
 '0 16 17 25',
 '0 5',
 '8 20 23 24',
 '0 25',
 '3',
 '0 25',
 '0 25',
 '11 23',
 '0 25',
 '21 25',
 '2 21 22',
 '0 5 21',
 '0 14 16 21',
 '7 21 25',
 '23',
 '0 18 19 25',
 '3 6 21 25',
 '0 25',
 '0 16 25',
 '21',
 '2 3',
 '0 2',
 '14',
 '4',
 '0 21',
 '0',
 '0 2 4',
 '0 1',
 '0 25',
 '0 25',
 '6 23 25',
 '0 25',
 '0 21',
 '21 25',
 '17 18',
 '0 23 25',
 '20 23',
 '0 21',
 '14 16 25',
 '11 14',
 '0 25',
 '11 14',
 '23',
 '13',
 '0 16 17 25',
 '0 25',
 '7 17 18 25',
 '0 7 19 25',
 '24',
 '0 23 25',

In [25]:
submit['Predicted'] = predicted

In [26]:
# Create submit
# submit = pd.read_csv('../data/sample_submission.csv')
# predicted = []
# draw_predict = []
# # model.load_weights('../cache/InceptionV3.h5')
# for name in tqdm(submit['Id']):
#     path = os.path.join('../data/test/', name)
#     image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
#     score_predict = model.predict(image[np.newaxis])[0]
#     draw_predict.append(score_predict)
#     label_predict = np.arange(28)[score_predict>=0.2]
#     str_predict_label = ' '.join(str(l) for l in label_predict)
#     predicted.append(str_predict_label)

# submit['Predicted'] = predicted
# np.save('../cache/draw_predict_InceptionV3-8.npy', score_predict)
# submit.to_csv('../submissions/submit_InceptionV3.csv', index=False)

In [27]:
submit.to_csv('../submissions/sub13-a.csv', index=False)

In [28]:
#https://stackoverflow.com/questions/1855095/how-to-create-a-zip-archive-of-a-directory
def backup_project_as_zip(project_dir, zip_file):
    assert(os.path.isdir(project_dir))
    assert(os.path.isdir(os.path.dirname(zip_file)))
    shutil.make_archive(zip_file.replace('.zip',''), 'zip', project_dir)
    pass

In [29]:
import datetime, shutil
now = datetime.datetime.now()
print(now)
PROJECT_PATH = '/home/watts/lal/Kaggle/kagglehp/scripts_nbs'
backup_project_as_zip(PROJECT_PATH, '../cache/code.scripts_nbs.%s.zip'%now)

2018-10-30 00:48:13.957997


In [32]:
!pip install -U kaggle

[33mCache entry deserialization failed, entry ignored[0m
Collecting kaggle
  Downloading https://files.pythonhosted.org/packages/83/9b/ac57e15fbb239c6793c8d0b7dfd1a4c4a025eaa9f791b5388a7afb515aed/kaggle-1.5.0.tar.gz (53kB)
[K    100% |████████████████████████████████| 61kB 183kB/s ta 0:00:01
[?25hRequirement already up-to-date: urllib3<1.23.0,>=1.15 in /home/watts/anaconda3/envs/hpg/lib/python3.6/site-packages (from kaggle)
Requirement already up-to-date: six>=1.10 in /home/watts/anaconda3/envs/hpg/lib/python3.6/site-packages (from kaggle)
Collecting certifi (from kaggle)
  Downloading https://files.pythonhosted.org/packages/56/9d/1d02dd80bc4cd955f98980f28c5ee2200e1209292d5f9e9cc8d030d18655/certifi-2018.10.15-py2.py3-none-any.whl (146kB)
[K    100% |████████████████████████████████| 153kB 545kB/s ta 0:00:01
[?25hCollecting python-dateutil (from kaggle)
  Downloading https://files.pythonhosted.org/packages/74/68/d87d9b36af36f44254a8d512cbfc48369103a3b9e474be9bdfe536abfc45/python_d

In [33]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub13-a.csv -m ""

100%|████████████████████████████████████████| 492k/492k [00:13<00:00, 37.3kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 349 ms, sys: 224 ms, total: 572 ms
Wall time: 17.2 s


In [34]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub13-a.csv   2018-10-29 19:20:40               complete  0.444        None          
sub12-d.csv   2018-10-26 02:09:32               complete  0.466        None          
sub12-h.csv   2018-10-26 02:07:56               complete  0.389        None          
sub12-g.csv   2018-10-25 00:55:10               complete  0.433        None          
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None          
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv   2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv   2018-10-24 00:34:46          

In [35]:
predicted = []
for line in tqdm(sub_class_preds):
    label_predict = np.arange(28)[line>=0.25]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)

100%|██████████| 11702/11702 [00:00<00:00, 90436.26it/s]


In [36]:
submit['Predicted'] = predicted
submit.to_csv('../submissions/sub13-b.csv', index=False)

In [37]:
import datetime, shutil
now = datetime.datetime.now()
print(now)
PROJECT_PATH = '/home/watts/lal/Kaggle/kagglehp/scripts_nbs'
backup_project_as_zip(PROJECT_PATH, '../cache/code.scripts_nbs.%s.zip'%now)

2018-10-30 00:51:23.536802


In [38]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub13-b.csv -m ""

100%|████████████████████████████████████████| 486k/486k [00:13<00:00, 37.7kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 349 ms, sys: 199 ms, total: 548 ms
Wall time: 16.2 s


In [39]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub13-b.csv   2018-10-29 19:21:53               complete  0.448        None          
sub13-a.csv   2018-10-29 19:20:40               complete  0.444        None          
sub12-d.csv   2018-10-26 02:09:32               complete  0.466        None          
sub12-h.csv   2018-10-26 02:07:56               complete  0.389        None          
sub12-g.csv   2018-10-25 00:55:10               complete  0.433        None          
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None          
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv   2018-10-24 00:35:39          

In [42]:
d = {0.3:'bb', 0.35:'c', 0.4:'d', 0.45:'e', 0.5:'f'}

In [43]:
for alpha in [0.3, 0.35, 0.4, 0.45, 0.5]:
    predicted = []
    for line in tqdm(sub_class_preds):
        label_predict = np.arange(28)[line>=alpha]
        str_predict_label = ' '.join(str(l) for l in label_predict)
        predicted.append(str_predict_label)
    submit['Predicted'] = predicted
    name = '../submissions/sub13-' + d[alpha] + '.csv'
    print(name)
    submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 96444.28it/s]
100%|██████████| 11702/11702 [00:00<00:00, 107148.54it/s]
 99%|█████████▉| 11610/11702 [00:00<00:00, 116077.42it/s]

../submissions/sub13-bb.csv
../submissions/sub13-c.csv


100%|██████████| 11702/11702 [00:00<00:00, 114139.61it/s]
100%|██████████| 11702/11702 [00:00<00:00, 109594.41it/s]
  0%|          | 0/11702 [00:00<?, ?it/s]

../submissions/sub13-d.csv
../submissions/sub13-e.csv


100%|██████████| 11702/11702 [00:00<00:00, 117229.73it/s]

../submissions/sub13-f.csv





In [44]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub13-bb.csv -m ""

100%|████████████████████████████████████████| 481k/481k [00:16<00:00, 30.2kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 427 ms, sys: 214 ms, total: 641 ms
Wall time: 19.6 s


In [45]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub13-bb.csv  2018-10-29 19:23:01               complete  0.454        None          
sub13-b.csv   2018-10-29 19:21:53               complete  0.448        None          
sub13-a.csv   2018-10-29 19:20:40               complete  0.444        None          
sub12-d.csv   2018-10-26 02:09:32               complete  0.466        None          
sub12-h.csv   2018-10-26 02:07:56               complete  0.389        None          
sub12-g.csv   2018-10-25 00:55:10               complete  0.433        None          
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None          
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56          

In [46]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub13-c.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

100%|████████████████████████████████████████| 477k/477k [00:08<00:00, 55.1kB/s]
Successfully submitted to Human Protein Atlas Image ClassificationfileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub13-c.csv   2018-10-29 19:24:00               complete  0.459        None          
sub13-bb.csv  2018-10-29 19:23:01               complete  0.454        None          
sub13-b.csv   2018-10-29 19:21:53               complete  0.448        None          
sub13-a.csv   2018-10-29 19:20:40               complete  0.444        None          
sub12-d.csv   2018-10-26 02:09:32               complete  0.466        None          
sub12-h.csv   2018-10-26 02:07:56               complete  0.389        None          
sub12-g.csv   2018-10-25 00:55:10               complete  0.433        None          
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None    