In [1]:
# https://www.kaggle.com/mathormad/inceptionv3-baseline-lb-0-379/code

In [2]:
# !pip install iterative-stratification

In [3]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image
import cv2
from sklearn.utils import class_weight, shuffle
from sklearn.model_selection import KFold
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold, MultilabelStratifiedShuffleSplit

import warnings
warnings.filterwarnings("ignore")
SIZE = 299

In [4]:
# https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric/notebook
import tensorflow as tf

def f1(y_true, y_pred):
    y_pred = K.round(y_pred)
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

def f1_loss(y_true, y_pred):
    
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) + (1 - K.mean(f1))

In [5]:
# Load dataset info
path_to_train = '../data/train/'
data = pd.read_csv('../data/train.csv')

In [6]:
data.head()

Unnamed: 0,Id,Target
0,00070df0-bbc3-11e8-b2bc-ac1f6b6435d0,16 0
1,000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0,7 1 2 0
2,000a9596-bbc4-11e8-b2bc-ac1f6b6435d0,5
3,000c99ba-bba4-11e8-b2b9-ac1f6b6435d0,1
4,001838f8-bbca-11e8-b2bc-ac1f6b6435d0,18


In [7]:
train_dataset_info = []
for name, labels in zip(data['Id'], data['Target'].str.split(' ')):
    train_dataset_info.append({
        'path':os.path.join(path_to_train, name),
        'labels':np.array([int(label) for label in labels])})
train_dataset_info = np.array(train_dataset_info)

In [8]:
train_dataset_info

array([{'path': '../data/train/00070df0-bbc3-11e8-b2bc-ac1f6b6435d0', 'labels': array([16,  0])},
       {'path': '../data/train/000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0', 'labels': array([7, 1, 2, 0])},
       {'path': '../data/train/000a9596-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([5])},
       ...,
       {'path': '../data/train/fff189d8-bbab-11e8-b2ba-ac1f6b6435d0', 'labels': array([7])},
       {'path': '../data/train/fffdf7e0-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([25,  2, 21])},
       {'path': '../data/train/fffe0ffe-bbc0-11e8-b2bb-ac1f6b6435d0', 'labels': array([2, 0])}],
      dtype=object)

In [9]:
class data_generator:
    
    def create_train(dataset_info, batch_size, shape, augument=True):
        assert shape[2] == 3
        while True:
            dataset_info = shuffle(dataset_info)
            for start in range(0, len(dataset_info), batch_size):
                end = min(start + batch_size, len(dataset_info))
                batch_images = []
                X_train_batch = dataset_info[start:end]
                batch_labels = np.zeros((len(X_train_batch), 28))
                for i in range(len(X_train_batch)):
                    image = data_generator.load_image(
                        X_train_batch[i]['path'], shape)   
                    if augument:
                        image = data_generator.augment(image)
                    batch_images.append(image/255.)
                    batch_labels[i][X_train_batch[i]['labels']] = 1
                yield np.array(batch_images, np.float32), batch_labels

    def load_image(path, shape):
        image_red_ch = Image.open(path+'_red.png')
        image_yellow_ch = Image.open(path+'_yellow.png')
        image_green_ch = Image.open(path+'_green.png')
        image_blue_ch = Image.open(path+'_blue.png')
        image = np.stack((
            np.array(image_red_ch),
            np.array(image_green_ch), 
            np.array(image_blue_ch)), -1)
        w, h = 512, 512
        zero_data = np.zeros((h, w), dtype=np.uint8)
#         image2 = np.stack((
#             np.array(image_yellow_ch),
#             zero_data, zero_data), -1)
#         print(image1.shape, image2.shape)
#         image = np.vstack((image1, image2))
        image = cv2.resize(image, (shape[0], shape[1]))
        return image

    def augment(image):
        augment_img = iaa.Sequential([
            iaa.OneOf([
                iaa.Affine(rotate=0),
                iaa.Affine(rotate=90),
                iaa.Affine(rotate=180),
                iaa.Affine(rotate=270),
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
            ])], random_order=True)

        image_aug = augment_img.augment_image(image)
        return image_aug


In [10]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalMaxPooling2D, BatchNormalization, Input, Conv2D
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras
from keras.models import Model

Using TensorFlow backend.


In [11]:
def create_model(input_shape, n_out):
    input_tensor = Input(shape=input_shape)
    base_model = InceptionV3(include_top=False,
                   weights='imagenet',
                   input_shape=input_shape)
    bn = BatchNormalization()(input_tensor)
    x = base_model(bn)
    x = Conv2D(32, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_out, activation='sigmoid')(x)
    model = Model(input_tensor, output)
    
    return model

In [12]:
import tensorflow as tf
from tensorflow.python.ops import array_ops

# https://github.com/ailias/Focal-Loss-implement-on-Tensorflow/blob/master/focal_loss.py
def focal_loss_org(prediction_tensor, target_tensor, weights=None, alpha=0.25, gamma=2):
    r"""Compute focal loss for predictions.
        Multi-labels Focal loss formula:
            FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p)
                 ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.
    Args:
     prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
     target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
     weights: A float tensor of shape [batch_size, num_anchors]
     alpha: A scalar tensor for focal loss alpha hyper-parameter
     gamma: A scalar tensor for focal loss gamma hyper-parameter
    Returns:
        loss: A (scalar) tensor representing the value of the loss function
    """
    sigmoid_p = tf.nn.sigmoid(prediction_tensor)
    zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)
    
    # For poitive prediction, only need consider front part loss, back part is 0;
    # target_tensor > zeros <=> z=1, so poitive coefficient = z - p.
    pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros)
    
    # For negative prediction, only need consider back part loss, front part is 0;
    # target_tensor > zeros <=> z=1, so negative coefficient = 0.
    neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
    per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                          - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
    return tf.reduce_sum(per_entry_cross_ent)


In [13]:
def focal_loss(weights=None, alpha=0.25, gamma=2):
    def focal_loss_my(target_tensor, prediction_tensor, ):
        r"""Compute focal loss for predictions.
            Multi-labels Focal loss formula:
                FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p)
                     ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.
        Args:
         prediction_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing the predicted logits for each class
         target_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing one-hot encoded classification targets
         weights: A float tensor of shape [batch_size, num_anchors]
         alpha: A scalar tensor for focal loss alpha hyper-parameter
         gamma: A scalar tensor for focal loss gamma hyper-parameter
        Returns:
            loss: A (scalar) tensor representing the value of the loss function
        """
        sigmoid_p = tf.nn.sigmoid(prediction_tensor)
        zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)

        # For poitive prediction, only need consider front part loss, back part is 0;
        # target_tensor > zeros <=> z=1, so poitive coefficient = z - p.
        pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros)

        # For negative prediction, only need consider back part loss, front part is 0;
        # target_tensor > zeros <=> z=1, so negative coefficient = 0.
        neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
        per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                              - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
        return tf.reduce_sum(per_entry_cross_ent)
#         return K.mean(K.binary_crossentropy(target_tensor, prediction_tensor), axis=-1) + tf.reduce_sum(per_entry_cross_ent)
    return focal_loss_my

In [14]:
def focal_loss_fixed(y_true, y_pred):
    gamma = 2.
    alpha = 0.25
    print(y_pred)
    print(y_true)
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

#     pt_1 = K.clip(pt_1, 1e-3, .999)
#     pt_0 = K.clip(pt_0, 1e-3, .999)

    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))


In [15]:
# def focal_loss(gamma=2., alpha=.25):
#     def focal_loss_fixed(y_true, y_pred):
#         pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
#         pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

#         pt_1 = K.clip(pt_1, 1e-3, .999)
#         pt_0 = K.clip(pt_0, 1e-3, .999)

#         return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
#     return focal_loss_fixed

In [16]:
# create callbacks list
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split



In [17]:
train_dataset_info.shape

(31072,)

In [18]:
n_classes = 28
y_train = np.zeros((train_dataset_info.shape[0], n_classes))
print(y_train.shape)

idx = 0
for labels in tqdm(data['Target'].str.split(' ')):
#     print(labels)
    for label in labels:
        y_train[idx][int(label)] = 1
    idx += 1

100%|██████████| 31072/31072 [00:00<00:00, 625442.90it/s]

(31072, 28)





In [20]:

# split data into train, valid
indexes = np.arange(train_dataset_info.shape[0])
# np.random.shuffle(indexes)
# train_indexes, valid_indexes = train_test_split(indexes, test_size=0.15, random_state=8)
n_splits = 5
# kf = KFold(n_splits=n_splits, random_state=42, shuffle=True)
kf = MultilabelStratifiedShuffleSplit(n_splits=5, random_state=42)
submit = pd.read_csv('../data/sample_submission.csv')

# train_generator = data_generator.create_train(
#     train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
# validation_generator = data_generator.create_train(
#     train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)

oof_class_preds = np.zeros((train_dataset_info.shape[0], 28))
sub_class_preds = np.zeros((submit.shape[0], 28))

oof_class_preds = np.load('../cache/oof_class_preds-14.npy')
sub_class_preds = np.load('../cache/sub_class_preds-14.npy')
                          
fold_ = 0
epochs = 10; batch_size = 16
my_fold = 0
for train_indexes, valid_indexes in kf.split(indexes, y_train):
    print(len(train_indexes), len(valid_indexes))
    if my_fold < 2:
        my_fold += 1
        continue
    checkpoint = ModelCheckpoint('../cache/InceptionV3.h5', monitor='val_loss', verbose=1, 
                                 save_best_only=True, mode='min', save_weights_only = True)
    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, 
                                       verbose=1, mode='auto', epsilon=0.0001)
    early = EarlyStopping(monitor="val_loss", 
                          mode="min", 
                          patience=6)
    callbacks_list = [checkpoint, early, reduceLROnPlat]


    _preds = []
    # create train and valid datagens
    train_generator = data_generator.create_train(
        train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
    validation_generator = data_generator.create_train(
        train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)

    # warm up model
    model = create_model(
        input_shape=(SIZE,SIZE,3), 
        n_out=28)

    for layer in model.layers:
        layer.trainable = False
    model.layers[-1].trainable = True
    model.layers[-2].trainable = True
    model.layers[-3].trainable = True
    model.layers[-4].trainable = True
    model.layers[-5].trainable = True
    model.layers[-6].trainable = True
    
    model.compile(
        loss=f1_loss, 
        optimizer=Adam(1e-03),
        metrics=[f1])
#     model.summary()

    model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(float(len(train_indexes)) / float(batch_size)),
        validation_data=validation_generator,
        validation_steps=np.ceil(float(len(valid_indexes)) / float(batch_size)),
        epochs=2, 
        verbose=1)
    
    # train all layers
    epochs=120
    for layer in model.layers:
        layer.trainable = True
    model.compile(loss=f1_loss,
                optimizer=Adam(lr=1e-4),
                metrics=[f1])
    model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(float(len(train_indexes)) / float(batch_size)),
        validation_data=validation_generator,
        validation_steps=np.ceil(float(len(valid_indexes)) / float(batch_size)),
        epochs=epochs, 
        verbose=1,
        callbacks=callbacks_list)
    
    for idx in tqdm(valid_indexes):
        item = train_dataset_info[idx]
        path = item['path']
        labels = item['labels']
        image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
        score_predict = model.predict(image[np.newaxis])[0]
        oof_class_preds[idx] = score_predict
        np.save('../cache/oof_class_preds-14.npy', oof_class_preds)
        
    for idx, name in tqdm(enumerate(submit['Id'])):
        path = os.path.join('../data/test/', name)
        image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
        score_predict = model.predict(image[np.newaxis])[0]
        sub_class_preds[idx] += score_predict
        np.save('../cache/sub_class_preds-14.npy', sub_class_preds)
    fold_ += 1
sub_class_preds /= n_splits

27958 3114
27973 3099
27969 3103
27995 3077
27928 3144


In [None]:
# 27958 3114
# Epoch 1/2
# 1748/1748 [==============================] - 1797s 1s/step - loss: 1.1195 - f1: 0.0376 - val_loss: 1.1698 - val_f1: 0.0320
# Epoch 2/2
# 1748/1748 [==============================] - 325s 186ms/step - loss: 1.1067 - f1: 0.0476 - val_loss: 1.1549 - val_f1: 0.0308
# Epoch 1/120
# 1748/1748 [==============================] - 413s 236ms/step - loss: 1.0568 - f1: 0.0977 - val_loss: 0.9852 - val_f1: 0.1725

# Epoch 00001: val_loss improved from inf to 0.98515, saving model to ../cache/InceptionV3.h5
# Epoch 2/120
# 1748/1748 [==============================] - 396s 227ms/step - loss: 0.9804 - f1: 0.1686 - val_loss: 0.9657 - val_f1: 0.2039

# Epoch 00002: val_loss improved from 0.98515 to 0.96568, saving model to ../cache/InceptionV3.h5
# Epoch 3/120
# 1748/1748 [==============================] - 401s 229ms/step - loss: 0.9326 - f1: 0.2086 - val_loss: 0.8926 - val_f1: 0.2645

# Epoch 00003: val_loss improved from 0.96568 to 0.89257, saving model to ../cache/InceptionV3.h5
# Epoch 4/120
# 1748/1748 [==============================] - 403s 230ms/step - loss: 0.9055 - f1: 0.2302 - val_loss: 0.8653 - val_f1: 0.2878

# Epoch 00004: val_loss improved from 0.89257 to 0.86533, saving model to ../cache/InceptionV3.h5
# Epoch 5/120
# 1748/1748 [==============================] - 406s 232ms/step - loss: 0.8883 - f1: 0.2408 - val_loss: 0.8512 - val_f1: 0.3025

# Epoch 00005: val_loss improved from 0.86533 to 0.85124, saving model to ../cache/InceptionV3.h5
# Epoch 6/120
# 1748/1748 [==============================] - 407s 233ms/step - loss: 0.8755 - f1: 0.2495 - val_loss: 0.8227 - val_f1: 0.3191

# Epoch 00006: val_loss improved from 0.85124 to 0.82271, saving model to ../cache/InceptionV3.h5
# Epoch 7/120
# 1748/1748 [==============================] - 404s 231ms/step - loss: 0.8642 - f1: 0.2577 - val_loss: 0.8060 - val_f1: 0.3284

# Epoch 00007: val_loss improved from 0.82271 to 0.80597, saving model to ../cache/InceptionV3.h5
# Epoch 8/120
# 1748/1748 [==============================] - 404s 231ms/step - loss: 0.8521 - f1: 0.2657 - val_loss: 0.8056 - val_f1: 0.3273

# Epoch 00008: val_loss improved from 0.80597 to 0.80562, saving model to ../cache/InceptionV3.h5
# Epoch 9/120
# 1748/1748 [==============================] - 402s 230ms/step - loss: 0.8428 - f1: 0.2711 - val_loss: 0.7919 - val_f1: 0.3408

# Epoch 00009: val_loss improved from 0.80562 to 0.79187, saving model to ../cache/InceptionV3.h5
# Epoch 10/120
# 1748/1748 [==============================] - 399s 228ms/step - loss: 0.8353 - f1: 0.2757 - val_loss: 0.8004 - val_f1: 0.3347

# Epoch 00010: val_loss did not improve from 0.79187
# Epoch 11/120
# 1748/1748 [==============================] - 400s 229ms/step - loss: 0.8286 - f1: 0.2815 - val_loss: 0.7983 - val_f1: 0.3329

# Epoch 00011: val_loss did not improve from 0.79187
# Epoch 12/120
# 1748/1748 [==============================] - 396s 226ms/step - loss: 0.8188 - f1: 0.2875 - val_loss: 0.7782 - val_f1: 0.3509

# Epoch 00012: val_loss improved from 0.79187 to 0.77825, saving model to ../cache/InceptionV3.h5
# Epoch 13/120
# 1748/1748 [==============================] - 396s 226ms/step - loss: 0.8137 - f1: 0.2905 - val_loss: 0.7710 - val_f1: 0.3602

# Epoch 00013: val_loss improved from 0.77825 to 0.77102, saving model to ../cache/InceptionV3.h5
# Epoch 14/120
# 1748/1748 [==============================] - 396s 227ms/step - loss: 0.8078 - f1: 0.2949 - val_loss: 0.7753 - val_f1: 0.3552

# Epoch 00014: val_loss did not improve from 0.77102
# Epoch 15/120
# 1748/1748 [==============================] - 396s 226ms/step - loss: 0.8001 - f1: 0.2995 - val_loss: 0.7773 - val_f1: 0.3505

# Epoch 00015: val_loss did not improve from 0.77102
# Epoch 16/120
# 1748/1748 [==============================] - 395s 226ms/step - loss: 0.7972 - f1: 0.3004 - val_loss: 0.8064 - val_f1: 0.3335

# Epoch 00016: val_loss did not improve from 0.77102

# Epoch 00016: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
# Epoch 17/120
# 1748/1748 [==============================] - 396s 226ms/step - loss: 0.7710 - f1: 0.3174 - val_loss: 0.7301 - val_f1: 0.3866

# Epoch 00017: val_loss improved from 0.77102 to 0.73014, saving model to ../cache/InceptionV3.h5
# Epoch 18/120
# 1748/1748 [==============================] - 395s 226ms/step - loss: 0.7606 - f1: 0.3238 - val_loss: 0.7375 - val_f1: 0.3798

# Epoch 00018: val_loss did not improve from 0.73014
# Epoch 19/120
# 1748/1748 [==============================] - 396s 226ms/step - loss: 0.7575 - f1: 0.3253 - val_loss: 0.7297 - val_f1: 0.3862

# Epoch 00019: val_loss improved from 0.73014 to 0.72971, saving model to ../cache/InceptionV3.h5
# Epoch 20/120
# 1748/1748 [==============================] - 394s 225ms/step - loss: 0.7554 - f1: 0.3257 - val_loss: 0.7304 - val_f1: 0.3831

# Epoch 00020: val_loss did not improve from 0.72971
# Epoch 21/120
# 1748/1748 [==============================] - 395s 226ms/step - loss: 0.7496 - f1: 0.3294 - val_loss: 0.7343 - val_f1: 0.3803

# Epoch 00021: val_loss did not improve from 0.72971
# Epoch 22/120
# 1748/1748 [==============================] - 393s 225ms/step - loss: 0.7482 - f1: 0.3302 - val_loss: 0.7265 - val_f1: 0.3887

# Epoch 00022: val_loss improved from 0.72971 to 0.72649, saving model to ../cache/InceptionV3.h5
# Epoch 23/120
# 1748/1748 [==============================] - 394s 226ms/step - loss: 0.7444 - f1: 0.3328 - val_loss: 0.7339 - val_f1: 0.3818

# Epoch 00023: val_loss did not improve from 0.72649
# Epoch 24/120
# 1748/1748 [==============================] - 394s 225ms/step - loss: 0.7426 - f1: 0.3338 - val_loss: 0.7304 - val_f1: 0.3866

# Epoch 00024: val_loss did not improve from 0.72649
# Epoch 25/120
# 1748/1748 [==============================] - 394s 226ms/step - loss: 0.7398 - f1: 0.3354 - val_loss: 0.7324 - val_f1: 0.3816

# Epoch 00025: val_loss did not improve from 0.72649

# Epoch 00025: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
# Epoch 26/120
# 1748/1748 [==============================] - 395s 226ms/step - loss: 0.7369 - f1: 0.3367 - val_loss: 0.7284 - val_f1: 0.3866

# Epoch 00026: val_loss did not improve from 0.72649
# Epoch 27/120
# 1748/1748 [==============================] - 394s 225ms/step - loss: 0.7358 - f1: 0.3378 - val_loss: 0.7321 - val_f1: 0.3835

# Epoch 00027: val_loss did not improve from 0.72649
# Epoch 28/120
# 1748/1748 [==============================] - 393s 225ms/step - loss: 0.7357 - f1: 0.3381 - val_loss: 0.7290 - val_f1: 0.3877
#   0%|          | 0/3114 [00:00<?, ?it/s]

# Epoch 00028: val_loss did not improve from 0.72649

# Epoch 00028: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
# 100%|██████████| 3114/3114 [02:42<00:00, 19.14it/s]
# 11702it [21:06,  9.24it/s]
# 27973 3099
# Epoch 1/2
# 1749/1749 [==============================] - 331s 190ms/step - loss: 1.1197 - f1: 0.0378 - val_loss: 1.1816 - val_f1: 0.0357
# Epoch 2/2
# 1749/1749 [==============================] - 319s 182ms/step - loss: 1.1067 - f1: 0.0478 - val_loss: 1.2044 - val_f1: 0.0347
# Epoch 1/120
# 1749/1749 [==============================] - 405s 232ms/step - loss: 1.0601 - f1: 0.0933 - val_loss: 0.9947 - val_f1: 0.1676

# Epoch 00001: val_loss improved from inf to 0.99467, saving model to ../cache/InceptionV3.h5
# Epoch 2/120
# 1749/1749 [==============================] - 384s 220ms/step - loss: 0.9880 - f1: 0.1613 - val_loss: 0.9376 - val_f1: 0.2233

# Epoch 00002: val_loss improved from 0.99467 to 0.93759, saving model to ../cache/InceptionV3.h5
# Epoch 3/120
# 1749/1749 [==============================] - 385s 220ms/step - loss: 0.9386 - f1: 0.2050 - val_loss: 0.8781 - val_f1: 0.2778

# Epoch 00003: val_loss improved from 0.93759 to 0.87811, saving model to ../cache/InceptionV3.h5
# Epoch 4/120
# 1749/1749 [==============================] - 385s 220ms/step - loss: 0.9109 - f1: 0.2256 - val_loss: 0.8430 - val_f1: 0.2990

# Epoch 00004: val_loss improved from 0.87811 to 0.84296, saving model to ../cache/InceptionV3.h5
# Epoch 5/120
# 1749/1749 [==============================] - 384s 220ms/step - loss: 0.8937 - f1: 0.2383 - val_loss: 0.8510 - val_f1: 0.2958

# Epoch 00005: val_loss did not improve from 0.84296
# Epoch 6/120
# 1749/1749 [==============================] - 384s 219ms/step - loss: 0.8799 - f1: 0.2472 - val_loss: 0.8430 - val_f1: 0.3046

# Epoch 00006: val_loss did not improve from 0.84296
# Epoch 7/120
# 1749/1749 [==============================] - 384s 219ms/step - loss: 0.8679 - f1: 0.2548 - val_loss: 0.8243 - val_f1: 0.3158

# Epoch 00007: val_loss improved from 0.84296 to 0.82426, saving model to ../cache/InceptionV3.h5
# Epoch 8/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.8553 - f1: 0.2641 - val_loss: 0.8559 - val_f1: 0.2988

# Epoch 00008: val_loss did not improve from 0.82426
# Epoch 9/120
# 1749/1749 [==============================] - 384s 219ms/step - loss: 0.8510 - f1: 0.2662 - val_loss: 0.8497 - val_f1: 0.3030

# Epoch 00009: val_loss did not improve from 0.82426
# Epoch 10/120
# 1749/1749 [==============================] - 382s 218ms/step - loss: 0.8410 - f1: 0.2731 - val_loss: 0.8813 - val_f1: 0.2890

# Epoch 00010: val_loss did not improve from 0.82426

# Epoch 00010: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
# Epoch 11/120
# 1749/1749 [==============================] - 382s 219ms/step - loss: 0.8165 - f1: 0.2874 - val_loss: 0.7563 - val_f1: 0.3650

# Epoch 00011: val_loss improved from 0.82426 to 0.75628, saving model to ../cache/InceptionV3.h5
# Epoch 12/120
# 1749/1749 [==============================] - 382s 218ms/step - loss: 0.8073 - f1: 0.2933 - val_loss: 0.7564 - val_f1: 0.3663

# Epoch 00012: val_loss did not improve from 0.75628
# Epoch 13/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.8035 - f1: 0.2963 - val_loss: 0.7528 - val_f1: 0.3692

# Epoch 00013: val_loss improved from 0.75628 to 0.75284, saving model to ../cache/InceptionV3.h5
# Epoch 14/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7986 - f1: 0.2995 - val_loss: 0.7515 - val_f1: 0.3681

# Epoch 00014: val_loss improved from 0.75284 to 0.75147, saving model to ../cache/InceptionV3.h5
# Epoch 15/120
# 1749/1749 [==============================] - 382s 219ms/step - loss: 0.7943 - f1: 0.3026 - val_loss: 0.7538 - val_f1: 0.3696

# Epoch 00015: val_loss did not improve from 0.75147
# Epoch 16/120
# 1749/1749 [==============================] - 382s 219ms/step - loss: 0.7955 - f1: 0.3003 - val_loss: 0.7536 - val_f1: 0.3683

# Epoch 00016: val_loss did not improve from 0.75147
# Epoch 17/120
# 1749/1749 [==============================] - 384s 219ms/step - loss: 0.7889 - f1: 0.3054 - val_loss: 0.7508 - val_f1: 0.3676

# Epoch 00017: val_loss improved from 0.75147 to 0.75082, saving model to ../cache/InceptionV3.h5
# Epoch 18/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7860 - f1: 0.3074 - val_loss: 0.7566 - val_f1: 0.3672

# Epoch 00018: val_loss did not improve from 0.75082
# Epoch 19/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7832 - f1: 0.3099 - val_loss: 0.7517 - val_f1: 0.3718

# Epoch 00019: val_loss did not improve from 0.75082
# Epoch 20/120
# 1749/1749 [==============================] - 382s 219ms/step - loss: 0.7794 - f1: 0.3112 - val_loss: 0.7510 - val_f1: 0.3683

# Epoch 00020: val_loss did not improve from 0.75082

# Epoch 00020: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
# Epoch 21/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7766 - f1: 0.3129 - val_loss: 0.7479 - val_f1: 0.3757

# Epoch 00021: val_loss improved from 0.75082 to 0.74793, saving model to ../cache/InceptionV3.h5
# Epoch 22/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7778 - f1: 0.3123 - val_loss: 0.7495 - val_f1: 0.3689

# Epoch 00022: val_loss did not improve from 0.74793
# Epoch 23/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7773 - f1: 0.3124 - val_loss: 0.7480 - val_f1: 0.3725

# Epoch 00023: val_loss did not improve from 0.74793
# Epoch 24/120
# 1749/1749 [==============================] - 382s 219ms/step - loss: 0.7740 - f1: 0.3146 - val_loss: 0.7491 - val_f1: 0.3715

# Epoch 00024: val_loss did not improve from 0.74793

# Epoch 00024: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
# Epoch 25/120
# 1749/1749 [==============================] - 382s 219ms/step - loss: 0.7737 - f1: 0.3153 - val_loss: 0.7473 - val_f1: 0.3761

# Epoch 00025: val_loss improved from 0.74793 to 0.74733, saving model to ../cache/InceptionV3.h5
# Epoch 26/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7742 - f1: 0.3146 - val_loss: 0.7437 - val_f1: 0.3774

# Epoch 00026: val_loss improved from 0.74733 to 0.74374, saving model to ../cache/InceptionV3.h5
# Epoch 27/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7743 - f1: 0.3141 - val_loss: 0.7483 - val_f1: 0.3721

# Epoch 00027: val_loss did not improve from 0.74374
# Epoch 28/120
# 1749/1749 [==============================] - 384s 219ms/step - loss: 0.7740 - f1: 0.3141 - val_loss: 0.7494 - val_f1: 0.3722

# Epoch 00028: val_loss did not improve from 0.74374
# Epoch 29/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7741 - f1: 0.3147 - val_loss: 0.7469 - val_f1: 0.3752

# Epoch 00029: val_loss did not improve from 0.74374

# Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.0000000116860975e-08.
# Epoch 30/120
# 1749/1749 [==============================] - 383s 219ms/step - loss: 0.7732 - f1: 0.3159 - val_loss: 0.7468 - val_f1: 0.3734

# Epoch 00030: val_loss did not improve from 0.74374
# Epoch 31/120
# 1749/1749 [==============================] - 384s 220ms/step - loss: 0.7745 - f1: 0.3138 - val_loss: 0.7500 - val_f1: 0.3702

# Epoch 00031: val_loss did not improve from 0.74374
# Epoch 32/120
# 1749/1749 [==============================] - 386s 221ms/step - loss: 0.7765 - f1: 0.3127 - val_loss: 0.7468 - val_f1: 0.3748
#   0%|          | 0/3099 [00:00<?, ?it/s]

# Epoch 00032: val_loss did not improve from 0.74374

# Epoch 00032: ReduceLROnPlateau reducing learning rate to 9.999999939225292e-10.
# 100%|██████████| 3099/3099 [02:47<00:00, 18.52it/s]
# 11702it [08:03, 24.19it/s]
# 27969 3103
# Epoch 1/2
# 1749/1749 [==============================] - 331s 189ms/step - loss: 1.1194 - f1: 0.0381 - val_loss: 1.1530 - val_f1: 0.0276
# Epoch 2/2
# 1749/1749 [==============================] - 328s 187ms/step - loss: 1.1066 - f1: 0.0477 - val_loss: 1.2052 - val_f1: 0.0364
# Epoch 1/120
# 1749/1749 [==============================] - 401s 229ms/step - loss: 1.0629 - f1: 0.0893 - val_loss: 1.0045 - val_f1: 0.1535

# Epoch 00001: val_loss improved from inf to 1.00452, saving model to ../cache/InceptionV3.h5
# Epoch 2/120
# 1749/1749 [==============================] - 380s 218ms/step - loss: 0.9884 - f1: 0.1612 - val_loss: 0.9702 - val_f1: 0.1899

# Epoch 00002: val_loss improved from 1.00452 to 0.97018, saving model to ../cache/InceptionV3.h5
# Epoch 3/120
# 1749/1749 [==============================] - 378s 216ms/step - loss: 0.9427 - f1: 0.2013 - val_loss: 0.8851 - val_f1: 0.2652

# Epoch 00003: val_loss improved from 0.97018 to 0.88508, saving model to ../cache/InceptionV3.h5
# Epoch 4/120
# 1749/1749 [==============================] - 379s 217ms/step - loss: 0.9145 - f1: 0.2240 - val_loss: 0.8796 - val_f1: 0.2782

# Epoch 00004: val_loss improved from 0.88508 to 0.87963, saving model to ../cache/InceptionV3.h5
# Epoch 5/120
#  978/1749 [===============>..............] - ETA: 2:24 - loss: 0.8989 - f1: 0.2330

In [None]:
np.save('../cache/oof_class_preds-14.npy', oof_class_preds)
np.save('../cache/sub_class_preds-14.npy', sub_class_preds)

In [22]:
# oof_class_preds = np.zeros((train_dataset_info.shape[0], 28))
# sub_class_preds = np.zeros((submit.shape[0], 28))
# score_predict = model.predict(image[np.newaxis])[0]
# oof_class_preds[idx] = score_predict

In [23]:
# for train_index, test_index in kf.split(indexes):
# ...    print("TRAIN:", train_index, "TEST:", test_index)

In [24]:
sub_class_preds.shape

(11702, 28)

In [25]:
predicted = []
for line in tqdm(sub_class_preds):
    label_predict = np.arange(28)[line>=0.2]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)

100%|██████████| 11702/11702 [00:00<00:00, 82600.97it/s]


In [26]:
len(predicted)

11702

In [27]:
predicted

['2',
 '5 25',
 '0 5 25',
 '0 25',
 '0 7 25',
 '4',
 '0 4 23 25',
 '0 23 25',
 '0',
 '0 25',
 '18 25',
 '3 5',
 '0 25',
 '7 9 20',
 '23',
 '4 18 25',
 '2 14',
 '0 5',
 '14 21',
 '0 5',
 '6',
 '3 5 24',
 '0 11 16 17 25',
 '0 21',
 '0 25',
 '0 11 12 25 26',
 '0',
 '0 5',
 '0 2 25',
 '0',
 '21',
 '0 7 25',
 '14 16 17 18 21 25',
 '0 5 25',
 '0 7 25',
 '13',
 '0 25',
 '0 3',
 '0 5 21 25',
 '1',
 '0 16 17 25',
 '6 25',
 '0 21',
 '18 19 25',
 '0 14 16 25',
 '6',
 '0',
 '0',
 '6 11 23 25',
 '0',
 '0 25',
 '0 5',
 '20 23',
 '0 25',
 '3',
 '0 25',
 '0 25',
 '11 23',
 '0 25',
 '11 21 25',
 '2 21 22 23',
 '0 5 21 25',
 '14 16',
 '7 21 25',
 '23',
 '0 13 18 19 25',
 '3 6 21 25',
 '0 21 22 25',
 '0 16',
 '21 25',
 '2 3',
 '0 2',
 '14',
 '4',
 '21',
 '0',
 '2 4',
 '0 1',
 '0 25',
 '0 25',
 '6 25',
 '0 25',
 '0 21',
 '0 21 25',
 '18 19',
 '0 23 24 25',
 '20 23',
 '0 21',
 '14 25',
 '12 14',
 '0 25',
 '11 21',
 '23',
 '12 13',
 '0 16 17 23 25',
 '0 25',
 '7 17 18 25',
 '0 19 25',
 '24',
 '0 23 25',
 '0

In [28]:
submit['Predicted'] = predicted

In [29]:
# Create submit
# submit = pd.read_csv('../data/sample_submission.csv')
# predicted = []
# draw_predict = []
# # model.load_weights('../cache/InceptionV3.h5')
# for name in tqdm(submit['Id']):
#     path = os.path.join('../data/test/', name)
#     image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
#     score_predict = model.predict(image[np.newaxis])[0]
#     draw_predict.append(score_predict)
#     label_predict = np.arange(28)[score_predict>=0.2]
#     str_predict_label = ' '.join(str(l) for l in label_predict)
#     predicted.append(str_predict_label)

# submit['Predicted'] = predicted
# np.save('../cache/draw_predict_InceptionV3-8.npy', score_predict)
# submit.to_csv('../submissions/submit_InceptionV3.csv', index=False)

In [30]:
submit.to_csv('../submissions/sub12-a.csv', index=False)

In [31]:
#https://stackoverflow.com/questions/1855095/how-to-create-a-zip-archive-of-a-directory
def backup_project_as_zip(project_dir, zip_file):
    assert(os.path.isdir(project_dir))
    assert(os.path.isdir(os.path.dirname(zip_file)))
    shutil.make_archive(zip_file.replace('.zip',''), 'zip', project_dir)
    pass

In [32]:
import datetime, shutil
now = datetime.datetime.now()
print(now)
PROJECT_PATH = '/home/watts/lal/Kaggle/kagglehp/scripts_nbs'
backup_project_as_zip(PROJECT_PATH, '../cache/code.scripts_nbs.%s.zip'%now)

2018-10-25 06:10:33.481598


In [33]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12-a.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 306 ms, sys: 172 ms, total: 478 ms
Wall time: 13.5 s


In [34]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub12-a.csv  2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv  2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv  2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv  2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv  2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv  2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv  2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436   

In [35]:
predicted = []
for line in tqdm(sub_class_preds):
    label_predict = np.arange(28)[line>=0.25]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)

100%|██████████| 11702/11702 [00:00<00:00, 81492.96it/s]


In [36]:
submit['Predicted'] = predicted
submit.to_csv('../submissions/sub12-b.csv', index=False)

In [37]:
import datetime, shutil
now = datetime.datetime.now()
print(now)
PROJECT_PATH = '/home/watts/lal/Kaggle/kagglehp/scripts_nbs'
backup_project_as_zip(PROJECT_PATH, '../cache/code.scripts_nbs.%s.zip'%now)

2018-10-25 06:11:26.291663


In [38]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12-b.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 267 ms, sys: 217 ms, total: 484 ms
Wall time: 13.8 s


In [39]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub12-b.csv  2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv  2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv  2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv  2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv  2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv  2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv  2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv  2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complet

In [41]:
d = {0.3:'bb', 0.35:'c', 0.4:'d', 0.45:'e', 0.5:'f'}

In [42]:
for alpha in [0.3, 0.35, 0.4, 0.45, 0.5]:
    predicted = []
    for line in tqdm(sub_class_preds):
        label_predict = np.arange(28)[line>=alpha]
        str_predict_label = ' '.join(str(l) for l in label_predict)
        predicted.append(str_predict_label)
    submit['Predicted'] = predicted
    name = '../submissions/sub12-' + d[alpha] + '.csv'
    print(name)
    submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 87915.69it/s]
100%|██████████| 11702/11702 [00:00<00:00, 93076.31it/s]
  0%|          | 0/11702 [00:00<?, ?it/s]

../submissions/sub12-bb.csv
../submissions/sub12-c.csv


100%|██████████| 11702/11702 [00:00<00:00, 98104.04it/s]
100%|██████████| 11702/11702 [00:00<00:00, 96662.13it/s]
  0%|          | 0/11702 [00:00<?, ?it/s]

../submissions/sub12-d.csv
../submissions/sub12-e.csv


100%|██████████| 11702/11702 [00:00<00:00, 98385.05it/s]

../submissions/sub12-f.csv





In [43]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12-bb.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 290 ms, sys: 216 ms, total: 506 ms
Wall time: 13.7 s


In [44]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv   2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv   2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv   2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv   2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv   2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv   2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv   2018-10-23 09:11:15          

In [45]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12-c.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None          
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv   2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv   2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv   2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv   2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv   2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv   2018-10-23 09:13:19               comple

In [46]:
oof_class_preds.shape

(31072, 28)

In [47]:
data.head()

Unnamed: 0,Id,Target
0,00070df0-bbc3-11e8-b2bc-ac1f6b6435d0,16 0
1,000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0,7 1 2 0
2,000a9596-bbc4-11e8-b2bc-ac1f6b6435d0,5
3,000c99ba-bba4-11e8-b2b9-ac1f6b6435d0,1
4,001838f8-bbca-11e8-b2bc-ac1f6b6435d0,18


In [48]:
data.shape

(31072, 2)

In [49]:
n_classes = 28
y_train = np.zeros(oof_class_preds.shape)
y_train.shape

(31072, 28)

In [50]:
idx = 0
for labels in tqdm(data['Target'].str.split(' ')):
#     print(labels)
    for label in labels:
        y_train[idx][int(label)] = 1
    idx += 1

100%|██████████| 31072/31072 [00:00<00:00, 741901.66it/s]


In [51]:
y_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 1., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.]])

In [52]:
y_train[0]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [54]:
sub_ridge_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import Ridge
for cls in np.arange(n_classes):
    for alpha in [0.001, 0.01, 0.1, 0.5, 0.75, 1.0]:
        y = y_train[:, cls]
        X = oof_class_preds
        clf = Ridge(alpha=alpha)
        clf.fit(X,y)
        score_ = clf.score(X, y)
        print(alpha, score_)
    print('------------------')
#         X_test = sub_class_preds[:, cls]
#         preds_ = clf.predict(X_test)
#         sub_ridge_preds[:,cls] = preds_

0.001 0.6174081518752018
0.01 0.6174081494907646
0.1 0.6174079419635514
0.5 0.6174049197331472
0.75 0.6174023910597577
1.0 0.6173997536697944
------------------
0.001 0.7042131450176463
0.01 0.7042131449021258
0.1 0.7042131341504188
0.5 0.7042129258072632
0.75 0.7042126913081677
1.0 0.7042123865546952
------------------
0.001 0.6405293650278552
0.01 0.6405293643199262
0.1 0.6405293035770623
0.5 0.6405284696126301
0.75 0.6405278129689744
1.0 0.6405271539313317
------------------
0.001 0.5206868082561187
0.01 0.5206868064937693
0.1 0.5206866550090807
0.5 0.5206845661735036
0.75 0.5206829204240566
1.0 0.5206812723936773
------------------
0.001 0.6225629182679313
0.01 0.6225628870674288
0.1 0.6225602493284829
0.5 0.6225265347174933
0.75 0.6225024258166685
1.0 0.6224802007531789
------------------
0.001 0.47625033808763034
0.01 0.4762503310360723
0.1 0.47624973372408597
0.5 0.4762420258070793
0.75 0.4762364393569697
1.0 0.47623122503579396
------------------
0.001 0.37015383088158105
0.01 

In [55]:
sub_ridge_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import Ridge
for cls in np.arange(n_classes):
    y = y_train[:, cls]
    X = oof_class_preds
    clf = Ridge(alpha=0.1)
    clf.fit(X,y)
    X_test = sub_class_preds
    preds_ = clf.predict(X_test)
    sub_ridge_preds[:,cls] = preds_

In [56]:
sub_class_preds

array([[9.38001867e-02, 1.59430779e-04, 9.98871672e-01, ...,
        1.77784316e-03, 1.12266340e-04, 5.20836841e-09],
       [2.47062426e-02, 2.68127583e-04, 7.85995722e-04, ...,
        6.51888692e-01, 8.71524611e-04, 3.26655725e-05],
       [8.41529155e-01, 2.72278007e-04, 3.96186303e-03, ...,
        9.23864961e-01, 2.01543609e-03, 1.92988443e-05],
       ...,
       [6.59056642e-04, 5.12143007e-05, 3.49444263e-05, ...,
        1.77463120e-03, 5.20864920e-08, 5.91902866e-09],
       [5.01914832e-01, 9.99162483e-01, 2.73049554e-03, ...,
        1.16331837e-02, 1.53418808e-04, 1.20560289e-06],
       [5.07521251e-01, 3.52088286e-04, 3.01849514e-03, ...,
        6.92711103e-01, 1.99362053e-03, 1.05607675e-06]])

In [57]:
np.arange(n_classes)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27])

In [58]:
sub_class_preds[:, 0].shape

(11702,)

In [59]:
sub_ridge_preds

array([[ 1.26424064e-01,  8.84581981e-03,  8.85833838e-01, ...,
         3.20592330e-02,  4.54385428e-03, -6.92529375e-05],
       [ 5.51528260e-02,  2.13827355e-03,  1.63897473e-02, ...,
         5.38178358e-01,  1.06812025e-03,  1.39571143e-03],
       [ 7.77233272e-01, -1.67743788e-03,  1.87791802e-02, ...,
         7.51411388e-01, -1.45306321e-03,  1.27131495e-03],
       ...,
       [ 2.62432560e-02,  4.66381707e-04,  7.97892029e-03, ...,
         2.92185935e-02,  3.12140745e-03,  1.76712611e-04],
       [ 4.95625470e-01,  8.71181686e-01,  1.99755452e-02, ...,
         3.70837162e-02,  3.03441473e-03,  3.03600332e-05],
       [ 4.96285545e-01,  1.04218405e-02,  2.67882221e-02, ...,
         5.85205684e-01,  4.37290352e-03,  1.52204091e-06]])

In [60]:
predicted = []
alpha = 0.35
for line in tqdm(sub_ridge_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub12-g.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 88242.24it/s]

../submissions/sub12-g.csv





In [61]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12-g.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 372 ms, sys: 201 ms, total: 573 ms
Wall time: 17.3 s


In [62]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub12-g.csv   2018-10-25 00:55:10               complete  0.433        None          
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None          
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv   2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv   2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv   2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv   2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv   2018-10-24 00:21:18          

In [63]:
predicted = []
alpha = 0.4
for line in tqdm(sub_ridge_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub12-h.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 79991.76it/s]

../submissions/sub12-h.csv





In [64]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12-h.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub12-h.csv   2018-10-26 02:07:56               complete  0.389        None          
sub12-g.csv   2018-10-25 00:55:10               complete  0.433        None          
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None          
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv   2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv   2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv   2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv   2018-10-24 00:24:24               comple

In [65]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12-d.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName      date                 description  status    publicScore  privateScore  
------------  -------------------  -----------  --------  -----------  ------------  
sub12-d.csv   2018-10-26 02:09:32               complete  0.466        None          
sub12-h.csv   2018-10-26 02:07:56               complete  0.389        None          
sub12-g.csv   2018-10-25 00:55:10               complete  0.433        None          
sub12-c.csv   2018-10-25 00:45:32               complete  0.469        None          
sub12-bb.csv  2018-10-25 00:43:34               complete  0.466        None          
sub12-b.csv   2018-10-25 00:41:50               complete  0.457        None          
sub12-a.csv   2018-10-25 00:40:56               complete  0.449        None          
sub11-k.csv   2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv   2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv   2018-10-24 00:33:17               comple