In [1]:
# https://www.kaggle.com/mathormad/inceptionv3-baseline-lb-0-379/code

In [2]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image
import cv2
from sklearn.utils import class_weight, shuffle
from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings("ignore")
SIZE = 299

In [3]:
# https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric/notebook
import tensorflow as tf

def f1(y_true, y_pred):
    y_pred = K.round(y_pred)
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

def f1_loss(y_true, y_pred):
    
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    tn = K.sum(K.cast((1-y_true)*(1-y_pred), 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) + (1 - K.mean(f1))

In [4]:
# Load dataset info
path_to_train = '../data/train/'
data = pd.read_csv('../data/train.csv')

In [5]:
data.head()

Unnamed: 0,Id,Target
0,00070df0-bbc3-11e8-b2bc-ac1f6b6435d0,16 0
1,000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0,7 1 2 0
2,000a9596-bbc4-11e8-b2bc-ac1f6b6435d0,5
3,000c99ba-bba4-11e8-b2b9-ac1f6b6435d0,1
4,001838f8-bbca-11e8-b2bc-ac1f6b6435d0,18


In [6]:
train_dataset_info = []
for name, labels in zip(data['Id'], data['Target'].str.split(' ')):
    train_dataset_info.append({
        'path':os.path.join(path_to_train, name),
        'labels':np.array([int(label) for label in labels])})
train_dataset_info = np.array(train_dataset_info)

In [7]:
train_dataset_info

array([{'path': '../data/train/00070df0-bbc3-11e8-b2bc-ac1f6b6435d0', 'labels': array([16,  0])},
       {'path': '../data/train/000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0', 'labels': array([7, 1, 2, 0])},
       {'path': '../data/train/000a9596-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([5])},
       ...,
       {'path': '../data/train/fff189d8-bbab-11e8-b2ba-ac1f6b6435d0', 'labels': array([7])},
       {'path': '../data/train/fffdf7e0-bbc4-11e8-b2bc-ac1f6b6435d0', 'labels': array([25,  2, 21])},
       {'path': '../data/train/fffe0ffe-bbc0-11e8-b2bb-ac1f6b6435d0', 'labels': array([2, 0])}],
      dtype=object)

In [8]:
class data_generator:
    
    def create_train(dataset_info, batch_size, shape, augument=True):
        assert shape[2] == 3
        while True:
            dataset_info = shuffle(dataset_info)
            for start in range(0, len(dataset_info), batch_size):
                end = min(start + batch_size, len(dataset_info))
                batch_images = []
                X_train_batch = dataset_info[start:end]
                batch_labels = np.zeros((len(X_train_batch), 28))
                for i in range(len(X_train_batch)):
                    image = data_generator.load_image(
                        X_train_batch[i]['path'], shape)   
                    if augument:
                        image = data_generator.augment(image)
                    batch_images.append(image/255.)
                    batch_labels[i][X_train_batch[i]['labels']] = 1
                yield np.array(batch_images, np.float32), batch_labels

    def load_image(path, shape):
        image_red_ch = Image.open(path+'_red.png')
        image_yellow_ch = Image.open(path+'_yellow.png')
        image_green_ch = Image.open(path+'_green.png')
        image_blue_ch = Image.open(path+'_blue.png')
        image = np.stack((
            np.array(image_red_ch),
            np.array(image_green_ch), 
            np.array(image_blue_ch)), -1)
        w, h = 512, 512
        zero_data = np.zeros((h, w), dtype=np.uint8)
#         image2 = np.stack((
#             np.array(image_yellow_ch),
#             zero_data, zero_data), -1)
#         print(image1.shape, image2.shape)
#         image = np.vstack((image1, image2))
        image = cv2.resize(image, (shape[0], shape[1]))
        return image

    def augment(image):
        augment_img = iaa.Sequential([
            iaa.OneOf([
                iaa.Affine(rotate=0),
                iaa.Affine(rotate=90),
                iaa.Affine(rotate=180),
                iaa.Affine(rotate=270),
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
            ])], random_order=True)

        image_aug = augment_img.augment_image(image)
        return image_aug


In [9]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalMaxPooling2D, BatchNormalization, Input, Conv2D
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras
from keras.models import Model

Using TensorFlow backend.


In [10]:
def create_model(input_shape, n_out):
    input_tensor = Input(shape=input_shape)
    base_model = InceptionV3(include_top=False,
                   weights='imagenet',
                   input_shape=input_shape)
    bn = BatchNormalization()(input_tensor)
    x = base_model(bn)
    x = Conv2D(32, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_out, activation='sigmoid')(x)
    model = Model(input_tensor, output)
    
    return model

In [11]:
import tensorflow as tf
from tensorflow.python.ops import array_ops

# https://github.com/ailias/Focal-Loss-implement-on-Tensorflow/blob/master/focal_loss.py
def focal_loss_org(prediction_tensor, target_tensor, weights=None, alpha=0.25, gamma=2):
    r"""Compute focal loss for predictions.
        Multi-labels Focal loss formula:
            FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p)
                 ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.
    Args:
     prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
     target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
     weights: A float tensor of shape [batch_size, num_anchors]
     alpha: A scalar tensor for focal loss alpha hyper-parameter
     gamma: A scalar tensor for focal loss gamma hyper-parameter
    Returns:
        loss: A (scalar) tensor representing the value of the loss function
    """
    sigmoid_p = tf.nn.sigmoid(prediction_tensor)
    zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)
    
    # For poitive prediction, only need consider front part loss, back part is 0;
    # target_tensor > zeros <=> z=1, so poitive coefficient = z - p.
    pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros)
    
    # For negative prediction, only need consider back part loss, front part is 0;
    # target_tensor > zeros <=> z=1, so negative coefficient = 0.
    neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
    per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                          - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
    return tf.reduce_sum(per_entry_cross_ent)


In [12]:
def focal_loss(weights=None, alpha=0.25, gamma=2):
    def focal_loss_my(target_tensor, prediction_tensor, ):
        r"""Compute focal loss for predictions.
            Multi-labels Focal loss formula:
                FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p)
                     ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.
        Args:
         prediction_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing the predicted logits for each class
         target_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing one-hot encoded classification targets
         weights: A float tensor of shape [batch_size, num_anchors]
         alpha: A scalar tensor for focal loss alpha hyper-parameter
         gamma: A scalar tensor for focal loss gamma hyper-parameter
        Returns:
            loss: A (scalar) tensor representing the value of the loss function
        """
        sigmoid_p = tf.nn.sigmoid(prediction_tensor)
        zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)

        # For poitive prediction, only need consider front part loss, back part is 0;
        # target_tensor > zeros <=> z=1, so poitive coefficient = z - p.
        pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros)

        # For negative prediction, only need consider back part loss, front part is 0;
        # target_tensor > zeros <=> z=1, so negative coefficient = 0.
        neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
        per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                              - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
        return tf.reduce_sum(per_entry_cross_ent)
#         return K.mean(K.binary_crossentropy(target_tensor, prediction_tensor), axis=-1) + tf.reduce_sum(per_entry_cross_ent)
    return focal_loss_my

In [13]:
def focal_loss_fixed(y_true, y_pred):
    gamma = 2.
    alpha = 0.25
    print(y_pred)
    print(y_true)
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

#     pt_1 = K.clip(pt_1, 1e-3, .999)
#     pt_0 = K.clip(pt_0, 1e-3, .999)

    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))


In [14]:
# def focal_loss(gamma=2., alpha=.25):
#     def focal_loss_fixed(y_true, y_pred):
#         pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
#         pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

#         pt_1 = K.clip(pt_1, 1e-3, .999)
#         pt_0 = K.clip(pt_0, 1e-3, .999)

#         return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
#     return focal_loss_fixed

In [16]:
# create callbacks list
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split



In [17]:
train_dataset_info.shape

(31072,)

In [18]:

# split data into train, valid
indexes = np.arange(train_dataset_info.shape[0])
# np.random.shuffle(indexes)
# train_indexes, valid_indexes = train_test_split(indexes, test_size=0.15, random_state=8)
n_splits = 5
kf = KFold(n_splits=n_splits, random_state=42, shuffle=True)
submit = pd.read_csv('../data/sample_submission.csv')

# train_generator = data_generator.create_train(
#     train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
# validation_generator = data_generator.create_train(
#     train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)

oof_class_preds = np.zeros((train_dataset_info.shape[0], 28))
sub_class_preds = np.zeros((submit.shape[0], 28))

fold_ = 0
epochs = 10; batch_size = 16
for train_indexes, valid_indexes in kf.split(indexes):
    
    checkpoint = ModelCheckpoint('../cache/InceptionV3.h5', monitor='val_loss', verbose=1, 
                                 save_best_only=True, mode='min', save_weights_only = True)
    reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, 
                                       verbose=1, mode='auto', epsilon=0.0001)
    early = EarlyStopping(monitor="val_loss", 
                          mode="min", 
                          patience=6)
    callbacks_list = [checkpoint, early, reduceLROnPlat]


    _preds = []
    # create train and valid datagens
    train_generator = data_generator.create_train(
        train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
    validation_generator = data_generator.create_train(
        train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)

    # warm up model
    model = create_model(
        input_shape=(SIZE,SIZE,3), 
        n_out=28)

    for layer in model.layers:
        layer.trainable = False
    model.layers[-1].trainable = True
    model.layers[-2].trainable = True
    model.layers[-3].trainable = True
    model.layers[-4].trainable = True
    model.layers[-5].trainable = True
    model.layers[-6].trainable = True
    
    model.compile(
        loss=f1_loss, 
        optimizer=Adam(1e-03),
        metrics=[f1])
#     model.summary()

    model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(float(len(train_indexes)) / float(batch_size)),
        validation_data=validation_generator,
        validation_steps=np.ceil(float(len(valid_indexes)) / float(batch_size)),
        epochs=2, 
        verbose=1)
    
    # train all layers
    epochs=10
    for layer in model.layers:
        layer.trainable = True
    model.compile(loss=f1_loss,
                optimizer=Adam(lr=1e-4),
                metrics=[f1])
    model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(float(len(train_indexes)) / float(batch_size)),
        validation_data=validation_generator,
        validation_steps=np.ceil(float(len(valid_indexes)) / float(batch_size)),
        epochs=epochs, 
        verbose=1,
        callbacks=callbacks_list)
    
    for idx in tqdm(valid_indexes):
        item = train_dataset_info[idx]
        path = item['path']
        labels = item['labels']
        image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
        score_predict = model.predict(image[np.newaxis])[0]
        oof_class_preds[idx] = score_predict
    
    for idx, name in tqdm(enumerate(submit['Id'])):
        path = os.path.join('../data/test/', name)
        image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
        score_predict = model.predict(image[np.newaxis])[0]
        sub_class_preds[idx] += score_predict
    fold_ += 1
sub_class_preds /= n_splits

Epoch 1/2
Epoch 2/2
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.96807, saving model to ../cache/InceptionV3.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.96807 to 0.91122, saving model to ../cache/InceptionV3.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.91122 to 0.87399, saving model to ../cache/InceptionV3.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.87399 to 0.86604, saving model to ../cache/InceptionV3.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.86604 to 0.85974, saving model to ../cache/InceptionV3.h5
Epoch 6/10

Epoch 00006: val_loss improved from 0.85974 to 0.82278, saving model to ../cache/InceptionV3.h5
Epoch 7/10

Epoch 00007: val_loss improved from 0.82278 to 0.81602, saving model to ../cache/InceptionV3.h5
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.81602
Epoch 9/10

Epoch 00009: val_loss improved from 0.81602 to 0.80711, saving model to ../cache/InceptionV3.h5
Epoch 10/10


  0%|          | 0/6215 [00:00<?, ?it/s]


Epoch 00010: val_loss did not improve from 0.80711


100%|██████████| 6215/6215 [03:44<00:00, 27.64it/s]
11702it [07:09, 27.24it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.97124, saving model to ../cache/InceptionV3.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.97124 to 0.93899, saving model to ../cache/InceptionV3.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.93899 to 0.86708, saving model to ../cache/InceptionV3.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.86708 to 0.84671, saving model to ../cache/InceptionV3.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.84671 to 0.83366, saving model to ../cache/InceptionV3.h5
Epoch 6/10

Epoch 00006: val_loss improved from 0.83366 to 0.80921, saving model to ../cache/InceptionV3.h5
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.80921
Epoch 8/10

Epoch 00008: val_loss improved from 0.80921 to 0.79135, saving model to ../cache/InceptionV3.h5
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.79135
Epoch 10/10

Epoch 00010: val_loss improved from 0.79135 to 0.78676, saving model to ../cache/Inceptio

100%|██████████| 6215/6215 [03:57<00:00, 26.12it/s]
11702it [07:16, 26.79it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/10

Epoch 00001: val_loss improved from inf to 1.02019, saving model to ../cache/InceptionV3.h5
Epoch 2/10

Epoch 00002: val_loss improved from 1.02019 to 0.93380, saving model to ../cache/InceptionV3.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.93380 to 0.87204, saving model to ../cache/InceptionV3.h5
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.87204
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.87204
Epoch 6/10

Epoch 00006: val_loss improved from 0.87204 to 0.81014, saving model to ../cache/InceptionV3.h5
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.81014
Epoch 8/10

Epoch 00008: val_loss improved from 0.81014 to 0.80963, saving model to ../cache/InceptionV3.h5
Epoch 9/10

Epoch 00009: val_loss improved from 0.80963 to 0.78191, saving model to ../cache/InceptionV3.h5
Epoch 10/10


  0%|          | 0/6214 [00:00<?, ?it/s]


Epoch 00010: val_loss did not improve from 0.78191


100%|██████████| 6214/6214 [04:11<00:00, 24.67it/s]
11702it [07:41, 25.36it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.98004, saving model to ../cache/InceptionV3.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.98004 to 0.90920, saving model to ../cache/InceptionV3.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.90920 to 0.86858, saving model to ../cache/InceptionV3.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.86858 to 0.85897, saving model to ../cache/InceptionV3.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.85897 to 0.82646, saving model to ../cache/InceptionV3.h5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.82646
Epoch 7/10

Epoch 00007: val_loss improved from 0.82646 to 0.82232, saving model to ../cache/InceptionV3.h5
Epoch 8/10

Epoch 00008: val_loss improved from 0.82232 to 0.79557, saving model to ../cache/InceptionV3.h5
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.79557
Epoch 10/10


  0%|          | 0/6214 [00:00<?, ?it/s]


Epoch 00010: val_loss did not improve from 0.79557


100%|██████████| 6214/6214 [04:21<00:00, 23.73it/s]
11702it [07:56, 24.55it/s]


Epoch 1/2
Epoch 2/2
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.97879, saving model to ../cache/InceptionV3.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.97879 to 0.93393, saving model to ../cache/InceptionV3.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.93393 to 0.91074, saving model to ../cache/InceptionV3.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.91074 to 0.84833, saving model to ../cache/InceptionV3.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.84833 to 0.82847, saving model to ../cache/InceptionV3.h5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.82847
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.82847
Epoch 8/10

Epoch 00008: val_loss improved from 0.82847 to 0.82200, saving model to ../cache/InceptionV3.h5
Epoch 9/10

Epoch 00009: val_loss improved from 0.82200 to 0.79474, saving model to ../cache/InceptionV3.h5
Epoch 10/10

Epoch 00010: val_loss improved from 0.79474 to 0.78603, saving model to ../cache/Inceptio

100%|██████████| 6214/6214 [04:31<00:00, 22.88it/s]
11702it [08:09, 23.90it/s]


In [19]:
# oof_class_preds = np.zeros((train_dataset_info.shape[0], 28))
# sub_class_preds = np.zeros((submit.shape[0], 28))
# score_predict = model.predict(image[np.newaxis])[0]
# oof_class_preds[idx] = score_predict

In [20]:
# for train_index, test_index in kf.split(indexes):
# ...    print("TRAIN:", train_index, "TEST:", test_index)

In [21]:
sub_class_preds.shape

(11702, 28)

In [22]:
predicted = []
for line in tqdm(sub_class_preds):
    label_predict = np.arange(28)[line>=0.2]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)

100%|██████████| 11702/11702 [00:00<00:00, 87471.97it/s]


In [23]:
len(predicted)

11702

In [24]:
predicted

['2',
 '5 6 25',
 '0 5 21 25',
 '0 25',
 '7 25',
 '4 25',
 '0 4 25',
 '0 25',
 '0',
 '25',
 '0 18 25',
 '3 5',
 '0 2 25',
 '7',
 '23',
 '0 2 18 25',
 '2 14',
 '0 5',
 '12 14 21',
 '0 5',
 '1 6 25',
 '3 5 24',
 '0 25',
 '0 7',
 '0 4',
 '0 12 21 25',
 '0 21',
 '0 5',
 '0 25',
 '0 5',
 '13 21',
 '0 19 25',
 '7 14 16 17 18 21 25',
 '0 5 25',
 '0 7 25',
 '13',
 '0 21 25',
 '0 3',
 '0 21 25',
 '1',
 '0 25',
 '6 25',
 '0 21 25',
 '18 19 25',
 '0 25',
 '6 7 21 25',
 '0 2',
 '0',
 '6 7 21 25',
 '0',
 '0 16 17 25',
 '0 5',
 '7 22',
 '0 25',
 '3',
 '0 25',
 '0 25',
 '11 12 21 22',
 '0 25',
 '0 21 25',
 '2 22',
 '0 5 16 21',
 '0 14 21 25',
 '7 21 25',
 '23',
 '0 18 19 21 25',
 '2 3 6 21 25',
 '0 21 25',
 '0 1 16 25',
 '0 21 25',
 '2 3',
 '0',
 '14 21',
 '4',
 '0 21',
 '0',
 '0 4',
 '0 1 21',
 '0 25',
 '0 25',
 '0 25',
 '0 25',
 '0 21 25',
 '0 21 25',
 '7 17 18 19',
 '0 24',
 '8 20 22 23',
 '0 21',
 '14 25',
 '11 12 14',
 '0 25',
 '11 12 21',
 '23',
 '13 21',
 '0 21 25',
 '0 21 25',
 '7 17 18 25',


In [25]:
submit['Predicted'] = predicted

In [26]:
# Create submit
# submit = pd.read_csv('../data/sample_submission.csv')
# predicted = []
# draw_predict = []
# # model.load_weights('../cache/InceptionV3.h5')
# for name in tqdm(submit['Id']):
#     path = os.path.join('../data/test/', name)
#     image = data_generator.load_image(path, (SIZE,SIZE,3))/255.
#     score_predict = model.predict(image[np.newaxis])[0]
#     draw_predict.append(score_predict)
#     label_predict = np.arange(28)[score_predict>=0.2]
#     str_predict_label = ' '.join(str(l) for l in label_predict)
#     predicted.append(str_predict_label)

# submit['Predicted'] = predicted
# np.save('../cache/draw_predict_InceptionV3-8.npy', score_predict)
# submit.to_csv('../submissions/submit_InceptionV3.csv', index=False)

In [27]:
submit.to_csv('../submissions/sub11.csv', index=False)

In [28]:
#https://stackoverflow.com/questions/1855095/how-to-create-a-zip-archive-of-a-directory
def backup_project_as_zip(project_dir, zip_file):
    assert(os.path.isdir(project_dir))
    assert(os.path.isdir(os.path.dirname(zip_file)))
    shutil.make_archive(zip_file.replace('.zip',''), 'zip', project_dir)
    pass

In [29]:
import datetime, shutil
now = datetime.datetime.now()
print(now)
PROJECT_PATH = '/home/watts/lal/Kaggle/kagglehp/scripts_nbs'
backup_project_as_zip(PROJECT_PATH, '../cache/code.scripts_nbs.%s.zip'%now)

2018-10-23 06:36:55.828507


In [30]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 254 ms, sys: 195 ms, total: 449 ms
Wall time: 12.7 s


In [31]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName   date                 description  status    publicScore  privateScore  
---------  -------------------  -----------  --------  -----------  ------------  
sub11.csv  2018-10-23 01:07:18               complete  0.431        None          
sub10.csv  2018-10-22 17:16:40               complete  0.336        None          
sub9.csv   2018-10-21 20:04:09               complete  0.098        None          
sub9.csv   2018-10-21 19:44:17               complete  0.073        None          
sub9.csv   2018-10-21 19:37:30               complete  0.043        None          
sub9.csv   2018-10-21 19:27:51               complete  0.000        None          
sub8.csv   2018-10-20 20:08:45               complete  0.422        None          
sub7.csv   2018-10-20 17:06:09               complete  0.389        None          
sub5.csv   2018-10-19 18:27:33               complete  0.387        None          
sub4.csv   2018-10-19 14:45:15               complete  0.411        None    

In [32]:
predicted = []
for line in tqdm(sub_class_preds):
    label_predict = np.arange(28)[line>=0.25]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)

100%|██████████| 11702/11702 [00:00<00:00, 83018.58it/s]


In [33]:
submit['Predicted'] = predicted
submit.to_csv('../submissions/sub12.csv', index=False)

In [34]:
import datetime, shutil
now = datetime.datetime.now()
print(now)
PROJECT_PATH = '/home/watts/lal/Kaggle/kagglehp/scripts_nbs'
backup_project_as_zip(PROJECT_PATH, '../cache/code.scripts_nbs.%s.zip'%now)

2018-10-23 10:48:02.699601


In [35]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub12.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 490 ms, sys: 287 ms, total: 777 ms
Wall time: 23.5 s


In [36]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName   date                 description  status    publicScore  privateScore  
---------  -------------------  -----------  --------  -----------  ------------  
sub12.csv  2018-10-23 05:18:36               complete  0.436        None          
sub11.csv  2018-10-23 01:07:18               complete  0.431        None          
sub10.csv  2018-10-22 17:16:40               complete  0.336        None          
sub9.csv   2018-10-21 20:04:09               complete  0.098        None          
sub9.csv   2018-10-21 19:44:17               complete  0.073        None          
sub9.csv   2018-10-21 19:37:30               complete  0.043        None          
sub9.csv   2018-10-21 19:27:51               complete  0.000        None          
sub8.csv   2018-10-20 20:08:45               complete  0.422        None          
sub7.csv   2018-10-20 17:06:09               complete  0.389        None          
sub5.csv   2018-10-19 18:27:33               complete  0.387        None          
sub4

In [40]:
d = {0.3:'b', 0.35:'c', 0.4:'d', 0.45:'e', 0.5:'f'}

In [41]:
for alpha in [0.3, 0.35, 0.4]:
    predicted = []
    for line in tqdm(sub_class_preds):
        label_predict = np.arange(28)[line>=alpha]
        str_predict_label = ' '.join(str(l) for l in label_predict)
        predicted.append(str_predict_label)
    submit['Predicted'] = predicted
    name = '../submissions/sub11-' + d[alpha] + '.csv'
    print(name)
    submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 95851.74it/s]
100%|██████████| 11702/11702 [00:00<00:00, 108039.61it/s]
 93%|█████████▎| 10837/11702 [00:00<00:00, 108346.08it/s]

../submissions/sub11-b.csv
../submissions/sub11-c.csv


100%|██████████| 11702/11702 [00:00<00:00, 107651.70it/s]

../submissions/sub11-d.csv





In [42]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11-b.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 279 ms, sys: 188 ms, total: 467 ms
Wall time: 12.5 s


In [43]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436        None          
sub11.csv    2018-10-23 01:07:18               complete  0.431        None          
sub10.csv    2018-10-22 17:16:40               complete  0.336        None          
sub9.csv     2018-10-21 20:04:09               complete  0.098        None          
sub9.csv     2018-10-21 19:44:17               complete  0.073        None          
sub9.csv     2018-10-21 19:37:30               complete  0.043        None          
sub9.csv     2018-10-21 19:27:51               complete  0.000        None          
sub8.csv     2018-10-20 20:08:45               complete  0.422        None          
sub7.csv     2018-10-20 17:06:09               complet

In [45]:
oof_class_preds.shape

(31072, 28)

In [46]:
data.head()

Unnamed: 0,Id,Target
0,00070df0-bbc3-11e8-b2bc-ac1f6b6435d0,16 0
1,000a6c98-bb9b-11e8-b2b9-ac1f6b6435d0,7 1 2 0
2,000a9596-bbc4-11e8-b2bc-ac1f6b6435d0,5
3,000c99ba-bba4-11e8-b2b9-ac1f6b6435d0,1
4,001838f8-bbca-11e8-b2bc-ac1f6b6435d0,18


In [55]:
data.shape

(31072, 2)

In [52]:
n_classes = 28
y_train = np.zeros(oof_class_preds.shape)
y_train.shape

(31072, 28)

In [57]:
idx = 0
for labels in tqdm(data['Target'].str.split(' ')):
#     print(labels)
    for label in labels:
        y_train[idx][int(label)] = 1
    idx += 1

100%|██████████| 31072/31072 [00:00<00:00, 761215.45it/s]


In [58]:
y_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 1., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.]])

In [59]:
y_train[0]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [62]:
sub_ridge_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import Ridge
for cls in np.arange(n_classes):
    for alpha in [0.1, 0.5, 0.75, 1.0]:
        y = y_train[:, cls]
        X = oof_class_preds
        clf = Ridge(alpha=alpha)
        clf.fit(X,y)
        score_ = clf.score(X, y)
        print(alpha, score_)
    print('------------------')
#         X_test = sub_class_preds[:, cls]
#         preds_ = clf.predict(X_test)
#         sub_ridge_preds[:,cls] = preds_

0.1 0.30516408800294936
0.5 0.3051538272504285
0.75 0.3051518822685677
1.0 0.3051507429050234
------------------
0.1 0.3374655589990885
0.5 0.33744313624695615
0.75 0.3374391064751636
1.0 0.3374365528223586
------------------
0.1 0.3478230052717737
0.5 0.3478094213713602
0.75 0.34780678970667567
1.0 0.3478049883197386
------------------
0.1 0.2683626869914665
0.5 0.268352899140183
0.75 0.26835153112706067
1.0 0.26835055216969317
------------------
0.1 0.35815723900035146
0.5 0.35815048203003075
0.75 0.35814891018769357
1.0 0.358147739540993
------------------
0.1 0.22794961526676827
0.5 0.2279078050326938
0.75 0.22789955736433287
1.0 0.22789429905693348
------------------
0.1 0.1489416133060476
0.5 0.14893490102819695
0.75 0.14893130098979046
1.0 0.14892760759756607
------------------
0.1 0.3187985084046536
0.5 0.3187129546214597
0.75 0.31869812554763033
1.0 0.31868961998265277
------------------
0.1 0.058919401412568344
0.5 0.05882704633553349
0.75 0.05875189987664576
1.0 0.0586673252

In [68]:
sub_ridge_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import Ridge
for cls in np.arange(n_classes):
    y = y_train[:, cls]
    X = oof_class_preds
    clf = Ridge(alpha=1.0)
    clf.fit(X,y)
    X_test = sub_class_preds
    preds_ = clf.predict(X_test)
    sub_ridge_preds[:,cls] = preds_

In [64]:
sub_class_preds

array([[1.00039284e-01, 1.17990415e-03, 9.99636507e-01, ...,
        1.29729806e-03, 4.45128300e-05, 1.83242408e-09],
       [5.12776233e-02, 6.77042314e-02, 5.64872707e-02, ...,
        3.79496768e-01, 4.95238428e-03, 7.19379994e-04],
       [9.01414943e-01, 3.52338608e-03, 6.46044161e-03, ...,
        8.81369901e-01, 4.51639912e-03, 1.18063125e-04],
       ...,
       [1.55598224e-02, 1.30675434e-03, 5.59325572e-04, ...,
        9.47414422e-03, 2.23753006e-04, 1.62329312e-06],
       [4.32646036e-01, 9.83005440e-01, 2.32393433e-02, ...,
        1.39799099e-02, 5.22908648e-03, 6.82432909e-06],
       [5.79181534e-01, 2.58583431e-03, 6.77158823e-03, ...,
        6.30617431e-01, 1.03785092e-02, 2.24508471e-06]])

In [65]:
np.arange(n_classes)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27])

In [67]:
sub_class_preds[:, 0].shape

(11702,)

In [69]:
sub_ridge_preds

array([[ 3.45753296e-01,  3.62983644e-02,  8.84358668e-01, ...,
         1.94297450e-01,  1.24761124e-02, -3.66673039e-04],
       [ 2.77064977e-01,  7.45731031e-02,  1.14356844e-01, ...,
         4.52840405e-01,  1.39280828e-02,  1.22852482e-03],
       [ 8.62137324e-01,  2.33744613e-02,  9.35121162e-02, ...,
         8.06679838e-01,  8.58408516e-03,  8.51189659e-04],
       ...,
       [ 2.76361021e-01,  3.70848688e-02,  8.74717594e-02, ...,
         2.12327144e-01,  1.07031498e-02,  8.76129349e-04],
       [ 5.41232295e-01,  8.03546180e-01,  6.54105768e-02, ...,
         2.11617752e-01,  1.39038025e-02,  1.00735100e-03],
       [ 6.65414781e-01,  3.70336201e-02,  1.09595604e-01, ...,
         6.37737230e-01,  1.44159650e-02, -3.23285538e-06]])

In [70]:
predicted = []
alpha = 0.3
for line in tqdm(sub_ridge_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub11-f.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 94823.98it/s]

../submissions/sub11-f.csv





In [71]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11-f.csv -m ""

Successfully submitted to Human Protein Atlas Image ClassificationCPU times: user 300 ms, sys: 171 ms, total: 470 ms
Wall time: 12.8 s


In [72]:
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436        None          
sub11.csv    2018-10-23 01:07:18               complete  0.431        None          
sub10.csv    2018-10-22 17:16:40               complete  0.336        None          
sub9.csv     2018-10-21 20:04:09               complete  0.098        None          
sub9.csv     2018-10-21 19:44:17               complete  0.073        None          
sub9.csv     2018-10-21 19:37:30               complete  0.043        None          
sub9.csv     2018-10-21 19:27:51               complete  0.000        None          
sub8.csv     2018-10-20 20:08:45               complete  0.422   

In [73]:
predicted = []
alpha = 0.35
for line in tqdm(sub_ridge_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub11-g.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 100564.36it/s]

../submissions/sub11-g.csv





In [74]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11-g.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub11-g.csv  2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436        None          
sub11.csv    2018-10-23 01:07:18               complete  0.431        None          
sub10.csv    2018-10-22 17:16:40               complete  0.336        None          
sub9.csv     2018-10-21 20:04:09               complete  0.098        None          
sub9.csv     2018-10-21 19:44:17               complete  0.073        None          
sub9.csv     2018-10-21 19:37:30               complete  0.043        None          
sub9.csv     2018-10-21 19:27:51               complete  0.000   

In [75]:
predicted = []
alpha = 0.25
for line in tqdm(sub_ridge_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub11-h.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 92276.09it/s]

../submissions/sub11-h.csv





In [76]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11-h.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub11-h.csv  2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv  2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436        None          
sub11.csv    2018-10-23 01:07:18               complete  0.431        None          
sub10.csv    2018-10-22 17:16:40               complete  0.336        None          
sub9.csv     2018-10-21 20:04:09               complete  0.098        None          
sub9.csv     2018-10-21 19:44:17               complete  0.073        None          
sub9.csv     2018-10-21 19:37:30               complete  0.043   

In [77]:
predicted = []
alpha = 0.15
for line in tqdm(sub_ridge_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub11-i.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 87331.89it/s]

../submissions/sub11-i.csv





In [78]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11-i.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub11-i.csv  2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv  2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv  2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436        None          
sub11.csv    2018-10-23 01:07:18               complete  0.431        None          
sub10.csv    2018-10-22 17:16:40               complete  0.336        None          
sub9.csv     2018-10-21 20:04:09               complete  0.098        None          
sub9.csv     2018-10-21 19:44:17               complete  0.073   

In [94]:
sub_ridge_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import Lasso
for cls in np.arange(n_classes):
    for alpha in [1e-6, 0.00001, 0.0001, 0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1.0]:
        y = y_train[:, cls]
        X = oof_class_preds
        clf = Lasso(alpha=alpha)
        clf.fit(X,y)
        score_ = clf.score(X, y)
        print(alpha, score_)
    print('------------------')

1e-06 0.3051675755114847
1e-05 0.305144376000422
0.0001 0.30505250180682486
0.001 0.3041995216261415
0.01 0.30132614510411415
0.1 0.05535801830147291
0.25 0.0
0.5 0.0
0.75 0.0
1.0 0.0
------------------
1e-06 0.33746889229292953
1e-05 0.33741565993666545
0.0001 0.3371574485354396
0.001 0.335685088699252
0.01 0.2863562825647684
0.1 0.0
0.25 0.0
0.5 0.0
0.75 0.0
1.0 0.0
------------------
1e-06 0.34782327756674636
1e-05 0.3477948032137138
0.0001 0.3475776983079414
0.001 0.3462152360822507
0.01 0.3373824233127851
0.1 0.0
0.25 0.0
0.5 0.0
0.75 0.0
1.0 0.0
------------------
1e-06 0.2683562724662605
1e-05 0.26834021416647713
0.0001 0.2680539319635379
0.001 0.2663948313938498
0.01 0.2277392538020788
0.1 0.0
0.25 0.0
0.5 0.0
0.75 0.0
1.0 0.0
------------------
1e-06 0.3581536893431313
1e-05 0.3581301281001904
0.0001 0.35799302647738984
0.001 0.35711942069347236
0.01 0.3317396164094919
0.1 0.0
0.25 0.0
0.5 0.0
0.75 0.0
1.0 0.0
------------------
1e-06 0.22798732882065265
1e-05 0.22786521734467

In [84]:
sub_lasso_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import Lasso
for cls in np.arange(n_classes):
    y = y_train[:, cls]
    X = oof_class_preds
    clf = Lasso(alpha=0.0001)
    clf.fit(X,y)
    X_test = sub_class_preds
    preds_ = clf.predict(X_test)
    sub_lasso_preds[:,cls] = preds_

In [87]:
sub_lasso_preds

array([[3.44429966e-01, 3.29890118e-02, 8.78434195e-01, ...,
        1.93262240e-01, 1.03985480e-02, 6.09050269e-04],
       [2.79492620e-01, 7.57434899e-02, 1.19624324e-01, ...,
        4.50920065e-01, 1.25096596e-02, 6.28292950e-04],
       [8.63054324e-01, 2.55347763e-02, 9.50523800e-02, ...,
        8.07028056e-01, 1.02366936e-02, 6.19276270e-04],
       ...,
       [2.77679194e-01, 3.55172235e-02, 8.77119006e-02, ...,
        2.10879483e-01, 9.58084540e-03, 6.09106243e-04],
       [5.41686191e-01, 7.97803140e-01, 6.48745945e-02, ...,
        2.12811326e-01, 1.22223679e-02, 6.10234894e-04],
       [6.63490374e-01, 3.29306364e-02, 1.05517794e-01, ...,
        6.35244635e-01, 1.37181906e-02, 6.09625472e-04]])

In [89]:
predicted = []
alpha = 0.2
for line in tqdm(sub_lasso_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub11-j.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 86566.52it/s]

../submissions/sub11-j.csv





In [90]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11-j.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub11-j.csv  2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv  2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv  2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv  2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv  2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436        None          
sub11.csv    2018-10-23 01:07:18               complete  0.431        None          
sub10.csv    2018-10-22 17:16:40               complete  0.336   

In [91]:
predicted = []
alpha = 0.35
for line in tqdm(sub_lasso_preds):
    label_predict = np.arange(28)[line>=alpha]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)
submit['Predicted'] = predicted
name = '../submissions/sub11-k.csv'
print(name)
submit.to_csv(name, index=False)

100%|██████████| 11702/11702 [00:00<00:00, 92002.45it/s]

../submissions/sub11-k.csv





In [92]:
%%time
!kaggle competitions submit -c human-protein-atlas-image-classification -f ../submissions/sub11-k.csv -m ""
from time import sleep
sleep(10)
!kaggle competitions submissions -c human-protein-atlas-image-classification

fileName     date                 description  status    publicScore  privateScore  
-----------  -------------------  -----------  --------  -----------  ------------  
sub11-k.csv  2018-10-24 00:35:39               complete  0.346        None          
sub11-j.csv  2018-10-24 00:34:46               complete  0.366        None          
sub11-j.csv  2018-10-24 00:33:17               complete  0.000        None          
sub11-i.csv  2018-10-24 00:24:24               complete  0.389        None          
sub11-h.csv  2018-10-24 00:21:18               complete  0.371        None          
sub11-g.csv  2018-10-23 09:13:19               complete  0.347        None          
sub11-f.csv  2018-10-23 09:11:15               complete  0.358        None          
sub11-b.csv  2018-10-23 05:25:32               complete  0.437        None          
sub12.csv    2018-10-23 05:18:36               complete  0.436        None          
sub11.csv    2018-10-23 01:07:18               complete  0.431   

In [96]:
0.1 0.30516408800294936
0.5 0.3051538272504285
0.75 0.3051518822685677
1.0 0.3051507429050234
------------------
0.1 0.3374655589990885
0.5 0.33744313624695615
0.75 0.3374391064751636
1.0 0.3374365528223586
------------------
0.1 0.3478230052717737
0.5 0.3478094213713602
0.75 0.34780678970667567
1.0 0.3478049883197386
------------------
0.1 0.2683626869914665
0.5 0.268352899140183
0.75 0.26835153112706067
1.0 0.26835055216969317
------------------
0.1 0.35815723900035146
0.5 0.35815048203003075
0.75 0.35814891018769357
1.0 0.358147739540993
------------------
0.1 0.22794961526676827
0.5 0.2279078050326938
0.75 0.22789955736433287
1.0 0.22789429905693348
------------------
0.1 0.1489416133060476
0.5 0.14893490102819695
0.75 0.14893130098979046
1.0 0.14892760759756607
------------------
0.1 0.3187985084046536
0.5 0.3187129546214597
0.75 0.31869812554763033
1.0 0.31868961998265277
------------------
0.1 0.058919401412568344
0.5 0.05882704633553349
0.75 0.05875189987664576
1.0 0.058667325243741186
------------------
0.1 0.07156615804290622
0.5 0.07126853113559695
0.75 0.07114270096439868
1.0 0.07102207063561539
------------------
0.1 0.06559085371180973
0.5 0.06525100734222178
0.75 0.06504384705631672
1.0 0.0648313943033505
------------------
0.1 0.29730807295334705
0.5 0.297301140773105
0.75 0.29729961303613406
1.0 0.29729853739572365
------------------
0.1 0.19265048634247706
0.5 0.1926452315425764
0.75 0.19264412737004155
1.0 0.19264334473553268
------------------
0.1 0.1748069164585282
0.5 0.17478356028400102
0.75 0.17476578474136595
1.0 0.17474727494423503
------------------
0.1 0.43803229207332933
0.5 0.4380307918749215
0.75 0.43803014288665315
1.0 0.4380293723259563
------------------
0.1 0.0016208068563269284
0.5 0.0016163966773466987
0.75 0.0016155744275534456
1.0 0.00161507349089407
------------------
0.1 0.04558853246204809
0.5 0.04554502188513587
0.75 0.045536490517141696
1.0 0.04553106759841841
------------------
0.1 0.05816076634096368
0.5 0.058108272616448124
0.75 0.05809704454806286
1.0 0.05808928716012451
------------------
0.1 0.14208904234077158
0.5 0.1420849131331977
0.75 0.14208374111057687
1.0 0.14208258100291293
------------------
0.1 0.19295768243802502
0.5 0.19294281489982212
0.75 0.1929390098982947
1.0 0.19293606522939977
------------------
0.1 0.07111900207059951
0.5 0.07102736351614891
0.75 0.07095020179710965
1.0 0.07086671256860966
------------------
0.1 0.23888927869855436
0.5 0.23888189324608725
0.75 0.23888070032259182
1.0 0.23887992644699105
------------------
0.1 0.16972905799179328
0.5 0.16954646611666568
0.75 0.16951043174174074
1.0 0.1694886806232755
------------------
0.1 0.34062597994691424
0.5 0.3405976836523227
0.75 0.34059218225129834
1.0 0.3405887172820029
------------------
0.1 0.17499433471107195
0.5 0.17496541950975486
0.75 0.1749573271216207
1.0 0.17495069349445258
------------------
0.1 0.20889288553887697
0.5 0.20887857564836687
0.75 0.20887385077125095
1.0 0.20886983974791637
------------------
0.1 0.09619467836456175
0.5 0.09611981933143054
0.75 0.09609321585505415
1.0 0.0960691803777528
------------------
0.1 0.003184518513993595
0.5 0.0026474271420748874
0.75 0.0025466184378878376
1.0 0.002488337729996015
------------------

sub_ridge_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import Ridge
for cls in np.arange(n_classes):
    y = y_train[:, cls]
    X = oof_class_preds
    clf = Ridge(alpha=1.0)
    clf.fit(X,y)
    X_test = sub_class_preds
    preds_ = clf.predict(X_test)
    sub_ridge_preds[:,cls] = preds_

sub_ridge_preds = np.zeros(sub_class_preds.shape)
from sklearn.linear_model import ElasticNet
for cls in np.arange(n_classes):
    for alpha in [1e-7, 1e-6, 0.00001]:
        y = y_train[:, cls]
        X = oof_class_preds
        clf = ElasticNet(alpha=alpha)
        clf.fit(X,y)
        score_ = clf.score(X, y)
        print(alpha, score_)
    print('------------------')

1e-07 0.30517198624325803
1e-06 0.30516774580674744
1e-05 0.30514618862874276
------------------
1e-07 0.3375364786687866
1e-06 0.33748254475792866
1e-05 0.3374263421478816
------------------
1e-07 0.34786604052700376
1e-06 0.34783322649381787
1e-05 0.3477989381724954
------------------
1e-07 0.26844766758924143
1e-06 0.2683777350190989
1e-05 0.26834353062419525
------------------
1e-07 0.3581716622471507
1e-06 0.3581573666200002
1e-05 0.35813814940153743
------------------
1e-07 0.22802335031175003
1e-06 0.22797847846562522
1e-05 0.22787254607824436
------------------
1e-07 0.1489708539219191
1e-06 0.14893858372796565
1e-05 0.1489118325409502
------------------
1e-07 0.3189865683838434
1e-06 0.31887587504856074
1e-05 0.3187025519995641
------------------
1e-07 0.05902418921044116
1e-06 0.05889336938503854
1e-05 0.058526393929919984
------------------
1e-07 0.07211817873010062
1e-06 0.07157435909791265
1e-05 0.07110603385724168
------------------
1e-07 0.0662061253927062
1e-06 0.065502